python压缩javascript文件代码,pythonjavascript,通过正规表达式实现


通过正规表达式实现

```python
'''a regex-based JavaScript code compression kludge'''
import re

class JSCompressor(object):
    '''
    Regex-based JavaScript compressor.

    Comments are removed and whitespace is reduced, while string and regex
    literals are shielded from modification by temporarily swapping them for
    numbered placeholders.

    This is a heuristic, not a JavaScript parser. Known limitations:
    - a division such as ``a / b / c`` may be mistaken for a regex literal
      (the text is then left untouched rather than compressed);
    - level 2 removes the blank between adjacent operators, so ``a + +b``
      becomes ``a++b``;
    - string literals ending in an escaped backslash (``'\\\\'``) are not
      recognised.
    '''

    def __init__(self, compressionLevel=2, measureCompression=False):
        '''
        compressionLevel:
        0 - no compression, script returned unchanged. For debugging only -
            try if you suspect that compression compromises your script
        1 - Strip comments and empty lines, don't change line breaks and
            indentation (code remains readable)
        2 - Additionally strip insignificant whitespace (code will become
            quite unreadable)

        measureCompression: append a comment stating the extent of compression
        '''
        self.compressionLevel = compressionLevel
        self.measureCompression = measureCompression

    # ------------------------------------------------------------------
    # Regexes used in compression
    # ------------------------------------------------------------------

    # String and regex literals that must survive compression unchanged.
    # Not used by compress() any more (see _scanner); kept so that code
    # referring to the attribute keeps working.
    findLiterals = re.compile(r'''
        (\'.*?(?<=[^\\])\')             |       # single-quoted strings
        (\".*?(?<=[^\\])\")             |       # double-quoted strings
        ((?<![\*\/])\/(?![\/\*]).*?(?<![\\])\/) # JS regexes, trying hard not to be tripped up by comments
        ''', re.VERBOSE)

    # Literals are temporarily replaced by numbered placeholders.
    literalMarker = '@_@%d@_@'                      # temporary replacement
    backSubst = re.compile(r'@_@(\d+)@_@')          # put the literals back in

    # Stand-alone comment patterns. Not used by compress() any more (see
    # _scanner); kept for backward compatibility.
    mlc1 = re.compile(r'(\/\*.*?\*\/)')             # /* ... */ comments on single line
    mlc = re.compile(r'(\/\*.*?\*\/)', re.DOTALL)   # real multiline comments
    slc = re.compile(r'\/\/.*')                     # single line comments

    # Comments and literals recognised in ONE left-to-right pass, so whichever
    # construct starts first wins. Handling them in separate passes let a
    # "//" inside a block comment (e.g. a URL) cut off the closing "*/".
    _scanner = re.compile(r'''
        (?P<block>/\*[\s\S]*?\*/)                   |   # block comment, may span lines
        (?P<line>//.*)                              |   # line comment, up to end of line
        (?P<literal>
            \'.*?(?<=[^\\])\'                       |   # single-quoted string
            \".*?(?<=[^\\])\"                       |   # double-quoted string
            (?<![\*\/])\/(?![\/\*]).*?(?<![\\])\/       # JS regex literal
        )
        ''', re.VERBOSE)

    # Collapse successive non-leading white space characters into one.
    collapseWs = re.compile(r'(?<=\S)[ \t]+')

    # Whitespace that level 2 drops entirely.
    squeeze = re.compile(r'''
        \s+(?=[\}\]\)\:\&\|\=\;\,\.\+])   |     # remove whitespace preceding control characters
        (?<=[\{\[\(\:\&\|\=\;\,\.\+])\s+  |     # ... or following such
        [ \t]+(?=\W)                      |     # remove spaces or tabs preceding non-word characters
        (?<=\W)[ \t]+                           # ... or following such
        ''', re.VERBOSE | re.DOTALL)

    def compress(self, script):
        '''
        Perform compression and return the compressed script.

        script: JavaScript source text.

        Returns the script unchanged at level 0. Otherwise comments, empty
        lines and trailing whitespace are removed; level 1 additionally
        collapses runs of blanks, level 2 removes all dispensable whitespace.
        If measureCompression is set, a trailing ``// squeezed out N%``
        comment is appended.
        '''
        if self.compressionLevel == 0:
            return script

        lengthBefore = len(script)
        literals = []

        def scrub(mo):
            # Literals are parked behind a numbered placeholder ...
            literal = mo.group('literal')
            if literal is not None:
                literals.append(literal)
                return self.literalMarker % (len(literals) - 1)
            # ... line comments vanish ...
            if mo.group('line') is not None:
                return ''
            # ... and block comments leave a separator behind so the tokens
            # around them do not merge: a newline if the comment spanned
            # lines, a blank otherwise.
            if '\n' in mo.group('block'):
                return '\n'
            return ' '

        script = self._scanner.sub(scrub, script)

        # Remove empty lines and trailing whitespace.
        script = '\n'.join(
            [line.rstrip() for line in script.splitlines() if line.strip()])

        if self.compressionLevel == 2:      # squeeze out any dispensable whitespace
            script = self.squeeze.sub('', script)
        elif self.compressionLevel == 1:    # only collapse multiple whitespace characters
            script = self.collapseWs.sub(' ', script)

        # Now back-substitute the string and regex literals.
        def backsub(mo):
            return literals[int(mo.group(1))]

        script = self.backSubst.sub(backsub, script)

        if self.measureCompression:
            if lengthBefore:
                lengthAfter = float(len(script))
                squeezedBy = int(100 * (1 - lengthAfter / lengthBefore))
            else:
                # An empty script cannot shrink; avoid dividing by zero.
                squeezedBy = 0
            script += '\n// squeezed out %s%%\n' % squeezedBy

        return script

# Demo: compress a small sample script at levels 1 and 2 and print the result.
if __name__ == '__main__':
    # Sample input exercising block comments, line comments, string literals
    # containing comment-like text, and superfluous whitespace.
    script = '''

/* this is a totally useless multiline comment, containing a silly "quoted string",
   surrounded by several superfluous line breaks */

// and this is an equally important single line comment

sth = "this string contains 'quotes', a /regex/ and a // comment yet it will survive compression";

function wurst(){           // this is a great function
    var hans = 33;
}

sthelse = 'and another useless string';

function hans(){            // another function
    var   bill   =   66;    // successive spaces will be collapsed into one;
    var bob = 77            // this line break will be preserved b/c of lacking semicolon
    var george = 88;
}
'''
    for x in range(1, 3):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('\ncompression level %s :\n--------------' % x)
        c = JSCompressor(compressionLevel=x, measureCompression=True)
        cpr = c.compress(script)
        print(cpr)
        print('length %s' % len(cpr))

```

评论关闭