diff options
Diffstat (limited to 'catapult/common/py_vulcanize/third_party/rjsmin/rjsmin.py')
-rwxr-xr-x | catapult/common/py_vulcanize/third_party/rjsmin/rjsmin.py | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/catapult/common/py_vulcanize/third_party/rjsmin/rjsmin.py b/catapult/common/py_vulcanize/third_party/rjsmin/rjsmin.py new file mode 100755 index 00000000..54e20ec1 --- /dev/null +++ b/catapult/common/py_vulcanize/third_party/rjsmin/rjsmin.py @@ -0,0 +1,515 @@ +#!/usr/bin/env python +# -*- coding: ascii -*- +r""" +===================== + Javascript Minifier +===================== + +rJSmin is a javascript minifier written in python. + +The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\. + +:Copyright: + + Copyright 2011 - 2015 + Andr\xe9 Malo or his licensors, as applicable + +:License: + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +The module is a re-implementation aiming for speed, so it can be used at +runtime (rather than during a preprocessing step). Usually it produces the +same results as the original ``jsmin.c``. It differs in the following ways: + +- there is no error detection: unterminated string, regex and comment + literals are treated as regular javascript code and minified as such. +- Control characters inside string and regex literals are left untouched; they + are not converted to spaces (nor to \\n) +- Newline characters are not allowed inside string and regex literals, except + for line continuations in string literals (ECMA-5). +- "return /regex/" is recognized correctly. +- Line terminators after regex literals are handled more sensibly +- "+ +" and "- -" sequences are not collapsed to '++' or '--' +- Newlines before ! operators are removed more sensibly +- Comments starting with an exclamation mark (``!``) can be kept optionally +- rJSmin does not handle streams, but only complete strings. (However, the + module provides a "streamy" interface). + +Since most parts of the logic are handled by the regex engine it's way faster +than the original python port of ``jsmin.c`` by Baruch Even. The speed factor +varies between about 6 and 55 depending on input and python version (it gets +faster the more compressed the input already is). Compared to the +speed-refactored python port by Dave St.Germain the performance gain is less +dramatic but still between 3 and 50 (for huge inputs). See the docs/BENCHMARKS +file for details. + +rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more. + +Both python 2 and python 3 are supported. + +.. _jsmin.c by Douglas Crockford: + http://www.crockford.com/javascript/jsmin.c +""" +if __doc__: + # pylint: disable = redefined-builtin + __doc__ = __doc__.encode('ascii').decode('unicode_escape') +__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape') +__docformat__ = "restructuredtext en" +__license__ = "Apache License, Version 2.0" +__version__ = '1.0.12' +__all__ = ['jsmin'] + +import re as _re + + +def _make_jsmin(python_only=False): + """ + Generate JS minifier based on `jsmin.c by Douglas Crockford`_ + + .. _jsmin.c by Douglas Crockford: + http://www.crockford.com/javascript/jsmin.c + + :Parameters: + `python_only` : ``bool`` + Use only the python variant. If true, the c extension is not even + tried to be loaded. + + :Return: Minifier + :Rtype: ``callable`` + """ + # pylint: disable = unused-variable + # pylint: disable = too-many-locals + + if not python_only: + try: + import _rjsmin + except ImportError: + pass + else: + return _rjsmin.jsmin + try: + xrange + except NameError: + xrange = range # pylint: disable = redefined-builtin + + space_chars = r'[\000-\011\013\014\016-\040]' + + line_comment = r'(?://[^\r\n]*)' + space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' + space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)' + bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)' + + string1 = \ + r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)' + string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")' + string3 = r'(?:`(?:[^`\\]|\\.)*`)' + strings = r'(?:%s|%s|%s)' % (string1, string2, string3) + + charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])' + nospecial = r'[^/\\\[\r\n]' + regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % ( + nospecial, charclass, nospecial + ) + space = r'(?:%s|%s)' % (space_chars, space_comment) + newline = r'(?:%s?[\r\n])' % line_comment + + def fix_charclass(result): + """ Fixup string of chars to fit into a regex char class """ + pos = result.find('-') + if pos >= 0: + result = r'%s%s-' % (result[:pos], result[pos + 1:]) + + def sequentize(string): + """ + Notate consecutive characters as sequence + + (1-4 instead of 1234) + """ + first, last, result = None, None, [] + for char in map(ord, string): + if last is None: + first = last = char + elif last + 1 == char: + last = char + else: + result.append((first, last)) + first = last = char + if last is not None: + result.append((first, last)) + return ''.join(['%s%s%s' % ( + chr(first), + last > first + 1 and '-' or '', + last != first and chr(last) or '' + ) for first, last in result]) # noqa + + return _re.sub( + r'([\000-\040\047])', # \047 for better portability + lambda m: '\\%03o' % ord(m.group(1)), ( + sequentize(result) + .replace('\\', '\\\\') + .replace('[', '\\[') + .replace(']', '\\]') + ) + ) + + def id_literal_(what): + """ Make id_literal like char class """ + match = _re.compile(what).match + result = ''.join([ + chr(c) for c in xrange(127) if not match(chr(c)) + ]) + return '[^%s]' % fix_charclass(result) + + def not_id_literal_(keep): + """ Make negated id_literal like char class """ + match = _re.compile(id_literal_(keep)).match + result = ''.join([ + chr(c) for c in xrange(127) if not match(chr(c)) + ]) + return r'[%s]' % fix_charclass(result) + + not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]') + preregex1 = r'[(,=:\[!&|?{};\r\n]' + preregex2 = r'%(not_id_literal)sreturn' % locals() + + id_literal = id_literal_(r'[a-zA-Z0-9_$]') + id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]') + id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]') + post_regex_off = id_literal_(r'[^\000-\040}\])?:|,;.&=+-]') + + dull = r'[^\047"`/\000-\040]' + + space_sub_simple = _re.compile(( + # noqa pylint: disable = bad-continuation + + r'(%(dull)s+)' # 0 + r'|(%(strings)s%(dull)s*)' # 1 + r'|(?<=%(preregex1)s)' + r'%(space)s*(?:%(newline)s%(space)s*)*' + r'(%(regex)s)' # 2 + r'(%(space)s*(?:%(newline)s%(space)s*)+' # 3 + r'(?=%(post_regex_off)s))?' + r'|(?<=%(preregex2)s)' + r'%(space)s*(?:(%(newline)s)%(space)s*)*' # 4 + r'(%(regex)s)' # 5 + r'(%(space)s*(?:%(newline)s%(space)s*)+' # 6 + r'(?=%(post_regex_off)s))?' + r'|(?<=%(id_literal_close)s)' + r'%(space)s*(?:(%(newline)s)%(space)s*)+' # 7 + r'(?=%(id_literal_open)s)' + r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)' # 8 + r'|(?<=\+)(%(space)s)+(?=\+)' # 9 + r'|(?<=-)(%(space)s)+(?=-)' # 10 + r'|%(space)s+' + r'|(?:%(newline)s%(space)s*)+' + ) % locals()).sub + + # print space_sub_simple.__self__.pattern + + def space_subber_simple(match): + """ Substitution callback """ + # pylint: disable = too-many-return-statements + + groups = match.groups() + if groups[0]: + return groups[0] + elif groups[1]: + return groups[1] + elif groups[2]: + if groups[3]: + return groups[2] + '\n' + return groups[2] + elif groups[5]: + return "%s%s%s" % ( + groups[4] and '\n' or '', + groups[5], + groups[6] and '\n' or '', + ) + elif groups[7]: + return '\n' + elif groups[8] or groups[9] or groups[10]: + return ' ' + else: + return '' + + space_sub_banged = _re.compile(( + # noqa pylint: disable = bad-continuation + + r'(%(dull)s+)' # 0 + r'|(%(strings)s%(dull)s*)' # 1 + r'|(?<=%(preregex1)s)' + r'(%(space)s*(?:%(newline)s%(space)s*)*)' # 2 + r'(%(regex)s)' # 3 + r'(%(space)s*(?:%(newline)s%(space)s*)+' # 4 + r'(?=%(post_regex_off)s))?' + r'|(?<=%(preregex2)s)' + r'(%(space)s*(?:(%(newline)s)%(space)s*)*)' # 5, 6 + r'(%(regex)s)' # 7 + r'(%(space)s*(?:%(newline)s%(space)s*)+' # 8 + r'(?=%(post_regex_off)s))?' + r'|(?<=%(id_literal_close)s)' + r'(%(space)s*(?:%(newline)s%(space)s*)+)' # 9 + r'(?=%(id_literal_open)s)' + r'|(?<=%(id_literal)s)(%(space)s+)(?=%(id_literal)s)' # 10 + r'|(?<=\+)(%(space)s+)(?=\+)' # 11 + r'|(?<=-)(%(space)s+)(?=-)' # 12 + r'|(%(space)s+)' # 13 + r'|((?:%(newline)s%(space)s*)+)' # 14 + ) % locals()).sub + + # print space_sub_banged.__self__.pattern + + keep = _re.compile(( + r'%(space_chars)s+|%(space_comment_nobang)s+|%(newline)s+' + r'|(%(bang_comment)s+)' + ) % locals()).sub + keeper = lambda m: m.groups()[0] or '' + + # print keep.__self__.pattern + + def space_subber_banged(match): + """ Substitution callback """ + # pylint: disable = too-many-return-statements + + groups = match.groups() + if groups[0]: + return groups[0] + elif groups[1]: + return groups[1] + elif groups[3]: + return "%s%s%s%s" % ( + keep(keeper, groups[2]), + groups[3], + keep(keeper, groups[4] or ''), + groups[4] and '\n' or '', + ) + elif groups[7]: + return "%s%s%s%s%s" % ( + keep(keeper, groups[5]), + groups[6] and '\n' or '', + groups[7], + keep(keeper, groups[8] or ''), + groups[8] and '\n' or '', + ) + elif groups[9]: + return keep(keeper, groups[9]) + '\n' + elif groups[10] or groups[11] or groups[12]: + return keep(keeper, groups[10] or groups[11] or groups[12]) or ' ' + else: + return keep(keeper, groups[13] or groups[14]) + + def jsmin(script, keep_bang_comments=False): + r""" + Minify javascript based on `jsmin.c by Douglas Crockford`_\. + + Instead of parsing the stream char by char, it uses a regular + expression approach which minifies the whole script with one big + substitution regex. + + .. _jsmin.c by Douglas Crockford: + http://www.crockford.com/javascript/jsmin.c + + :Parameters: + `script` : ``str`` + Script to minify + + `keep_bang_comments` : ``bool`` + Keep comments starting with an exclamation mark? (``/*!...*/``) + + :Return: Minified script + :Rtype: ``str`` + """ + # pylint: disable = redefined-outer-name + + if keep_bang_comments: + return space_sub_banged( + space_subber_banged, '\n%s\n' % script + ).strip() + else: + return space_sub_simple( + space_subber_simple, '\n%s\n' % script + ).strip() + + return jsmin + +jsmin = _make_jsmin() + + +def jsmin_for_posers(script, keep_bang_comments=False): + r""" + Minify javascript based on `jsmin.c by Douglas Crockford`_\. + + Instead of parsing the stream char by char, it uses a regular + expression approach which minifies the whole script with one big + substitution regex. + + .. _jsmin.c by Douglas Crockford: + http://www.crockford.com/javascript/jsmin.c + + :Warning: This function is the digest of a _make_jsmin() call. It just + utilizes the resulting regexes. It's here for fun and may + vanish any time. Use the `jsmin` function instead. + + :Parameters: + `script` : ``str`` + Script to minify + + `keep_bang_comments` : ``bool`` + Keep comments starting with an exclamation mark? (``/*!...*/``) + + :Return: Minified script + :Rtype: ``str`` + """ + if not keep_bang_comments: + rex = ( + r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]' + r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]' + r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?' + r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*' + r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0' + r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r' + r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r' + r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))((?:[\000-\011\013\014' + r'\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r' + r'\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:' + r'[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;=?\]|}-]))?|(?<=[\00' + r'0-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\040]|(?' + r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]' + r'))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*' + r'\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[' + r'[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))((' + r'?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)' + r'*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\04' + r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;' + r'=?\]|}-]))?|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000-\011\01' + r'3\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?:' + r'//[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]' + r'*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,./:-@\\-^' + r'`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\013\014\0' + r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./' + r':-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|(?:/\*[' + r'^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013' + r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[' + r'\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' + r')+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]' + r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+' + ) + + def subber(match): + """ Substitution callback """ + groups = match.groups() + return ( + groups[0] or + groups[1] or + (groups[3] and (groups[2] + '\n')) or + groups[2] or + (groups[5] and "%s%s%s" % ( + groups[4] and '\n' or '', + groups[5], + groups[6] and '\n' or '', + )) or + (groups[7] and '\n') or + (groups[8] and ' ') or + (groups[9] and ' ') or + (groups[10] and ' ') or + '' + ) + else: + rex = ( + r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]' + r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]' + r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?' + r'{};\r\n])((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/' + r'*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013' + r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*)((?:/(?!' + r'[\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^' + r'\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))((?:[\000-\011\013\01' + r'4\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^' + r'\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(' + r'?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;=?\]|}-]))?|(?<=[' + r'\000-#%-,./:-@\[-^`{-~-]return)((?:[\000-\011\013\014\016-\040' + r']|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[' + r'\r\n]))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][' + r'^*]*\*+)*/))*)*)((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|' + r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*' + r'/))((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]' + r'*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\01' + r'6-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)' + r'+,.:;=?\]|}-]))?|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])((?:[\000-' + r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:' + r'(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/' + r'\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+)(?=[^\000-\040"#%-\047)*,./' + r':-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\01' + r'3\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=[^\000' + r'-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|' + r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=\+)|(?<=-)((?:[\000-\0' + r'11\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=-' + r')|((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*' + r'\*+)*/))+)|((?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014' + r'\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+)' + ) + + keep = _re.compile(( + r'[\000-\011\013\014\016-\040]+|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*' + r'\*+)*/)+|(?:(?://[^\r\n]*)?[\r\n])+|((?:/\*![^*]*\*+(?:[^/*][^' + r'*]*\*+)*/)+)' + ) % locals()).sub + keeper = lambda m: m.groups()[0] or '' + + def subber(match): + """ Substitution callback """ + groups = match.groups() + return ( + groups[0] or + groups[1] or + (groups[3] and "%s%s%s%s" % ( + keep(keeper, groups[2]), + groups[3], + keep(keeper, groups[4] or ''), + groups[4] and '\n' or '', + )) or + (groups[7] and "%s%s%s%s%s" % ( + keep(keeper, groups[5]), + groups[6] and '\n' or '', + groups[7], + keep(keeper, groups[8] or ''), + groups[8] and '\n' or '', + )) or + (groups[9] and keep(keeper, groups[9] + '\n')) or + (groups[10] and keep(keeper, groups[10]) or ' ') or + (groups[11] and keep(keeper, groups[11]) or ' ') or + (groups[12] and keep(keeper, groups[12]) or ' ') or + keep(keeper, groups[13] or groups[14]) + ) + + return _re.sub(rex, subber, '\n%s\n' % script).strip() + + +if __name__ == '__main__': + def main(): + """ Main """ + import sys as _sys + + argv = _sys.argv[1:] + keep_bang_comments = '-b' in argv or '-bp' in argv or '-pb' in argv + if '-p' in argv or '-bp' in argv or '-pb' in argv: + xjsmin = _make_jsmin(python_only=True) + else: + xjsmin = jsmin + + _sys.stdout.write(xjsmin( + _sys.stdin.read(), keep_bang_comments=keep_bang_comments + )) + + main() |