diff options
Diffstat (limited to 'catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py')
-rw-r--r-- | catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py | 81 |
1 file changed, 81 insertions, 0 deletions
diff --git a/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py b/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py new file mode 100644 index 00000000..d63c6675 --- /dev/null +++ b/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py @@ -0,0 +1,81 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Utility function for stripping comments out of JavaScript source code.""" + +import re + + +def _TokenizeJS(text): + """Splits source code text into segments in preparation for comment stripping. + + Note that this doesn't tokenize for parsing. There is no notion of statements, + variables, etc. The only tokens of interest are comment-related tokens. + + Args: + text: The contents of a JavaScript file. + + Yields: + A succession of strings in the file, including all comment-related symbols. + """ + rest = text + tokens = ['//', '/*', '*/', '\n'] + next_tok = re.compile('|'.join(re.escape(x) for x in tokens)) + while len(rest): + m = next_tok.search(rest) + if not m: + # end of string + yield rest + return + min_index = m.start() + end_index = m.end() + + if min_index > 0: + yield rest[:min_index] + + yield rest[min_index:end_index] + rest = rest[end_index:] + + +def StripJSComments(text): + """Strips comments out of JavaScript source code. + + Args: + text: JavaScript source text. + + Returns: + JavaScript source text with comments stripped out. 
+ """ + result_tokens = [] + token_stream = _TokenizeJS(text).__iter__() + while True: + try: + t = token_stream.next() + except StopIteration: + break + + if t == '//': + while True: + try: + t2 = token_stream.next() + if t2 == '\n': + break + except StopIteration: + break + elif t == '/*': + nesting = 1 + while True: + try: + t2 = token_stream.next() + if t2 == '/*': + nesting += 1 + elif t2 == '*/': + nesting -= 1 + if nesting == 0: + break + except StopIteration: + break + else: + result_tokens.append(t) + return ''.join(result_tokens) |