Diffstat (limited to 'catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py')
-rw-r--r--  catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py  81
1 file changed, 81 insertions, 0 deletions
diff --git a/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py b/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py
new file mode 100644
index 00000000..d63c6675
--- /dev/null
+++ b/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py
@@ -0,0 +1,81 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Utility function for stripping comments out of JavaScript source code."""
+
+import re
+
+
+def _TokenizeJS(text):
+  """Splits source code text into segments in preparation for comment stripping.
+
+  Note that this doesn't tokenize for parsing. There is no notion of statements,
+  variables, etc. The only tokens of interest are comment-related tokens.
+
+  Args:
+    text: The contents of a JavaScript file.
+
+  Yields:
+    A succession of strings in the file, including all comment-related symbols.
+  """
+  rest = text
+  tokens = ['//', '/*', '*/', '\n']
+  next_tok = re.compile('|'.join(re.escape(x) for x in tokens))
+  while rest:
+    m = next_tok.search(rest)
+    if not m:
+      # End of string.
+      yield rest
+      return
+    min_index = m.start()
+    end_index = m.end()
+
+    if min_index > 0:
+      yield rest[:min_index]
+
+    yield rest[min_index:end_index]
+    rest = rest[end_index:]
+
+
+def StripJSComments(text):
+  """Strips comments out of JavaScript source code.
+
+  Args:
+    text: JavaScript source text.
+
+  Returns:
+    JavaScript source text with comments stripped out.
+  """
+  result_tokens = []
+  token_stream = iter(_TokenizeJS(text))
+  while True:
+    try:
+      t = next(token_stream)
+    except StopIteration:
+      break
+
+    if t == '//':
+      while True:
+        try:
+          t2 = next(token_stream)
+          if t2 == '\n':
+            break
+        except StopIteration:
+          break
+    elif t == '/*':
+      nesting = 1
+      while True:
+        try:
+          t2 = next(token_stream)
+          if t2 == '/*':
+            nesting += 1
+          elif t2 == '*/':
+            nesting -= 1
+            if nesting == 0:
+              break
+        except StopIteration:
+          break
+    else:
+      result_tokens.append(t)
+  return ''.join(result_tokens)
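
A usage sketch (not part of the commit above, and assuming the py_vulcanize package is importable): the expected outputs below follow from tracing the token stream that _TokenizeJS yields. Two behaviors are worth knowing: a '//' comment is stripped together with the newline that terminates it, and the tokenizer has no notion of string literals, so comment markers inside strings are treated as real comments (the "doesn't tokenize for parsing" caveat in the docstring).

from py_vulcanize.strip_js_comments import StripJSComments, _TokenizeJS

# The tokenizer only splits around comment-related symbols and newlines.
assert list(_TokenizeJS('x // y\n')) == ['x ', '//', ' y', '\n']

# A '//' comment is removed along with its terminating newline, since the
# '\n' token that ends the comment is consumed and never re-emitted.
assert StripJSComments('a = 1; // comment\nb = 2;') == 'a = 1; b = 2;'

# Block comments nest; the nesting counter only stops at the matching '*/'.
assert StripJSComments('a/* outer /* inner */ still outer */b') == 'ab'

# No string-literal awareness: '//' inside a string still starts a comment.
assert StripJSComments('var url = "http://example.com";') == 'var url = "http:'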