diff options
Diffstat (limited to 'catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py')
-rw-r--r-- | catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py | 81 |
1 file changed, 81 insertions, 0 deletions
diff --git a/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py b/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py new file mode 100644 index 00000000..d63c6675 --- /dev/null +++ b/catapult/common/py_vulcanize/py_vulcanize/strip_js_comments.py @@ -0,0 +1,81 @@ +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Utility function for stripping comments out of JavaScript source code.""" + +import re + + +def _TokenizeJS(text): + """Splits source code text into segments in preparation for comment stripping. + + Note that this doesn't tokenize for parsing. There is no notion of statements, + variables, etc. The only tokens of interest are comment-related tokens. + + Args: + text: The contents of a JavaScript file. + + Yields: + A succession of strings in the file, including all comment-related symbols. + """ + rest = text + tokens = ['//', '/*', '*/', '\n'] + next_tok = re.compile('|'.join(re.escape(x) for x in tokens)) + while len(rest): + m = next_tok.search(rest) + if not m: + # end of string + yield rest + return + min_index = m.start() + end_index = m.end() + + if min_index > 0: + yield rest[:min_index] + + yield rest[min_index:end_index] + rest = rest[end_index:] + + +def StripJSComments(text): + """Strips comments out of JavaScript source code. + + Args: + text: JavaScript source text. + + Returns: + JavaScript source text with comments stripped out. 
+ """ + result_tokens = [] + token_stream = _TokenizeJS(text).__iter__() + while True: + try: + t = token_stream.next() + except StopIteration: + break + + if t == '//': + while True: + try: + t2 = token_stream.next() + if t2 == '\n': + break + except StopIteration: + break + elif t == '/*': + nesting = 1 + while True: + try: + t2 = token_stream.next() + if t2 == '/*': + nesting += 1 + elif t2 == '*/': + nesting -= 1 + if nesting == 0: + break + except StopIteration: + break + else: + result_tokens.append(t) + return ''.join(result_tokens) |