Cleanup of the FormatToken interface

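Reduce FormatToken's reliance on the original "pytree node" object: the constructor now copies the fields it needs (type, column, lineno, value, subtypes, name, is_pseudo, is_docstring) into plain attributes instead of reading them from the node through properties, and `is_pseudo_paren` is renamed to `is_pseudo`. This helps isolate the lib2to3 objects from yapf.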
author: Bill Wendling <morbo@google.com> 2021-11-08 01:36:48 -0800
committer: Bill Wendling <morbo@google.com> 2021-11-08 01:36:48 -0800
commit: e9dd4e5f5c2f7630e760afedd9076c87b0e35296 (patch)
tree: eb92e5a882a9213532923f13909ebcd7f195def5
parent: 9d6808a3d9b9a0ea40efea0c7cc3247464dbc291 (diff)
download: yapf-e9dd4e5f5c2f7630e760afedd9076c87b0e35296.tar.gz
6 files changed, 55 insertions, 90 deletions
diff --git a/CHANGELOG b/CHANGELOG
index ecbda46..13e03a7 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -13,6 +13,8 @@
 - Change tests to support "pytest".
 - Reformat so that "flake8" is happy.
 - Use GitHub Actions instead of Travis for CI.
+- Clean up the FormatToken interface to limit how much it relies upon the
+  pytree node object.
 ### Fixed
 - Enable `BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF` knob for "pep8" style, so
   method definitions inside a class are surrounded by a single blank line as
diff --git a/yapf/yapflib/format_decision_state.py b/yapf/yapflib/format_decision_state.py
index 3c0db37..6c3c0fd 100644
--- a/yapf/yapflib/format_decision_state.py
+++ b/yapf/yapflib/format_decision_state.py
@@ -132,7 +132,7 @@ class FormatDecisionState(object):
     current = self.next_token
     previous = current.previous_token
 
-    if current.is_pseudo_paren:
+    if current.is_pseudo:
       return False
 
     if (not must_split and
@@ -171,7 +171,7 @@ class FormatDecisionState(object):
     current = self.next_token
     previous = current.previous_token
 
-    if current.is_pseudo_paren:
+    if current.is_pseudo:
       return False
 
     if current.must_break_before:
@@ -268,7 +268,7 @@ class FormatDecisionState(object):
             token = token.next_token
         return False
 
-      if (previous.value == '(' and not previous.is_pseudo_paren and
+      if (previous.value == '(' and not previous.is_pseudo and
           not unwrapped_line.IsSurroundedByBrackets(previous)):
         pptoken = previous.previous_token
         if (pptoken and not pptoken.is_name and not pptoken.is_keyword and
@@ -350,7 +350,7 @@ class FormatDecisionState(object):
       return True
 
     if (format_token.Subtype.DICTIONARY_VALUE in current.subtypes or
-        (previous.is_pseudo_paren and previous.value == '(' and
+        (previous.is_pseudo and previous.value == '(' and
          not current.is_comment)):
       # Split before the dictionary value if we can't fit every dictionary
       # entry on its own line.
@@ -674,7 +674,7 @@ class FormatDecisionState(object):
       # Don't penalize for a must split.
       return penalty
 
-    if previous.is_pseudo_paren and previous.value == '(':
+    if previous.is_pseudo and previous.value == '(':
       # Small penalty for splitting after a pseudo paren.
       penalty += 50
 
@@ -734,7 +734,7 @@ class FormatDecisionState(object):
     if is_multiline_string:
       # This is a multiline string. Only look at the first line.
       self.column += len(current.value.split('\n')[0])
-    elif not current.is_pseudo_paren:
+    elif not current.is_pseudo:
       self.column += len(current.value)
 
     self.next_token = self.next_token.next_token
@@ -965,7 +965,7 @@ class FormatDecisionState(object):
       return previous.column
 
     if style.Get('INDENT_DICTIONARY_VALUE'):
-      if previous and (previous.value == ':' or previous.is_pseudo_paren):
+      if previous and (previous.value == ':' or previous.is_pseudo):
         if format_token.Subtype.DICTIONARY_VALUE in current.subtypes:
           return top_of_stack.indent
 
@@ -993,7 +993,7 @@ class FormatDecisionState(object):
   def _FitsOnLine(self, start, end):
     """Determines if line between start and end can fit on the current line."""
     length = end.total_length - start.total_length
-    if not start.is_pseudo_paren:
+    if not start.is_pseudo:
       length += len(start.value)
     return length + self.column <= self.column_limit
 
@@ -1008,7 +1008,7 @@ class FormatDecisionState(object):
 
     def ImplicitStringConcatenation(tok):
       num_strings = 0
-      if tok.is_pseudo_paren:
+      if tok.is_pseudo:
         tok = tok.next_token
       while tok.is_string:
         num_strings += 1
@@ -1021,7 +1021,7 @@ class FormatDecisionState(object):
         return False
       colon = opening.previous_token
       while colon:
-        if not colon.is_pseudo_paren:
+        if not colon.is_pseudo:
           break
         colon = colon.previous_token
       if not colon or colon.value != ':':
@@ -1048,7 +1048,7 @@ class FormatDecisionState(object):
         entry_start = current
       if current.OpensScope():
         if ((current.value == '{' or
-             (current.is_pseudo_paren and current.next_token.value == '{') and
+             (current.is_pseudo and current.next_token.value == '{') and
              format_token.Subtype.DICTIONARY_VALUE in current.subtypes) or
             ImplicitStringConcatenation(current)):
           # A dictionary entry that cannot fit on a single line shouldn't matter
@@ -1141,13 +1141,13 @@ def _IsArgumentToFunction(token):
 
 def _GetOpeningBracket(current):
   """Get the opening bracket containing the current token."""
-  if current.matching_bracket and not current.is_pseudo_paren:
+  if current.matching_bracket and not current.is_pseudo:
     return current if current.OpensScope() else current.matching_bracket
 
   while current:
     if current.ClosesScope():
       current = current.matching_bracket
-    elif current.is_pseudo_paren:
+    elif current.is_pseudo:
       current = current.previous_token
     elif current.OpensScope():
       return current
diff --git a/yapf/yapflib/format_token.py b/yapf/yapflib/format_token.py
index 92c26ee..0f3cb60 100644
--- a/yapf/yapflib/format_token.py
+++ b/yapf/yapflib/format_token.py
@@ -131,18 +131,27 @@ class FormatToken(object):
     self.whitespace_prefix = ''
     self.can_break_before = False
     self.must_break_before = False
-    self.total_length = 0  # TODO(morbo): Think up a better name.
+    self.total_length = 0
     self.split_penalty = 0
 
+    self.type = node.type
+    self.column = node.column
+    self.lineno = node.lineno
+
+    self.spaces_required_before = 0
     if self.is_comment:
       self.spaces_required_before = style.Get('SPACES_BEFORE_COMMENT')
-    else:
-      self.spaces_required_before = 0
 
+    self.value = node.value
     if self.is_continuation:
-      self.value = self.node.value.rstrip()
-    else:
-      self.value = self.node.value
+      self.value = node.value.rstrip()
+
+    subtypes = pytree_utils.GetNodeAnnotation(node,
+                                              pytree_utils.Annotation.SUBTYPE)
+    self.subtypes = [Subtype.NONE] if subtypes is None else subtypes
+    self.name = pytree_utils.NodeName(node)
+    self.is_pseudo = hasattr(node, 'is_pseudo') and node.is_pseudo
+    self.is_docstring = self.is_multiline_string and not node.prev_sibling
 
   @property
   def formatted_whitespace_prefix(self):
@@ -176,10 +185,10 @@ class FormatToken(object):
 
     if self.is_comment:
       comment_lines = [s.lstrip() for s in self.value.splitlines()]
-      self.node.value = ('\n' + indent_before).join(comment_lines)
+      self.value = ('\n' + indent_before).join(comment_lines)
 
       # Update our own value since we are changing node value
-      self.value = self.node.value
+      self.value = self.value
 
     if not self.whitespace_prefix:
       self.whitespace_prefix = ('\n' * (self.newlines or newlines_before) +
@@ -198,7 +207,7 @@ class FormatToken(object):
     if not previous:
       return
 
-    if previous.is_pseudo_paren:
+    if previous.is_pseudo:
       previous = previous.previous_token
       if not previous:
         return
@@ -209,17 +218,17 @@ class FormatToken(object):
       prev_lineno += previous.value.count('\n')
 
     if (cur_lineno != prev_lineno or
-        (previous.is_pseudo_paren and previous.value != ')' and
+        (previous.is_pseudo and previous.value != ')' and
          cur_lineno != previous.previous_token.lineno)):
       self.spaces_required_before = (
           self.column - first_column + depth * style.Get('INDENT_WIDTH'))
       return
 
-    cur_column = self.node.column
+    cur_column = self.column
     prev_column = previous.node.column
     prev_len = len(previous.value)
 
-    if previous.is_pseudo_paren and previous.value == ')':
+    if previous.is_pseudo and previous.value == ')':
       prev_column -= 1
       prev_len = 0
 
@@ -239,11 +248,10 @@ class FormatToken(object):
   def __repr__(self):
     msg = 'FormatToken(name={0}, value={1}, lineno={2}'.format(
         self.name, self.value, self.lineno)
-    msg += ', pseudo)' if self.is_pseudo_paren else ')'
+    msg += ', pseudo)' if self.is_pseudo else ')'
     return msg
 
   @property
-  @py3compat.lru_cache()
   def node_split_penalty(self):
     """Split penalty attached to the pytree node of this token."""
     return pytree_utils.GetNodeAnnotation(
@@ -262,72 +270,42 @@ class FormatToken(object):
                                           pytree_utils.Annotation.MUST_SPLIT)
 
   @property
-  def column(self):
-    """The original column number of the node in the source."""
-    return self.node.column
-
-  @property
-  def lineno(self):
-    """The original line number of the node in the source."""
-    return self.node.lineno
-
-  @property
-  @py3compat.lru_cache()
-  def subtypes(self):
-    """Extra type information for directing formatting."""
-    value = pytree_utils.GetNodeAnnotation(self.node,
-                                           pytree_utils.Annotation.SUBTYPE)
-    return [Subtype.NONE] if value is None else value
-
-  @property
-  @py3compat.lru_cache()
   def is_binary_op(self):
     """Token is a binary operator."""
     return Subtype.BINARY_OPERATOR in self.subtypes
 
   @property
-  @py3compat.lru_cache()
   def is_a_expr_op(self):
     """Token is an a_expr operator."""
     return Subtype.A_EXPR_OPERATOR in self.subtypes
 
   @property
-  @py3compat.lru_cache()
   def is_m_expr_op(self):
     """Token is an m_expr operator."""
     return Subtype.M_EXPR_OPERATOR in self.subtypes
 
   @property
-  @py3compat.lru_cache()
   def is_arithmetic_op(self):
     """Token is an arithmetic operator."""
     return self.is_a_expr_op or self.is_m_expr_op
 
   @property
-  @py3compat.lru_cache()
   def is_simple_expr(self):
     """Token is an operator in a simple expression."""
     return Subtype.SIMPLE_EXPRESSION in self.subtypes
 
   @property
-  @py3compat.lru_cache()
   def is_subscript_colon(self):
     """Token is a subscript colon."""
     return Subtype.SUBSCRIPT_COLON in self.subtypes
 
   @property
-  @py3compat.lru_cache()
-  def name(self):
-    """A string representation of the node's name."""
-    return pytree_utils.NodeName(self.node)
-
-  @property
   def is_comment(self):
-    return self.node.type == token.COMMENT
+    return self.type == token.COMMENT
 
   @property
   def is_continuation(self):
-    return self.node.type == CONTINUATION
+    return self.type == CONTINUATION
 
   @property
   @py3compat.lru_cache()
@@ -335,20 +313,18 @@ class FormatToken(object):
     return keyword.iskeyword(self.value)
 
   @property
-  @py3compat.lru_cache()
   def is_name(self):
-    return self.node.type == token.NAME and not self.is_keyword
+    return self.type == token.NAME and not self.is_keyword
 
   @property
   def is_number(self):
-    return self.node.type == token.NUMBER
+    return self.type == token.NUMBER
 
   @property
   def is_string(self):
-    return self.node.type == token.STRING
+    return self.type == token.STRING
 
   @property
-  @py3compat.lru_cache()
   def is_multiline_string(self):
     """Test if this string is a multiline string.
 
@@ -360,16 +336,6 @@ class FormatToken(object):
     return self.is_string and self.value.endswith(('"""', "'''"))
 
   @property
-  @py3compat.lru_cache()
-  def is_docstring(self):
-    return self.is_multiline_string and not self.node.prev_sibling
-
-  @property
-  @py3compat.lru_cache()
-  def is_pseudo_paren(self):
-    return hasattr(self.node, 'is_pseudo') and self.node.is_pseudo
-
-  @property
   def is_pylint_comment(self):
     return self.is_comment and re.match(r'#.*\bpylint:\s*(disable|enable)=',
                                         self.value)
@@ -381,5 +347,5 @@ class FormatToken(object):
 
   @property
   def is_copybara_comment(self):
-    return self.is_comment and re.match(r'#.*\bcopybara:(strip|insert|replace)',
-                                        self.value)
+    return self.is_comment and re.match(
+        r'#.*\bcopybara:\s*(strip|insert|replace)', self.value)
diff --git a/yapf/yapflib/reformatter.py b/yapf/yapflib/reformatter.py
index 6857b10..91cad92 100644
--- a/yapf/yapflib/reformatter.py
+++ b/yapf/yapflib/reformatter.py
@@ -135,7 +135,7 @@ def _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines):
 
   if prev_tok.is_string:
     prev_lineno = prev_tok.lineno + prev_tok.value.count('\n')
-  elif prev_tok.is_pseudo_paren:
+  elif prev_tok.is_pseudo:
     if not prev_tok.previous_token.is_multiline_string:
       prev_lineno = prev_tok.previous_token.lineno
     else:
@@ -400,7 +400,7 @@ def _FormatFinalLines(final_lines, verify):
   for line in final_lines:
     formatted_line = []
     for tok in line.tokens:
-      if not tok.is_pseudo_paren:
+      if not tok.is_pseudo:
         formatted_line.append(tok.formatted_whitespace_prefix)
         formatted_line.append(tok.value)
       elif (not tok.next_token.whitespace_prefix.startswith('\n') and
diff --git a/yapf/yapflib/unwrapped_line.py b/yapf/yapflib/unwrapped_line.py
index ddd50e0..215d0cd 100644
--- a/yapf/yapflib/unwrapped_line.py
+++ b/yapf/yapflib/unwrapped_line.py
@@ -74,7 +74,7 @@ class UnwrappedLine(object):
           _SpaceRequiredBetween(prev_token, token, self.disable)):
         token.spaces_required_before = 1
 
-      tok_len = len(token.value) if not token.is_pseudo_paren else 0
+      tok_len = len(token.value) if not token.is_pseudo else 0
 
       spaces_required_before = token.spaces_required_before
       if isinstance(spaces_required_before, list):
@@ -271,11 +271,11 @@ def _SpaceRequiredBetween(left, right, is_line_disabled):
   """Return True if a space is required between the left and right token."""
   lval = left.value
   rval = right.value
-  if (left.is_pseudo_paren and _IsIdNumberStringToken(right) and
+  if (left.is_pseudo and _IsIdNumberStringToken(right) and
       left.previous_token and _IsIdNumberStringToken(left.previous_token)):
     # Space between keyword... tokens and pseudo parens.
     return True
-  if left.is_pseudo_paren or right.is_pseudo_paren:
+  if left.is_pseudo or right.is_pseudo:
     # There should be a space after the ':' in a dictionary.
     if left.OpensScope():
       return True
@@ -484,7 +484,7 @@ def _SpaceRequiredBetween(left, right, is_line_disabled):
 def _MustBreakBefore(prev_token, cur_token):
   """Return True if a line break is required before the current token."""
   if prev_token.is_comment or (prev_token.previous_token and
-                               prev_token.is_pseudo_paren and
+                               prev_token.is_pseudo and
                                prev_token.previous_token.is_comment):
     # Must break if the previous token was a comment.
     return True
diff --git a/yapftests/reformatter_basic_test.py b/yapftests/reformatter_basic_test.py
index 185d068..539aa10 100644
--- a/yapftests/reformatter_basic_test.py
+++ b/yapftests/reformatter_basic_test.py
@@ -110,8 +110,7 @@ class BasicReformatterTest(yapf_test_helper.YAPFTest):
     style.SetGlobalStyle(
         style.CreateStyleFromConfig(
             '{split_all_top_level_comma_separated_values: true, '
-            'column_limit: 40}'
-        ))
+            'column_limit: 40}'))
     # Works the same way as split_all_comma_separated_values
     unformatted_code = textwrap.dedent("""\
           responseDict = {"timestamp": timestamp, "someValue":   value, "whatever": 120}
@@ -282,8 +281,7 @@ class BasicReformatterTest(yapf_test_helper.YAPFTest):
       style.SetGlobalStyle(
           style.CreateStyleFromConfig(
               '{based_on_style: yapf, '
-              'blank_lines_between_top_level_imports_and_variables: 2}'
-          ))
+              'blank_lines_between_top_level_imports_and_variables: 2}'))
       uwlines = yapf_test_helper.ParseAndUnwrap(unformatted_code)
       self.assertCodeEqual(expected_formatted_code,
                            reformatter.Reformat(uwlines))
@@ -1997,10 +1995,9 @@ class A(object):
 
     try:
       style.SetGlobalStyle(
-          style.CreateStyleFromConfig(
-              '{based_on_style: pep8, indent_width: 2, '
-              'continuation_indent_width: 4, '
-              'indent_dictionary_value: True}'))
+          style.CreateStyleFromConfig('{based_on_style: pep8, indent_width: 2, '
+                                      'continuation_indent_width: 4, '
+                                      'indent_dictionary_value: True}'))
 
       uwlines = yapf_test_helper.ParseAndUnwrap(code)
       reformatted_code = reformatter.Reformat(uwlines)
author	Bill Wendling <morbo@google.com>	2021-11-08 01:36:48 -0800
committer	Bill Wendling <morbo@google.com>	2021-11-08 01:36:48 -0800
commit	e9dd4e5f5c2f7630e760afedd9076c87b0e35296 (patch)
tree	eb92e5a882a9213532923f13909ebcd7f195def5
parent	9d6808a3d9b9a0ea40efea0c7cc3247464dbc291 (diff)
download	yapf-e9dd4e5f5c2f7630e760afedd9076c87b0e35296.tar.gz