diff options
Diffstat (limited to 'yapf/yapflib')
-rw-r--r-- | yapf/yapflib/blank_line_calculator.py | 33 | ||||
-rw-r--r-- | yapf/yapflib/comment_splicer.py | 21 | ||||
-rw-r--r-- | yapf/yapflib/errors.py | 25 | ||||
-rw-r--r-- | yapf/yapflib/file_resources.py | 155 | ||||
-rw-r--r-- | yapf/yapflib/format_decision_state.py | 427 | ||||
-rw-r--r-- | yapf/yapflib/format_token.py | 234 | ||||
-rw-r--r-- | yapf/yapflib/identify_container.py | 6 | ||||
-rw-r--r-- | yapf/yapflib/line_joiner.py | 8 | ||||
-rw-r--r-- | yapf/yapflib/logical_line.py (renamed from yapf/yapflib/unwrapped_line.py) | 334 | ||||
-rw-r--r-- | yapf/yapflib/object_state.py | 157 | ||||
-rw-r--r-- | yapf/yapflib/py3compat.py | 21 | ||||
-rw-r--r-- | yapf/yapflib/pytree_unwrapper.py | 117 | ||||
-rw-r--r-- | yapf/yapflib/pytree_utils.py | 16 | ||||
-rw-r--r-- | yapf/yapflib/pytree_visitor.py | 2 | ||||
-rw-r--r-- | yapf/yapflib/reformatter.py | 469 | ||||
-rw-r--r-- | yapf/yapflib/split_penalty.py | 249 | ||||
-rw-r--r-- | yapf/yapflib/style.py | 314 | ||||
-rw-r--r-- | yapf/yapflib/subtype_assigner.py | 253 | ||||
-rw-r--r-- | yapf/yapflib/subtypes.py | 40 | ||||
-rw-r--r-- | yapf/yapflib/yapf_api.py | 132 |
20 files changed, 2161 insertions, 852 deletions
diff --git a/yapf/yapflib/blank_line_calculator.py b/yapf/yapflib/blank_line_calculator.py index c239ee7..3d78646 100644 --- a/yapf/yapflib/blank_line_calculator.py +++ b/yapf/yapflib/blank_line_calculator.py @@ -22,6 +22,8 @@ Annotations: newlines: The number of newlines required before the node. """ +from lib2to3.pgen2 import token as grammar_token + from yapf.yapflib import py3compat from yapf.yapflib import pytree_utils from yapf.yapflib import pytree_visitor @@ -64,15 +66,15 @@ class _BlankLineCalculator(pytree_visitor.PyTreeVisitor): def Visit_simple_stmt(self, node): # pylint: disable=invalid-name self.DefaultNodeVisit(node) - if pytree_utils.NodeName(node.children[0]) == 'COMMENT': + if node.children[0].type == grammar_token.COMMENT: self.last_comment_lineno = node.children[0].lineno def Visit_decorator(self, node): # pylint: disable=invalid-name if (self.last_comment_lineno and self.last_comment_lineno == node.children[0].lineno - 1): - self._SetNumNewlines(node.children[0], _NO_BLANK_LINES) + _SetNumNewlines(node.children[0], _NO_BLANK_LINES) else: - self._SetNumNewlines(node.children[0], self._GetNumNewlines(node)) + _SetNumNewlines(node.children[0], self._GetNumNewlines(node)) for child in node.children: self.Visit(child) self.last_was_decorator = True @@ -93,7 +95,7 @@ class _BlankLineCalculator(pytree_visitor.PyTreeVisitor): if _AsyncFunction(node): index = self._SetBlankLinesBetweenCommentAndClassFunc( node.prev_sibling.parent) - self._SetNumNewlines(node.children[0], None) + _SetNumNewlines(node.children[0], None) else: index = self._SetBlankLinesBetweenCommentAndClassFunc(node) self.last_was_decorator = False @@ -115,7 +117,7 @@ class _BlankLineCalculator(pytree_visitor.PyTreeVisitor): if self.last_was_class_or_function: if pytree_utils.NodeName(node) in _PYTHON_STATEMENTS: leaf = pytree_utils.FirstLeafNode(node) - self._SetNumNewlines(leaf, self._GetNumNewlines(leaf)) + _SetNumNewlines(leaf, self._GetNumNewlines(leaf)) 
self.last_was_class_or_function = False super(_BlankLineCalculator, self).DefaultNodeVisit(node) @@ -137,17 +139,17 @@ class _BlankLineCalculator(pytree_visitor.PyTreeVisitor): # node as its only child. self.Visit(node.children[index].children[0]) if not self.last_was_decorator: - self._SetNumNewlines(node.children[index].children[0], _ONE_BLANK_LINE) + _SetNumNewlines(node.children[index].children[0], _ONE_BLANK_LINE) index += 1 - if (index and node.children[index].lineno - - 1 == node.children[index - 1].children[0].lineno): - self._SetNumNewlines(node.children[index], _NO_BLANK_LINES) + if (index and node.children[index].lineno - 1 + == node.children[index - 1].children[0].lineno): + _SetNumNewlines(node.children[index], _NO_BLANK_LINES) else: if self.last_comment_lineno + 1 == node.children[index].lineno: num_newlines = _NO_BLANK_LINES else: num_newlines = self._GetNumNewlines(node) - self._SetNumNewlines(node.children[index], num_newlines) + _SetNumNewlines(node.children[index], num_newlines) return index def _GetNumNewlines(self, node): @@ -157,15 +159,16 @@ class _BlankLineCalculator(pytree_visitor.PyTreeVisitor): return 1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION') return _ONE_BLANK_LINE - def _SetNumNewlines(self, node, num_newlines): - pytree_utils.SetNodeAnnotation(node, pytree_utils.Annotation.NEWLINES, - num_newlines) - def _IsTopLevel(self, node): return (not (self.class_level or self.function_level) and _StartsInZerothColumn(node)) +def _SetNumNewlines(node, num_newlines): + pytree_utils.SetNodeAnnotation(node, pytree_utils.Annotation.NEWLINES, + num_newlines) + + def _StartsInZerothColumn(node): return (pytree_utils.FirstLeafNode(node).column == 0 or (_AsyncFunction(node) and node.prev_sibling.column == 0)) @@ -173,4 +176,4 @@ def _StartsInZerothColumn(node): def _AsyncFunction(node): return (py3compat.PY3 and node.prev_sibling and - pytree_utils.NodeName(node.prev_sibling) == 'ASYNC') + node.prev_sibling.type == grammar_token.ASYNC) diff 
--git a/yapf/yapflib/comment_splicer.py b/yapf/yapflib/comment_splicer.py index af999d2..535711b 100644 --- a/yapf/yapflib/comment_splicer.py +++ b/yapf/yapflib/comment_splicer.py @@ -44,6 +44,7 @@ def SpliceComments(tree): _AnnotateIndents(tree) def _VisitNodeRec(node): + """Recursively visit each node to splice comments into the AST.""" # This loop may insert into node.children, so we'll iterate over a copy. for child in node.children[:]: if isinstance(child, pytree.Node): @@ -119,9 +120,9 @@ def SpliceComments(tree): for comment_column, comment_indent, comment_group in comment_groups: ancestor_at_indent = _FindAncestorAtIndent(child, comment_indent) if ancestor_at_indent.type == token.DEDENT: - InsertNodes = pytree_utils.InsertNodesBefore # pylint: disable=invalid-name + InsertNodes = pytree_utils.InsertNodesBefore # pylint: disable=invalid-name # noqa else: - InsertNodes = pytree_utils.InsertNodesAfter # pylint: disable=invalid-name + InsertNodes = pytree_utils.InsertNodesAfter # pylint: disable=invalid-name # noqa InsertNodes( _CreateCommentsFromPrefix( '\n'.join(comment_group) + '\n', @@ -152,6 +153,16 @@ def SpliceComments(tree): # parent to insert into. See comments above # _STANDALONE_LINE_NODES for more details. node_with_line_parent = _FindNodeWithStandaloneLineParent(child) + + if pytree_utils.NodeName( + node_with_line_parent.parent) in {'funcdef', 'classdef'}: + # Keep a comment that's not attached to a function or class + # next to the object it is attached to. 
+ comment_end = ( + comment_lineno + comment_prefix.rstrip('\n').count('\n')) + if comment_end < node_with_line_parent.lineno - 1: + node_with_line_parent = node_with_line_parent.parent + pytree_utils.InsertNodesBefore( _CreateCommentsFromPrefix( comment_prefix, comment_lineno, 0, standalone=True), @@ -177,8 +188,8 @@ def SpliceComments(tree): rindex = (0 if '\n' not in comment_prefix.rstrip() else comment_prefix.rstrip().rindex('\n') + 1) comment_column = ( - len(comment_prefix[rindex:]) - len( - comment_prefix[rindex:].lstrip())) + len(comment_prefix[rindex:]) - + len(comment_prefix[rindex:].lstrip())) comments = _CreateCommentsFromPrefix( comment_prefix, comment_lineno, @@ -250,7 +261,7 @@ def _CreateCommentsFromPrefix(comment_prefix, # When splicing a standalone comment (i.e. a comment that appears on its own # line, not on the same line with other code), it's important to insert it into # an appropriate parent of the node it's attached to. An appropriate parent -# is the first "standaline line node" in the parent chain of a node. +# is the first "standalone line node" in the parent chain of a node. _STANDALONE_LINE_NODES = frozenset([ 'suite', 'if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt', 'funcdef', 'classdef', 'decorated', 'file_input' diff --git a/yapf/yapflib/errors.py b/yapf/yapflib/errors.py index 3946275..99e88d9 100644 --- a/yapf/yapflib/errors.py +++ b/yapf/yapflib/errors.py @@ -11,7 +11,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""YAPF error object.""" +"""YAPF error objects.""" + +from lib2to3.pgen2 import tokenize + + +def FormatErrorMsg(e): + """Convert an exception into a standard format. + + The standard error message format is: + + <filename>:<lineno>:<column>: <msg> + + Arguments: + e: An exception. + + Returns: + A properly formatted error message string. 
+ """ + if isinstance(e, SyntaxError): + return '{}:{}:{}: {}'.format(e.filename, e.lineno, e.offset, e.msg) + if isinstance(e, tokenize.TokenError): + return '{}:{}:{}: {}'.format(e.filename, e.args[1][0], e.args[1][1], + e.args[0]) + return '{}:{}:{}: {}'.format(e.args[1][0], e.args[1][1], e.args[1][2], e.msg) class YapfError(Exception): diff --git a/yapf/yapflib/file_resources.py b/yapf/yapflib/file_resources.py index 6e7202d..972f483 100644 --- a/yapf/yapflib/file_resources.py +++ b/yapf/yapflib/file_resources.py @@ -32,16 +32,79 @@ LF = '\n' CRLF = '\r\n' -def GetDefaultStyleForDir(dirname): +def _GetExcludePatternsFromYapfIgnore(filename): + """Get a list of file patterns to ignore from .yapfignore.""" + ignore_patterns = [] + if os.path.isfile(filename) and os.access(filename, os.R_OK): + with open(filename, 'r') as fd: + for line in fd: + if line.strip() and not line.startswith('#'): + ignore_patterns.append(line.strip()) + + if any(e.startswith('./') for e in ignore_patterns): + raise errors.YapfError('path in .yapfignore should not start with ./') + + return ignore_patterns + + +def _GetExcludePatternsFromPyprojectToml(filename): + """Get a list of file patterns to ignore from pyproject.toml.""" + ignore_patterns = [] + try: + import toml + except ImportError: + raise errors.YapfError( + "toml package is needed for using pyproject.toml as a " + "configuration file") + + if os.path.isfile(filename) and os.access(filename, os.R_OK): + pyproject_toml = toml.load(filename) + ignore_patterns = pyproject_toml.get('tool', + {}).get('yapfignore', + {}).get('ignore_patterns', []) + if any(e.startswith('./') for e in ignore_patterns): + raise errors.YapfError('path in pyproject.toml should not start with ./') + + return ignore_patterns + + +def GetExcludePatternsForDir(dirname): + """Return patterns of files to exclude from ignorefile in a given directory. + + Looks for .yapfignore in the directory dirname. 
+ + Arguments: + dirname: (unicode) The name of the directory. + + Returns: + A List of file patterns to exclude if ignore file is found, otherwise empty + List. + """ + ignore_patterns = [] + + yapfignore_file = os.path.join(dirname, '.yapfignore') + if os.path.exists(yapfignore_file): + ignore_patterns += _GetExcludePatternsFromYapfIgnore(yapfignore_file) + + pyproject_toml_file = os.path.join(dirname, 'pyproject.toml') + if os.path.exists(pyproject_toml_file): + ignore_patterns += _GetExcludePatternsFromPyprojectToml(pyproject_toml_file) + return ignore_patterns + + +def GetDefaultStyleForDir(dirname, default_style=style.DEFAULT_STYLE): """Return default style name for a given directory. - Looks for .style.yapf or setup.cfg in the parent directories. + Looks for .style.yapf or setup.cfg or pyproject.toml in the parent + directories. Arguments: dirname: (unicode) The name of the directory. + default_style: The style to return if nothing is found. Defaults to the + global default style ('pep8') unless otherwise specified. Returns: - The filename if found, otherwise return the global default (pep8). + The filename if found, otherwise return the default style. """ dirname = os.path.abspath(dirname) while True: @@ -52,23 +115,47 @@ def GetDefaultStyleForDir(dirname): # See if we have a setup.cfg file with a '[yapf]' section. config_file = os.path.join(dirname, style.SETUP_CONFIG) - if os.path.exists(config_file): - with open(config_file) as fd: + try: + fd = open(config_file) + except IOError: + pass # It's okay if it's not there. + else: + with fd: config = py3compat.ConfigParser() config.read_file(fd) if config.has_section('yapf'): return config_file - dirname = os.path.dirname(dirname) + # See if we have a pyproject.toml file with a '[tool.yapf]' section. + config_file = os.path.join(dirname, style.PYPROJECT_TOML) + try: + fd = open(config_file) + except IOError: + pass # It's okay if it's not there. 
+ else: + with fd: + try: + import toml + except ImportError: + raise errors.YapfError( + "toml package is needed for using pyproject.toml as a " + "configuration file") + + pyproject_toml = toml.load(config_file) + style_dict = pyproject_toml.get('tool', {}).get('yapf', None) + if style_dict is not None: + return config_file + if (not dirname or not os.path.basename(dirname) or dirname == os.path.abspath(os.path.sep)): break + dirname = os.path.dirname(dirname) global_file = os.path.expanduser(style.GLOBAL_STYLE) if os.path.exists(global_file): return global_file - return style.DEFAULT_STYLE + return default_style def GetCommandLineFiles(command_line_file_list, recursive, exclude): @@ -116,30 +203,44 @@ def _FindPythonFiles(filenames, recursive, exclude): """Find all Python files.""" if exclude and any(e.startswith('./') for e in exclude): raise errors.YapfError("path in '--exclude' should not start with ./") + exclude = exclude and [e.rstrip("/" + os.path.sep) for e in exclude] python_files = [] for filename in filenames: if filename != '.' and exclude and IsIgnored(filename, exclude): continue if os.path.isdir(filename): - if recursive: - # TODO(morbo): Look into a version of os.walk that can handle recursion. - excluded_dirs = [] - for dirpath, _, filelist in os.walk(filename): - if dirpath != '.' and exclude and IsIgnored(dirpath, exclude): - excluded_dirs.append(dirpath) - continue - elif any(dirpath.startswith(e) for e in excluded_dirs): - continue - for f in filelist: - filepath = os.path.join(dirpath, f) - if exclude and IsIgnored(filepath, exclude): - continue - if IsPythonFile(filepath): - python_files.append(filepath) - else: + if not recursive: raise errors.YapfError( "directory specified without '--recursive' flag: %s" % filename) + + # TODO(morbo): Look into a version of os.walk that can handle recursion. + excluded_dirs = [] + for dirpath, dirnames, filelist in os.walk(filename): + if dirpath != '.' 
and exclude and IsIgnored(dirpath, exclude): + excluded_dirs.append(dirpath) + continue + elif any(dirpath.startswith(e) for e in excluded_dirs): + continue + for f in filelist: + filepath = os.path.join(dirpath, f) + if exclude and IsIgnored(filepath, exclude): + continue + if IsPythonFile(filepath): + python_files.append(filepath) + # To prevent it from scanning the contents excluded folders, os.walk() + # lets you amend its list of child dirs `dirnames`. These edits must be + # made in-place instead of creating a modified copy of `dirnames`. + # list.remove() is slow and list.pop() is a headache. Instead clear + # `dirnames` then repopulate it. + dirnames_ = [dirnames.pop(0) for i in range(len(dirnames))] + for dirname in dirnames_: + dir_ = os.path.join(dirpath, dirname) + if IsIgnored(dir_, exclude): + excluded_dirs.append(dir_) + else: + dirnames.append(dirname) + elif os.path.isfile(filename): python_files.append(filename) @@ -148,10 +249,12 @@ def _FindPythonFiles(filenames, recursive, exclude): def IsIgnored(path, exclude): """Return True if filename matches any patterns in exclude.""" - path = path.lstrip('/') - while path.startswith('./'): + if exclude is None: + return False + path = path.lstrip(os.path.sep) + while path.startswith('.' + os.path.sep): path = path[2:] - return any(fnmatch.fnmatch(path, e.rstrip('/')) for e in exclude) + return any(fnmatch.fnmatch(path, e.rstrip(os.path.sep)) for e in exclude) def IsPythonFile(filename): diff --git a/yapf/yapflib/format_decision_state.py b/yapf/yapflib/format_decision_state.py index bff8ea3..74d0861 100644 --- a/yapf/yapflib/format_decision_state.py +++ b/yapf/yapflib/format_decision_state.py @@ -27,32 +27,34 @@ through the code to commit the whitespace formatting. 
""" from yapf.yapflib import format_token +from yapf.yapflib import logical_line from yapf.yapflib import object_state from yapf.yapflib import split_penalty from yapf.yapflib import style -from yapf.yapflib import unwrapped_line +from yapf.yapflib import subtypes class FormatDecisionState(object): - """The current state when indenting an unwrapped line. + """The current state when indenting a logical line. The FormatDecisionState object is meant to be copied instead of referenced. Attributes: first_indent: The indent of the first token. column: The number of used columns in the current line. + line: The logical line we're currently processing. next_token: The next token to be formatted. paren_level: The level of nesting inside (), [], and {}. lowest_level_on_line: The lowest paren_level on the current line. - newline: Indicates if a newline is added along the edge to this format - decision state node. - previous: The previous format decision state in the decision tree. stack: A stack (of _ParenState) keeping track of properties applying to parenthesis levels. comp_stack: A stack (of ComprehensionState) keeping track of properties applying to comprehensions. + param_list_stack: A stack (of ParameterListState) keeping track of + properties applying to function parameter lists. ignore_stack_for_comparison: Ignore the stack of _ParenState for state comparison. + column_limit: The column limit specified by the style. """ def __init__(self, line, first_indent): @@ -62,7 +64,7 @@ class FormatDecisionState(object): 'first_indent'. Arguments: - line: (UnwrappedLine) The unwrapped line we're currently processing. + line: (LogicalLine) The logical line we're currently processing. first_indent: (int) The indent of the first token. 
""" self.next_token = line.first @@ -73,9 +75,8 @@ class FormatDecisionState(object): self.ignore_stack_for_comparison = False self.stack = [_ParenState(first_indent, first_indent)] self.comp_stack = [] + self.param_list_stack = [] self.first_indent = first_indent - self.newline = False - self.previous = None self.column_limit = style.Get('COLUMN_LIMIT') def Clone(self): @@ -89,10 +90,9 @@ class FormatDecisionState(object): new.lowest_level_on_line = self.lowest_level_on_line new.ignore_stack_for_comparison = self.ignore_stack_for_comparison new.first_indent = self.first_indent - new.newline = self.newline - new.previous = self.previous new.stack = [state.Clone() for state in self.stack] new.comp_stack = [state.Clone() for state in self.comp_stack] + new.param_list_stack = [state.Clone() for state in self.param_list_stack] return new def __eq__(self, other): @@ -105,8 +105,9 @@ class FormatDecisionState(object): self.line.depth == other.line.depth and self.lowest_level_on_line == other.lowest_level_on_line and (self.ignore_stack_for_comparison or - other.ignore_stack_for_comparison or - self.stack == other.stack and self.comp_stack == other.comp_stack)) + other.ignore_stack_for_comparison or self.stack == other.stack and + self.comp_stack == other.comp_stack and + self.param_list_stack == other.param_list_stack)) def __ne__(self, other): return not self == other @@ -132,19 +133,17 @@ class FormatDecisionState(object): current = self.next_token previous = current.previous_token - if current.is_pseudo_paren: + if current.is_pseudo: return False - if (not must_split and - format_token.Subtype.DICTIONARY_KEY_PART in current.subtypes and - format_token.Subtype.DICTIONARY_KEY not in current.subtypes and + if (not must_split and subtypes.DICTIONARY_KEY_PART in current.subtypes and + subtypes.DICTIONARY_KEY not in current.subtypes and not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')): # In some situations, a dictionary may be multiline, but pylint doesn't # like it. 
So don't allow it unless forced to. return False - if (not must_split and - format_token.Subtype.DICTIONARY_VALUE in current.subtypes and + if (not must_split and subtypes.DICTIONARY_VALUE in current.subtypes and not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')): return False @@ -157,7 +156,7 @@ class FormatDecisionState(object): if not prev or prev.name not in {'NAME', 'DOT'}: break token = token.previous_token - if token and format_token.Subtype.DICTIONARY_VALUE in token.subtypes: + if token and subtypes.DICTIONARY_VALUE in token.subtypes: if not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE'): return False @@ -171,7 +170,7 @@ class FormatDecisionState(object): current = self.next_token previous = current.previous_token - if current.is_pseudo_paren: + if current.is_pseudo: return False if current.must_break_before: @@ -183,10 +182,32 @@ class FormatDecisionState(object): if style.Get('SPLIT_ALL_COMMA_SEPARATED_VALUES') and previous.value == ',': return True + if (style.Get('SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES') and + previous.value == ','): + # Avoid breaking in a container that fits in the current line if possible + opening = _GetOpeningBracket(current) + + # Can't find opening bracket, behave the same way as + # SPLIT_ALL_COMMA_SEPARATED_VALUES. + if not opening: + return True + + if current.is_comment: + # Don't require splitting before a comment, since it may be related to + # the current line. + return False + + # Allow the fallthrough code to handle the closing bracket. + if current != opening.matching_bracket: + # If the container doesn't fit in the current line, must split + return not self._ContainerFitsOnStartLine(opening) + if (self.stack[-1].split_before_closing_bracket and - current.value in '}]' and style.Get('SPLIT_BEFORE_CLOSING_BRACKET')): + (current.value in '}]' and style.Get('SPLIT_BEFORE_CLOSING_BRACKET') or + current.value in '}])' and style.Get('INDENT_CLOSING_BRACKETS'))): # Split before the closing bracket if we can. 
- return current.node_split_penalty != split_penalty.UNBREAKABLE + if subtypes.SUBSCRIPT_BRACKET not in current.subtypes: + return current.node_split_penalty != split_penalty.UNBREAKABLE if (current.value == ')' and previous.value == ',' and not _IsSingleElementTuple(current.matching_bracket)): @@ -202,9 +223,10 @@ class FormatDecisionState(object): ########################################################################### # List Splitting if (style.Get('DEDENT_CLOSING_BRACKETS') or + style.Get('INDENT_CLOSING_BRACKETS') or style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')): bracket = current if current.ClosesScope() else previous - if format_token.Subtype.SUBSCRIPT_BRACKET not in bracket.subtypes: + if subtypes.SUBSCRIPT_BRACKET not in bracket.subtypes: if bracket.OpensScope(): if style.Get('COALESCE_BRACKETS'): if current.OpensScope(): @@ -212,7 +234,7 @@ class FormatDecisionState(object): return False if (not _IsLastScopeInLine(bracket) or - unwrapped_line.IsSurroundedByBrackets(bracket)): + logical_line.IsSurroundedByBrackets(bracket)): last_token = bracket.matching_bracket else: last_token = _LastTokenInLine(bracket.matching_bracket) @@ -223,7 +245,8 @@ class FormatDecisionState(object): self.stack[-1].split_before_closing_bracket = True return True - elif style.Get('DEDENT_CLOSING_BRACKETS') and current.ClosesScope(): + elif (style.Get('DEDENT_CLOSING_BRACKETS') or + style.Get('INDENT_CLOSING_BRACKETS')) and current.ClosesScope(): # Split before and dedent the closing bracket. 
return self.stack[-1].split_before_closing_bracket @@ -244,8 +267,8 @@ class FormatDecisionState(object): token = token.next_token return False - if (previous.value == '(' and not previous.is_pseudo_paren and - not unwrapped_line.IsSurroundedByBrackets(previous)): + if (previous.value == '(' and not previous.is_pseudo and + not logical_line.IsSurroundedByBrackets(previous)): pptoken = previous.previous_token if (pptoken and not pptoken.is_name and not pptoken.is_keyword and SurroundedByParens(current)): @@ -277,7 +300,7 @@ class FormatDecisionState(object): tok = tok.next_token func_call_or_string_format = tok and tok.value == '%' if func_call_or_string_format: - open_bracket = unwrapped_line.IsSurroundedByBrackets(current) + open_bracket = logical_line.IsSurroundedByBrackets(current) if open_bracket: if open_bracket.value in '[{': if not self._FitsOnLine(open_bracket, @@ -287,11 +310,20 @@ class FormatDecisionState(object): if not self._FitsOnLine(current, tok.matching_bracket): return True + if (current.OpensScope() and previous.value == ',' and + subtypes.DICTIONARY_KEY not in current.next_token.subtypes): + # If we have a list of tuples, then we can get a similar look as above. If + # the full list cannot fit on the line, then we want a split. + open_bracket = logical_line.IsSurroundedByBrackets(current) + if (open_bracket and open_bracket.value in '[{' and + subtypes.SUBSCRIPT_BRACKET not in open_bracket.subtypes): + if not self._FitsOnLine(current, current.matching_bracket): + return True + ########################################################################### # Dict/Set Splitting if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and - format_token.Subtype.DICTIONARY_KEY in current.subtypes and - not current.is_comment): + subtypes.DICTIONARY_KEY in current.subtypes and not current.is_comment): # Place each dictionary entry onto its own line. 
if previous.value == '{' and previous.previous_token: opening = _GetOpeningBracket(previous.previous_token) @@ -311,12 +343,12 @@ class FormatDecisionState(object): return True if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and - format_token.Subtype.DICT_SET_GENERATOR in current.subtypes): + subtypes.DICT_SET_GENERATOR in current.subtypes): # Split before a dict/set generator. return True - if (format_token.Subtype.DICTIONARY_VALUE in current.subtypes or - (previous.is_pseudo_paren and previous.value == '(' and + if (subtypes.DICTIONARY_VALUE in current.subtypes or + (previous.is_pseudo and previous.value == '(' and not current.is_comment)): # Split before the dictionary value if we can't fit every dictionary # entry on its own line. @@ -336,8 +368,7 @@ class FormatDecisionState(object): ########################################################################### # Argument List Splitting if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and - format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in - current.subtypes): + subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in current.subtypes): if (previous.value not in {'=', ':', '*', '**'} and current.value not in ':=,)' and not _IsFunctionDefinition(previous)): # If we're going to split the lines because of named arguments, then we @@ -351,20 +382,22 @@ class FormatDecisionState(object): # b=1, # c=2) if (self._FitsOnLine(previous, previous.matching_bracket) and - unwrapped_line.IsSurroundedByBrackets(previous)): + logical_line.IsSurroundedByBrackets(previous)): # An argument to a function is a function call with named # assigns. 
return False + # Don't split if not required + if (not style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and + not style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')): + return False + column = self.column - self.stack[-1].last_space return column > style.Get('CONTINUATION_INDENT_WIDTH') opening = _GetOpeningBracket(current) if opening: - arglist_length = ( - opening.matching_bracket.total_length - opening.total_length + - self.stack[-1].indent) - return arglist_length > self.column_limit + return not self._ContainerFitsOnStartLine(opening) if (current.value not in '{)' and previous.value == '(' and self._ArgumentListHasDictionaryEntry(current)): @@ -407,14 +440,34 @@ class FormatDecisionState(object): return True pprevious = previous.previous_token + + # A function call with a dictionary as its first argument may result in + # unreadable formatting if the dictionary spans multiple lines. The + # dictionary itself is formatted just fine, but the remaining arguments are + # indented too far: + # + # function_call({ + # KEY_1: 'value one', + # KEY_2: 'value two', + # }, + # default=False) + if (current.value == '{' and previous.value == '(' and pprevious and + pprevious.is_name): + dict_end = current.matching_bracket + next_token = dict_end.next_token + if next_token.value == ',' and not self._FitsOnLine(current, dict_end): + return True + if (current.is_name and pprevious and pprevious.is_name and previous.value == '('): + if (not self._FitsOnLine(previous, previous.matching_bracket) and _IsFunctionCallWithArguments(current)): # There is a function call, with more than 1 argument, where the first - # argument is itself a function call with arguments. In this specific - # case, if we split after the first argument's opening '(', then the - # formatting will look bad for the rest of the arguments. E.g.: + # argument is itself a function call with arguments that does not fit + # into the line. 
In this specific case, if we split after the first + # argument's opening '(', then the formatting will look bad for the + # rest of the arguments. E.g.: # # outer_function_call(inner_function_call( # inner_arg1, inner_arg2), @@ -422,11 +475,35 @@ class FormatDecisionState(object): # # Instead, enforce a split before that argument to keep things looking # good. - return True + if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') or + style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')): + return True + + opening = _GetOpeningBracket(current) + if (opening and opening.value == '(' and opening.previous_token and + (opening.previous_token.is_name or + opening.previous_token.value in {'*', '**'})): + is_func_call = False + opening = current + while opening: + if opening.value == '(': + is_func_call = True + break + if (not (opening.is_name or opening.value in {'*', '**'}) and + opening.value != '.'): + break + opening = opening.next_token + + if is_func_call: + if (not self._FitsOnLine(current, opening.matching_bracket) or + (opening.matching_bracket.next_token and + opening.matching_bracket.next_token.value != ',' and + not opening.matching_bracket.next_token.ClosesScope())): + return True if (previous.OpensScope() and not current.OpensScope() and not current.is_comment and - format_token.Subtype.SUBSCRIPT_BRACKET not in previous.subtypes): + subtypes.SUBSCRIPT_BRACKET not in previous.subtypes): if pprevious and not pprevious.is_keyword and not pprevious.is_name: # We want to split if there's a comment in the container. token = current @@ -474,7 +551,7 @@ class FormatDecisionState(object): if (current.is_comment and previous.lineno < current.lineno - current.value.count('\n')): - # If a comment comes in the middle of an unwrapped line (like an if + # If a comment comes in the middle of a logical line (like an if # conditional with comments interspersed), then we want to split if the # original comments were on a separate line. 
return True @@ -496,6 +573,8 @@ class FormatDecisionState(object): Returns: The penalty of splitting after the current token. """ + self._PushParameterListState(newline) + penalty = 0 if newline: penalty = self._AddTokenOnNewline(dry_run, must_split) @@ -503,6 +582,7 @@ class FormatDecisionState(object): self._AddTokenOnCurrentLine(dry_run) penalty += self._CalculateComprehensionState(newline) + penalty += self._CalculateParameterListState(newline) return self.MoveStateToNextToken() + penalty @@ -519,6 +599,11 @@ class FormatDecisionState(object): previous = current.previous_token spaces = current.spaces_required_before + if isinstance(spaces, list): + # Don't set the value here, as we need to look at the lines near + # this one to determine the actual horizontal alignment value. + spaces = 0 + if not dry_run: current.AddWhitespacePrefix(newlines_before=0, spaces=spaces) @@ -573,9 +658,10 @@ class FormatDecisionState(object): if (previous.OpensScope() or (previous.is_comment and previous.previous_token is not None and previous.previous_token.OpensScope())): - self.stack[-1].closing_scope_indent = max( - 0, self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH')) - + dedent = (style.Get('CONTINUATION_INDENT_WIDTH'), + 0)[style.Get('INDENT_CLOSING_BRACKETS')] + self.stack[-1].closing_scope_indent = ( + max(0, self.stack[-1].indent - dedent)) self.stack[-1].split_before_closing_bracket = True # Calculate the split penalty. @@ -585,7 +671,7 @@ class FormatDecisionState(object): # Don't penalize for a must split. return penalty - if previous.is_pseudo_paren and previous.value == '(': + if previous.is_pseudo and previous.value == '(': # Small penalty for splitting after a pseudo paren. penalty += 50 @@ -634,7 +720,7 @@ class FormatDecisionState(object): # If we encounter a closing bracket, we can remove a level from our # parenthesis stack. 
if len(self.stack) > 1 and current.ClosesScope(): - if format_token.Subtype.DICTIONARY_KEY_PART in current.subtypes: + if subtypes.DICTIONARY_KEY_PART in current.subtypes: self.stack[-2].last_space = self.stack[-2].indent else: self.stack[-2].last_space = self.stack[-1].last_space @@ -645,7 +731,7 @@ class FormatDecisionState(object): if is_multiline_string: # This is a multiline string. Only look at the first line. self.column += len(current.value.split('\n')[0]) - elif not current.is_pseudo_paren: + elif not current.is_pseudo: self.column += len(current.value) self.next_token = self.next_token.next_token @@ -653,7 +739,7 @@ class FormatDecisionState(object): # Calculate the penalty for overflowing the column limit. penalty = 0 if (not current.is_pylint_comment and not current.is_pytype_comment and - self.column > self.column_limit): + not current.is_copybara_comment and self.column > self.column_limit): excess_characters = self.column - self.column_limit penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters @@ -692,13 +778,12 @@ class FormatDecisionState(object): if newline: top_of_stack.has_interior_split = True - if (format_token.Subtype.COMP_EXPR in current.subtypes and - format_token.Subtype.COMP_EXPR not in previous.subtypes): + if (subtypes.COMP_EXPR in current.subtypes and + subtypes.COMP_EXPR not in previous.subtypes): self.comp_stack.append(object_state.ComprehensionState(current)) return penalty - if (current.value == 'for' and - format_token.Subtype.COMP_FOR in current.subtypes): + if current.value == 'for' and subtypes.COMP_FOR in current.subtypes: if top_of_stack.for_token is not None: # Treat nested comprehensions like normal comp_if expressions. 
# Example: @@ -722,8 +807,8 @@ class FormatDecisionState(object): top_of_stack.HasTrivialExpr()): penalty += split_penalty.CONNECTED - if (format_token.Subtype.COMP_IF in current.subtypes and - format_token.Subtype.COMP_IF not in previous.subtypes): + if (subtypes.COMP_IF in current.subtypes and + subtypes.COMP_IF not in previous.subtypes): # Penalize breaking at comp_if when it doesn't match the newline structure # in the rest of the comprehension. if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and @@ -733,17 +818,135 @@ class FormatDecisionState(object): return penalty + def _PushParameterListState(self, newline): + """Push a new parameter list state for a function definition. + + Args: + newline: Whether the current token is to be added on a newline. + """ + current = self.next_token + previous = current.previous_token + + if _IsFunctionDefinition(previous): + first_param_column = previous.total_length + self.stack[-2].indent + self.param_list_stack.append( + object_state.ParameterListState(previous, newline, + first_param_column)) + + def _CalculateParameterListState(self, newline): + """Makes required changes to parameter list state. + + Args: + newline: Whether the current token is to be added on a newline. + + Returns: + The penalty for the token-newline combination given the current + parameter state. 
+ """ + current = self.next_token + previous = current.previous_token + penalty = 0 + + if _IsFunctionDefinition(previous): + first_param_column = previous.total_length + self.stack[-2].indent + if not newline: + param_list = self.param_list_stack[-1] + if param_list.parameters and param_list.has_typed_return: + last_param = param_list.parameters[-1].first_token + last_token = _LastTokenInLine(previous.matching_bracket) + total_length = last_token.total_length + total_length -= last_param.total_length - len(last_param.value) + if total_length + self.column > self.column_limit: + # If we need to split before the trailing code of a function + # definition with return types, then also split before the opening + # parameter so that the trailing bit isn't indented on a line by + # itself: + # + # def rrrrrrrrrrrrrrrrrrrrrr(ccccccccccccccccccccccc: Tuple[Text] + # ) -> List[Tuple[Text, Text]]: + # pass + penalty += split_penalty.VERY_STRONGLY_CONNECTED + return penalty + + if first_param_column <= self.column: + # Make sure we don't split after the opening bracket if the + # continuation indent is greater than the opening bracket: + # + # a( + # b=1, + # c=2) + penalty += split_penalty.VERY_STRONGLY_CONNECTED + return penalty + + if not self.param_list_stack: + return penalty + + param_list = self.param_list_stack[-1] + if current == self.param_list_stack[-1].closing_bracket: + self.param_list_stack.pop() # We're done with this state. + if newline and param_list.has_typed_return: + if param_list.split_before_closing_bracket: + penalty -= split_penalty.STRONGLY_CONNECTED + elif param_list.LastParamFitsOnLine(self.column): + penalty += split_penalty.STRONGLY_CONNECTED + + if (not newline and param_list.has_typed_return and + param_list.has_split_before_first_param): + # Prefer splitting before the closing bracket if there's a return type + # and we've already split before the first parameter. 
+ penalty += split_penalty.STRONGLY_CONNECTED + + return penalty + + if not param_list.parameters: + return penalty + + if newline: + if self._FitsOnLine(param_list.parameters[0].first_token, + _LastTokenInLine(param_list.closing_bracket)): + penalty += split_penalty.STRONGLY_CONNECTED + + if (not newline and style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and + param_list.has_default_values and + current != param_list.parameters[0].first_token and + current != param_list.closing_bracket and + subtypes.PARAMETER_START in current.subtypes): + # If we want to split before parameters when there are named assigns, + # then add a penalty for not splitting. + penalty += split_penalty.STRONGLY_CONNECTED + + return penalty + + def _IndentWithContinuationAlignStyle(self, column): + if column == 0: + return column + align_style = style.Get('CONTINUATION_ALIGN_STYLE') + if align_style == 'FIXED': + return ((self.line.depth * style.Get('INDENT_WIDTH')) + + style.Get('CONTINUATION_INDENT_WIDTH')) + if align_style == 'VALIGN-RIGHT': + indent_width = style.Get('INDENT_WIDTH') + return indent_width * int((column + indent_width - 1) / indent_width) + return column + def _GetNewlineColumn(self): """Return the new column on the newline.""" current = self.next_token previous = current.previous_token top_of_stack = self.stack[-1] - if current.spaces_required_before > 2 or self.line.disable: + if isinstance(current.spaces_required_before, list): + # Don't set the value here, as we need to look at the lines near + # this one to determine the actual horizontal alignment value. 
+ return 0 + elif current.spaces_required_before > 2 or self.line.disable: return current.spaces_required_before + cont_aligned_indent = self._IndentWithContinuationAlignStyle( + top_of_stack.indent) + if current.OpensScope(): - return top_of_stack.indent if self.paren_level else self.first_indent + return cont_aligned_indent if self.paren_level else self.first_indent if current.ClosesScope(): if (previous.OpensScope() or @@ -754,29 +957,39 @@ class FormatDecisionState(object): return top_of_stack.closing_scope_indent if (previous and previous.is_string and current.is_string and - format_token.Subtype.DICTIONARY_VALUE in current.subtypes): + subtypes.DICTIONARY_VALUE in current.subtypes): return previous.column if style.Get('INDENT_DICTIONARY_VALUE'): - if previous and (previous.value == ':' or previous.is_pseudo_paren): - if format_token.Subtype.DICTIONARY_VALUE in current.subtypes: + if previous and (previous.value == ':' or previous.is_pseudo): + if subtypes.DICTIONARY_VALUE in current.subtypes: return top_of_stack.indent - if (_IsCompoundStatement(self.line.first) and - (not style.Get('DEDENT_CLOSING_BRACKETS') or + if (not self.param_list_stack and _IsCompoundStatement(self.line.first) and + (not (style.Get('DEDENT_CLOSING_BRACKETS') or + style.Get('INDENT_CLOSING_BRACKETS')) or style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))): token_indent = ( len(self.line.first.whitespace_prefix.split('\n')[-1]) + style.Get('INDENT_WIDTH')) if token_indent == top_of_stack.indent: + return token_indent + style.Get('CONTINUATION_INDENT_WIDTH') + + if (self.param_list_stack and + not self.param_list_stack[-1].SplitBeforeClosingBracket( + top_of_stack.indent) and top_of_stack.indent + == ((self.line.depth + 1) * style.Get('INDENT_WIDTH'))): + if (subtypes.PARAMETER_START in current.subtypes or + (previous.is_comment and + subtypes.PARAMETER_START in previous.subtypes)): return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH') - return top_of_stack.indent + return 
cont_aligned_indent def _FitsOnLine(self, start, end): """Determines if line between start and end can fit on the current line.""" length = end.total_length - start.total_length - if not start.is_pseudo_paren: + if not start.is_pseudo: length += len(start.value) return length + self.column <= self.column_limit @@ -791,29 +1004,48 @@ class FormatDecisionState(object): def ImplicitStringConcatenation(tok): num_strings = 0 - if tok.is_pseudo_paren: + if tok.is_pseudo: tok = tok.next_token while tok.is_string: num_strings += 1 tok = tok.next_token return num_strings > 1 + def DictValueIsContainer(opening, closing): + """Return true if the dictionary value is a container.""" + if not opening or not closing: + return False + colon = opening.previous_token + while colon: + if not colon.is_pseudo: + break + colon = colon.previous_token + if not colon or colon.value != ':': + return False + key = colon.previous_token + if not key: + return False + return subtypes.DICTIONARY_KEY_PART in key.subtypes + closing = opening.matching_bracket entry_start = opening.next_token current = opening.next_token.next_token while current and current != closing: - if format_token.Subtype.DICTIONARY_KEY in current.subtypes: + if subtypes.DICTIONARY_KEY in current.subtypes: prev = PreviousNonCommentToken(current) - length = prev.total_length - entry_start.total_length - length += len(entry_start.value) - if length + self.stack[-2].indent >= self.column_limit: - return False + if prev.value == ',': + prev = PreviousNonCommentToken(prev.previous_token) + if not DictValueIsContainer(prev.matching_bracket, prev): + length = prev.total_length - entry_start.total_length + length += len(entry_start.value) + if length + self.stack[-2].indent >= self.column_limit: + return False entry_start = current if current.OpensScope(): if ((current.value == '{' or - (current.is_pseudo_paren and current.next_token.value == '{') and - format_token.Subtype.DICTIONARY_VALUE in current.subtypes) or + (current.is_pseudo 
and current.next_token.value == '{') and + subtypes.DICTIONARY_VALUE in current.subtypes) or ImplicitStringConcatenation(current)): # A dictionary entry that cannot fit on a single line shouldn't matter # to this calculation. If it can't fit on a single line, then the @@ -825,7 +1057,7 @@ class FormatDecisionState(object): while current: if current == closing: return True - if format_token.Subtype.DICTIONARY_KEY in current.subtypes: + if subtypes.DICTIONARY_KEY in current.subtypes: entry_start = current break current = current.next_token @@ -834,8 +1066,8 @@ class FormatDecisionState(object): else: current = current.next_token - # At this point, current is the closing bracket. Go back one to get the the - # end of the dictionary entry. + # At this point, current is the closing bracket. Go back one to get the end + # of the dictionary entry. current = PreviousNonCommentToken(current) length = current.total_length - entry_start.total_length length += len(entry_start.value) @@ -855,6 +1087,11 @@ class FormatDecisionState(object): token = token.next_token return False + def _ContainerFitsOnStartLine(self, opening): + """Check if the container can fit on its starting line.""" + return (opening.matching_bracket.total_length - opening.total_length + + self.stack[-1].indent) <= self.column_limit + _COMPOUND_STMTS = frozenset( {'for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'}) @@ -884,29 +1121,22 @@ def _IsFunctionCallWithArguments(token): def _IsArgumentToFunction(token): - bracket = unwrapped_line.IsSurroundedByBrackets(token) + bracket = logical_line.IsSurroundedByBrackets(token) if not bracket or bracket.value != '(': return False previous = bracket.previous_token return previous and previous.is_name -def _GetLengthOfSubtype(token, subtype, exclude=None): - current = token - while (current.next_token and subtype in current.subtypes and - (exclude is None or exclude not in current.subtypes)): - current = current.next_token - return current.total_length - 
token.total_length + 1 - - def _GetOpeningBracket(current): """Get the opening bracket containing the current token.""" - if current.matching_bracket and not current.is_pseudo_paren: - return current.matching_bracket + if current.matching_bracket and not current.is_pseudo: + return current if current.OpensScope() else current.matching_bracket + while current: if current.ClosesScope(): current = current.matching_bracket - elif current.is_pseudo_paren: + elif current.is_pseudo: current = current.previous_token elif current.OpensScope(): return current @@ -922,11 +1152,11 @@ def _LastTokenInLine(current): def _IsFunctionDefinition(current): prev = current.previous_token - return (current.value == '(' and prev and - format_token.Subtype.FUNC_DEF in prev.subtypes) + return current.value == '(' and prev and subtypes.FUNC_DEF in prev.subtypes def _IsLastScopeInLine(current): + current = current.matching_bracket while current: current = current.next_token if current and current.OpensScope(): @@ -942,10 +1172,7 @@ def _IsSingleElementTuple(token): while token != close: if token.value == ',': num_commas += 1 - if token.OpensScope(): - token = token.matching_bracket - else: - token = token.next_token + token = token.matching_bracket if token.OpensScope() else token.next_token return num_commas == 1 @@ -956,10 +1183,7 @@ def _ScopeHasNoCommas(token): while token != close: if token.value == ',': return False - if token.OpensScope(): - token = token.matching_bracket - else: - token = token.next_token + token = token.matching_bracket if token.OpensScope() else token.next_token return True @@ -973,6 +1197,7 @@ class _ParenState(object): indent: The column position to which a specified parenthesis level needs to be indented. last_space: The column position of the last space on each level. + closing_scope_indent: The column position of the closing indentation. split_before_closing_bracket: Whether a newline needs to be inserted before the closing bracket. 
We only want to insert a newline before the closing bracket if there also was a newline after the beginning left bracket. diff --git a/yapf/yapflib/format_token.py b/yapf/yapflib/format_token.py index 79dced4..487f3a9 100644 --- a/yapf/yapflib/format_token.py +++ b/yapf/yapflib/format_token.py @@ -24,58 +24,26 @@ from lib2to3.pgen2 import token from yapf.yapflib import py3compat from yapf.yapflib import pytree_utils from yapf.yapflib import style +from yapf.yapflib import subtypes CONTINUATION = token.N_TOKENS -class Subtype(object): - """Subtype information about tokens. - - Gleaned from parsing the code. Helps determine the best formatting. - """ - NONE = 0 - UNARY_OPERATOR = 1 - BINARY_OPERATOR = 2 - SUBSCRIPT_COLON = 3 - SUBSCRIPT_BRACKET = 4 - DEFAULT_OR_NAMED_ASSIGN = 5 - DEFAULT_OR_NAMED_ASSIGN_ARG_LIST = 6 - VARARGS_LIST = 7 - VARARGS_STAR = 8 - KWARGS_STAR_STAR = 9 - ASSIGN_OPERATOR = 10 - DICTIONARY_KEY = 11 - DICTIONARY_KEY_PART = 12 - DICTIONARY_VALUE = 13 - DICT_SET_GENERATOR = 14 - COMP_EXPR = 21 - COMP_FOR = 15 - COMP_IF = 16 - FUNC_DEF = 17 - DECORATOR = 18 - TYPED_NAME = 19 - TYPED_NAME_ARG_LIST = 20 - - -def _TabbedContinuationAlignPadding(spaces, align_style, tab_width, - continuation_indent_width): +def _TabbedContinuationAlignPadding(spaces, align_style, tab_width): """Build padding string for continuation alignment in tabbed indentation. Arguments: spaces: (int) The number of spaces to place before the token for alignment. align_style: (str) The alignment style for continuation lines. tab_width: (int) Number of columns of each tab character. - continuation_indent_width: (int) Indent columns for line continuations. Returns: A padding string for alignment with style specified by align_style option. 
""" - if align_style == 'FIXED': + if align_style in ('FIXED', 'VALIGN-RIGHT'): if spaces > 0: - return '\t' * int(continuation_indent_width / tab_width) + return '\t' * int((spaces + tab_width - 1) / tab_width) return '' - elif align_style == 'VALIGN-RIGHT': - return '\t' * int((spaces + tab_width - 1) / tab_width) return ' ' * spaces @@ -86,12 +54,15 @@ class FormatToken(object): the code. Attributes: - next_token: The token in the unwrapped line after this token or None if this - is the last token in the unwrapped line. - previous_token: The token in the unwrapped line before this token or None if - this is the first token in the unwrapped line. + node: The PyTree node this token represents. + next_token: The token in the logical line after this token or None if this + is the last token in the logical line. + previous_token: The token in the logical line before this token or None if + this is the first token in the logical line. matching_bracket: If a bracket token ('[', '{', or '(') the matching bracket. + parameters: If this and its following tokens make up a parameter list, then + this is a list of those parameters. container_opening: If the object is in a container, this points to its opening bracket. container_elements: If this is the start of a container, a list of the @@ -103,12 +74,13 @@ class FormatToken(object): formatter won't place n spaces before all comments. Only those that are moved to the end of a line of code. The formatter may use different spacing when appropriate. - can_break_before: True if we're allowed to break before this token. - must_break_before: True if we're required to break before this token. - total_length: The total length of the unwrapped line up to and including + total_length: The total length of the logical line up to and including whitespace and this token. However, this doesn't include the initial indentation amount. split_penalty: The penalty for splitting the line before this token. 
+ can_break_before: True if we're allowed to break before this token. + must_break_before: True if we're required to break before this token. + newlines: The number of newlines needed before this token. """ def __init__(self, node): @@ -121,23 +93,44 @@ class FormatToken(object): self.next_token = None self.previous_token = None self.matching_bracket = None + self.parameters = [] self.container_opening = None self.container_elements = [] self.whitespace_prefix = '' - self.can_break_before = False - self.must_break_before = False - self.total_length = 0 # TODO(morbo): Think up a better name. + self.total_length = 0 self.split_penalty = 0 + self.can_break_before = False + self.must_break_before = pytree_utils.GetNodeAnnotation( + node, pytree_utils.Annotation.MUST_SPLIT, default=False) + self.newlines = pytree_utils.GetNodeAnnotation( + node, pytree_utils.Annotation.NEWLINES) + self.type = node.type + self.column = node.column + self.lineno = node.lineno + self.name = pytree_utils.NodeName(node) + + self.spaces_required_before = 0 if self.is_comment: self.spaces_required_before = style.Get('SPACES_BEFORE_COMMENT') - else: - self.spaces_required_before = 0 + self.value = node.value if self.is_continuation: - self.value = self.node.value.rstrip() - else: - self.value = self.node.value + self.value = node.value.rstrip() + + stypes = pytree_utils.GetNodeAnnotation(node, + pytree_utils.Annotation.SUBTYPE) + self.subtypes = {subtypes.NONE} if not stypes else stypes + self.is_pseudo = hasattr(node, 'is_pseudo') and node.is_pseudo + + @property + def formatted_whitespace_prefix(self): + if style.Get('INDENT_BLANK_LINES'): + without_newlines = self.whitespace_prefix.lstrip('\n') + height = len(self.whitespace_prefix) - len(without_newlines) + if height: + return ('\n' + without_newlines) * height + return self.whitespace_prefix def AddWhitespacePrefix(self, newlines_before, spaces=0, indent_level=0): """Register a token's whitespace prefix. 
@@ -153,30 +146,30 @@ class FormatToken(object): if newlines_before > 0: indent_before = '\t' * indent_level + _TabbedContinuationAlignPadding( spaces, style.Get('CONTINUATION_ALIGN_STYLE'), - style.Get('INDENT_WIDTH'), style.Get('CONTINUATION_INDENT_WIDTH')) + style.Get('INDENT_WIDTH')) else: indent_before = '\t' * indent_level + ' ' * spaces else: - indent_before = ( - ' ' * indent_level * style.Get('INDENT_WIDTH') + ' ' * spaces) + indent_before = (' ' * indent_level * style.Get('INDENT_WIDTH') + + ' ' * spaces) if self.is_comment: comment_lines = [s.lstrip() for s in self.value.splitlines()] - self.node.value = ('\n' + indent_before).join(comment_lines) + self.value = ('\n' + indent_before).join(comment_lines) # Update our own value since we are changing node value - self.value = self.node.value + self.value = self.value if not self.whitespace_prefix: - self.whitespace_prefix = ( - '\n' * (self.newlines or newlines_before) + indent_before) + self.whitespace_prefix = ('\n' * (self.newlines or newlines_before) + + indent_before) else: self.whitespace_prefix += indent_before def AdjustNewlinesBefore(self, newlines_before): """Change the number of newlines before this token.""" - self.whitespace_prefix = ( - '\n' * newlines_before + self.whitespace_prefix.lstrip('\n')) + self.whitespace_prefix = ('\n' * newlines_before + + self.whitespace_prefix.lstrip('\n')) def RetainHorizontalSpacing(self, first_column, depth): """Retains a token's horizontal spacing.""" @@ -184,7 +177,7 @@ class FormatToken(object): if not previous: return - if previous.is_pseudo_paren: + if previous.is_pseudo: previous = previous.previous_token if not previous: return @@ -195,17 +188,17 @@ class FormatToken(object): prev_lineno += previous.value.count('\n') if (cur_lineno != prev_lineno or - (previous.is_pseudo_paren and previous.value != ')' and + (previous.is_pseudo and previous.value != ')' and cur_lineno != previous.previous_token.lineno)): self.spaces_required_before = ( self.column - 
first_column + depth * style.Get('INDENT_WIDTH')) return - cur_column = self.node.column + cur_column = self.column prev_column = previous.node.column prev_len = len(previous.value) - if previous.is_pseudo_paren and previous.value == ')': + if previous.is_pseudo and previous.value == ')': prev_column -= 1 prev_len = 0 @@ -222,67 +215,65 @@ class FormatToken(object): def ClosesScope(self): return self.value in pytree_utils.CLOSING_BRACKETS + def AddSubtype(self, subtype): + self.subtypes.add(subtype) + def __repr__(self): - msg = 'FormatToken(name={0}, value={1}'.format(self.name, self.value) - msg += ', pseudo)' if self.is_pseudo_paren else ')' + msg = ('FormatToken(name={0}, value={1}, column={2}, lineno={3}, ' + 'splitpenalty={4}'.format( + 'DOCSTRING' if self.is_docstring else self.name, self.value, + self.column, self.lineno, self.split_penalty)) + msg += ', pseudo)' if self.is_pseudo else ')' return msg @property - @py3compat.lru_cache() def node_split_penalty(self): """Split penalty attached to the pytree node of this token.""" return pytree_utils.GetNodeAnnotation( self.node, pytree_utils.Annotation.SPLIT_PENALTY, default=0) @property - def newlines(self): - """The number of newlines needed before this token.""" - return pytree_utils.GetNodeAnnotation(self.node, - pytree_utils.Annotation.NEWLINES) - - @property - def must_split(self): - """Return true if the token requires a split before it.""" - return pytree_utils.GetNodeAnnotation(self.node, - pytree_utils.Annotation.MUST_SPLIT) - - @property - def column(self): - """The original column number of the node in the source.""" - return self.node.column - - @property - def lineno(self): - """The original line number of the node in the source.""" - return self.node.lineno + def is_binary_op(self): + """Token is a binary operator.""" + return subtypes.BINARY_OPERATOR in self.subtypes @property @py3compat.lru_cache() - def subtypes(self): - """Extra type information for directing formatting.""" - value = 
pytree_utils.GetNodeAnnotation(self.node, - pytree_utils.Annotation.SUBTYPE) - return [Subtype.NONE] if value is None else value + def is_arithmetic_op(self): + """Token is an arithmetic operator.""" + return self.value in frozenset({ + '+', # Add + '-', # Subtract + '*', # Multiply + '@', # Matrix Multiply + '/', # Divide + '//', # Floor Divide + '%', # Modulo + '<<', # Left Shift + '>>', # Right Shift + '|', # Bitwise Or + '&', # Bitwise And + '^', # Bitwise Xor + '**', # Power + }) @property - @py3compat.lru_cache() - def is_binary_op(self): - """Token is a binary operator.""" - return Subtype.BINARY_OPERATOR in self.subtypes + def is_simple_expr(self): + """Token is an operator in a simple expression.""" + return subtypes.SIMPLE_EXPRESSION in self.subtypes @property - @py3compat.lru_cache() - def name(self): - """A string representation of the node's name.""" - return pytree_utils.NodeName(self.node) + def is_subscript_colon(self): + """Token is a subscript colon.""" + return subtypes.SUBSCRIPT_COLON in self.subtypes @property def is_comment(self): - return self.node.type == token.COMMENT + return self.type == token.COMMENT @property def is_continuation(self): - return self.node.type == CONTINUATION + return self.type == CONTINUATION @property @py3compat.lru_cache() @@ -290,43 +281,31 @@ class FormatToken(object): return keyword.iskeyword(self.value) @property - @py3compat.lru_cache() def is_name(self): - return self.node.type == token.NAME and not self.is_keyword + return self.type == token.NAME and not self.is_keyword @property def is_number(self): - return self.node.type == token.NUMBER + return self.type == token.NUMBER @property def is_string(self): - return self.node.type == token.STRING + return self.type == token.STRING @property - @py3compat.lru_cache() def is_multiline_string(self): - """A multiline string.""" - if py3compat.PY3: - prefix = '(' - prefix += 'r|u|R|U|f|F|fr|Fr|fR|FR|rf|rF|Rf|RF' # strings - prefix += '|b|B|br|Br|bR|BR|rb|rB|Rb|RB' # 
bytes - prefix += ')?' - else: - prefix = '[uUbB]?[rR]?' + """Test if this string is a multiline string. - regex = r'^{prefix}(?P<delim>"""|\'\'\').*(?P=delim)$'.format(prefix=prefix) - return (self.is_string and - re.match(regex, self.value, re.DOTALL) is not None) + Returns: + A multiline string always ends with triple quotes, so if it is a string + token, inspect the last 3 characters and return True if it is a triple + double or triple single quote mark. + """ + return self.is_string and self.value.endswith(('"""', "'''")) @property - @py3compat.lru_cache() def is_docstring(self): - return self.is_multiline_string and not self.node.prev_sibling - - @property - @py3compat.lru_cache() - def is_pseudo_paren(self): - return hasattr(self.node, 'is_pseudo') and self.node.is_pseudo + return self.is_string and self.previous_token is None @property def is_pylint_comment(self): @@ -337,3 +316,8 @@ class FormatToken(object): def is_pytype_comment(self): return self.is_comment and re.match(r'#.*\bpytype:\s*(disable|enable)=', self.value) + + @property + def is_copybara_comment(self): + return self.is_comment and re.match( + r'#.*\bcopybara:\s*(strip|insert|replace)', self.value) diff --git a/yapf/yapflib/identify_container.py b/yapf/yapflib/identify_container.py index 5c5fc5b..888dbbb 100644 --- a/yapf/yapflib/identify_container.py +++ b/yapf/yapflib/identify_container.py @@ -19,6 +19,8 @@ to the opening bracket and vice-versa. IdentifyContainers(): the main function exported by this module. 
""" +from lib2to3.pgen2 import token as grammar_token + from yapf.yapflib import pytree_utils from yapf.yapflib import pytree_visitor @@ -42,7 +44,7 @@ class _IdentifyContainers(pytree_visitor.PyTreeVisitor): if len(node.children) != 3: return - if pytree_utils.NodeName(node.children[0]) != 'LPAR': + if node.children[0].type != grammar_token.LPAR: return if pytree_utils.NodeName(node.children[1]) == 'arglist': @@ -59,7 +61,7 @@ class _IdentifyContainers(pytree_visitor.PyTreeVisitor): if len(node.children) != 3: return - if pytree_utils.NodeName(node.children[0]) != 'LPAR': + if node.children[0].type != grammar_token.LPAR: return for child in node.children[1].children: diff --git a/yapf/yapflib/line_joiner.py b/yapf/yapflib/line_joiner.py index 84346c2..f0acd2f 100644 --- a/yapf/yapflib/line_joiner.py +++ b/yapf/yapflib/line_joiner.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Join unwrapped lines together. +"""Join logical lines together. Determine how many lines can be joined into one line. For instance, we could join these statements into one line: @@ -43,8 +43,8 @@ def CanMergeMultipleLines(lines, last_was_merged=False): """Determine if multiple lines can be joined into one. Arguments: - lines: (list of UnwrappedLine) This is a splice of UnwrappedLines from the - full code base. + lines: (list of LogicalLine) This is a splice of LogicalLines from the full + code base. last_was_merged: (bool) The last line was merged. Returns: @@ -91,7 +91,7 @@ def _CanMergeLineIntoIfStatement(lines, limit): 'continue', and 'break'. Arguments: - lines: (list of UnwrappedLine) The lines we are wanting to merge. + lines: (list of LogicalLine) The lines we are wanting to merge. limit: (int) The amount of space remaining on the line. 
Returns: diff --git a/yapf/yapflib/unwrapped_line.py b/yapf/yapflib/logical_line.py index 92b986e..5723440 100644 --- a/yapf/yapflib/unwrapped_line.py +++ b/yapf/yapflib/logical_line.py @@ -11,12 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""UnwrappedLine primitive for formatting. +"""LogicalLine primitive for formatting. -An unwrapped line is the containing data structure produced by the parser. It -collects all nodes (stored in FormatToken objects) that could appear on a -single line if there were no line length restrictions. It's then used by the -parser to perform the wrapping required to comply with the style guide. +A logical line is the containing data structure produced by the parser. It +collects all nodes (stored in FormatToken objects) that could appear on a single +line if there were no line length restrictions. It's then used by the parser to +perform the wrapping required to comply with the style guide. """ from yapf.yapflib import format_token @@ -24,10 +24,13 @@ from yapf.yapflib import py3compat from yapf.yapflib import pytree_utils from yapf.yapflib import split_penalty from yapf.yapflib import style +from yapf.yapflib import subtypes +from lib2to3.fixer_util import syms as python_symbols -class UnwrappedLine(object): - """Represents a single unwrapped line in the output. + +class LogicalLine(object): + """Represents a single logical line in the output. Attributes: depth: indentation depth of this line. This is just a numeric value used to @@ -38,7 +41,7 @@ class UnwrappedLine(object): def __init__(self, depth, tokens=None): """Constructor. - Creates a new unwrapped line with the given depth an initial list of tokens. + Creates a new logical line with the given depth an initial list of tokens. 
Constructs the doubly-linked lists for format tokens using their built-in next_token and previous_token attributes. @@ -60,7 +63,7 @@ class UnwrappedLine(object): def CalculateFormattingInformation(self): """Calculate the split penalty and total length for the tokens.""" # Say that the first token in the line should have a space before it. This - # means only that if this unwrapped line is joined with a predecessor line, + # means only that if this logical line is joined with a predecessor line, # then there will be a space between them. self.first.spaces_required_before = 1 self.first.total_length = len(self.first.value) @@ -69,11 +72,24 @@ class UnwrappedLine(object): prev_length = self.first.total_length for token in self._tokens[1:]: if (token.spaces_required_before == 0 and - _SpaceRequiredBetween(prev_token, token)): + _SpaceRequiredBetween(prev_token, token, self.disable)): token.spaces_required_before = 1 - tok_len = len(token.value) if not token.is_pseudo_paren else 0 - token.total_length = prev_length + tok_len + token.spaces_required_before + tok_len = len(token.value) if not token.is_pseudo else 0 + + spaces_required_before = token.spaces_required_before + if isinstance(spaces_required_before, list): + assert token.is_comment, token + + # If here, we are looking at a comment token that appears on a line + # with other tokens (but because it is a comment, it is always the last + # token). Rather than specifying the actual number of spaces here, + # hard code a value of 0 and then set it later. This logic only works + # because this comment token is guaranteed to be the last token in the + # list. + spaces_required_before = 0 + + token.total_length = prev_length + tok_len + spaces_required_before # The split penalty has to be computed before {must|can}_break_before, # because these may use it for their decision. 
@@ -90,25 +106,23 @@ class UnwrappedLine(object): if not self.has_semicolon or self.disable: return [self] - uwlines = [] - uwline = UnwrappedLine(self.depth) + llines = [] + lline = LogicalLine(self.depth) for tok in self._tokens: if tok.value == ';': - uwlines.append(uwline) - uwline = UnwrappedLine(self.depth) + llines.append(lline) + lline = LogicalLine(self.depth) else: - uwline.AppendToken(tok) + lline.AppendToken(tok) - if uwline.tokens: - uwlines.append(uwline) + if lline.tokens: + llines.append(lline) - for uwline in uwlines: - pytree_utils.SetNodeAnnotation(uwline.first.node, - pytree_utils.Annotation.MUST_SPLIT, True) - uwline.first.previous_token = None - uwline.last.next_token = None + for lline in llines: + lline.first.previous_token = None + lline.last.next_token = None - return uwlines + return llines ############################################################################ # Token Access and Manipulation Methods # @@ -169,8 +183,8 @@ class UnwrappedLine(object): def __repr__(self): # pragma: no cover tokens_repr = ','.join( - ['{0}({1!r})'.format(tok.name, tok.value) for tok in self._tokens]) - return 'UnwrappedLine(depth={0}, tokens=[{1}])'.format( + '{0}({1!r})'.format(tok.name, tok.value) for tok in self._tokens) + return 'LogicalLine(depth={0}, tokens=[{1}])'.format( self.depth, tokens_repr) ############################################################################ @@ -190,14 +204,32 @@ class UnwrappedLine(object): @property def lineno(self): - """Return the line number of this unwrapped line. + """Return the line number of this logical line. Returns: - The line number of the first token in this unwrapped line. + The line number of the first token in this logical line. """ return self.first.lineno @property + def start(self): + """The start of the logical line. + + Returns: + A tuple of the starting line number and column. + """ + return (self.first.lineno, self.first.column) + + @property + def end(self): + """The end of the logical line. 
+ + Returns: + A tuple of the ending line number and column. + """ + return (self.last.lineno, self.last.column + len(self.last.value)) + + @property def is_comment(self): return self.first.is_comment @@ -211,18 +243,56 @@ def _IsIdNumberStringToken(tok): def _IsUnaryOperator(tok): - return format_token.Subtype.UNARY_OPERATOR in tok.subtypes + return subtypes.UNARY_OPERATOR in tok.subtypes + +def _HasPrecedence(tok): + """Whether a binary operation has precedence within its context.""" + node = tok.node -def _SpaceRequiredBetween(left, right): + # We let ancestor be the statement surrounding the operation that tok is the + # operator in. + ancestor = node.parent.parent + + while ancestor is not None: + # Search through the ancestor nodes in the parse tree for operators with + # lower precedence. + predecessor_type = pytree_utils.NodeName(ancestor) + if predecessor_type in ['arith_expr', 'term']: + # An ancestor "arith_expr" or "term" means we have found an operator + # with lower precedence than our tok. + return True + if predecessor_type != 'atom': + # We understand the context to look for precedence within as an + # arbitrary nesting of "arith_expr", "term", and "atom" nodes. If we + # leave this context we have not found a lower precedence operator. + return False + # Under normal usage we expect a complete parse tree to be available and + # we will return before we get an AttributeError from the root. 
+ ancestor = ancestor.parent + + +def _PriorityIndicatingNoSpace(tok): + """Whether to remove spaces around an operator due to precedence.""" + if not tok.is_arithmetic_op or not tok.is_simple_expr: + # Limit space removal to highest priority arithmetic operators + return False + return _HasPrecedence(tok) + + +def _IsSubscriptColonAndValuePair(token1, token2): + return (token1.is_number or token1.is_name) and token2.is_subscript_colon + + +def _SpaceRequiredBetween(left, right, is_line_disabled): """Return True if a space is required between the left and right token.""" lval = left.value rval = right.value - if (left.is_pseudo_paren and _IsIdNumberStringToken(right) and + if (left.is_pseudo and _IsIdNumberStringToken(right) and left.previous_token and _IsIdNumberStringToken(left.previous_token)): # Space between keyword... tokens and pseudo parens. return True - if left.is_pseudo_paren or right.is_pseudo_paren: + if left.is_pseudo or right.is_pseudo: # There should be a space after the ':' in a dictionary. if left.OpensScope(): return True @@ -240,6 +310,17 @@ def _SpaceRequiredBetween(left, right): if lval == ',' and rval == ':': # We do want a space between a comma and colon. return True + if style.Get('SPACE_INSIDE_BRACKETS'): + # Supersede the "no space before a colon or comma" check. + if lval in pytree_utils.OPENING_BRACKETS and rval == ':': + return True + if rval in pytree_utils.CLOSING_BRACKETS and lval == ':': + return True + if (style.Get('SPACES_AROUND_SUBSCRIPT_COLON') and + (_IsSubscriptColonAndValuePair(left, right) or + _IsSubscriptColonAndValuePair(right, left))): + # Supersede the "never want a space before a colon or comma" check. + return True if rval in ':,': # Otherwise, we never want a space before a colon or comma. return False @@ -255,33 +336,43 @@ def _SpaceRequiredBetween(left, right): if lval == '.' and rval == 'import': # Space after the '.' in an import statement. return True - if (lval == '=' and rval == '.' 
and - format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN not in left.subtypes): + if (lval == '=' and rval in {'.', ',,,'} and + subtypes.DEFAULT_OR_NAMED_ASSIGN not in left.subtypes): # Space between equal and '.' as in "X = ...". return True + if lval == ':' and rval in {'.', '...'}: + # Space between : and ... + return True if ((right.is_keyword or right.is_name) and (left.is_keyword or left.is_name)): # Don't merge two keywords/identifiers. return True - if (format_token.Subtype.SUBSCRIPT_COLON in left.subtypes or - format_token.Subtype.SUBSCRIPT_COLON in right.subtypes): + if (subtypes.SUBSCRIPT_COLON in left.subtypes or + subtypes.SUBSCRIPT_COLON in right.subtypes): # A subscript shouldn't have spaces separating its colons. return False - if (format_token.Subtype.TYPED_NAME in left.subtypes or - format_token.Subtype.TYPED_NAME in right.subtypes): + if (subtypes.TYPED_NAME in left.subtypes or + subtypes.TYPED_NAME in right.subtypes): # A typed argument should have a space after the colon. return True if left.is_string: if (rval == '=' and - format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in right.subtypes - ): + subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in right.subtypes): # If there is a type hint, then we don't want to add a space between the # equal sign and the hint. return False - if rval not in '[)]}.': + if rval not in '[)]}.' and not right.is_binary_op: # A string followed by something other than a subscript, closing bracket, - # or dot should have a space after it. + # dot, or a binary op should have a space after it. return True + if rval in pytree_utils.CLOSING_BRACKETS: + # A string followed by closing brackets should have a space after it + # depending on SPACE_INSIDE_BRACKETS. A string followed by opening + # brackets, however, should not. 
+ return style.Get('SPACE_INSIDE_BRACKETS') + if subtypes.SUBSCRIPT_BRACKET in right.subtypes: + # It's legal to do this in Python: 'hello'[a] + return False if left.is_binary_op and lval != '**' and _IsUnaryOperator(right): # Space between the binary operator and the unary operator. return True @@ -295,31 +386,46 @@ def _SpaceRequiredBetween(left, right): if lval == '**' or rval == '**': # Space around the "power" operator. return style.Get('SPACES_AROUND_POWER_OPERATOR') - # Enforce spaces around binary operators except the blacklisted ones. - blacklist = style.Get('NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS') - return lval not in blacklist and rval not in blacklist + # Enforce spaces around binary operators except the blocked ones. + block_list = style.Get('NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS') + if lval in block_list or rval in block_list: + return False + if style.Get('ARITHMETIC_PRECEDENCE_INDICATION'): + if _PriorityIndicatingNoSpace(left) or _PriorityIndicatingNoSpace(right): + return False + else: + return True + else: + return True if (_IsUnaryOperator(left) and lval != 'not' and (right.is_name or right.is_number or rval == '(')): # The previous token was a unary op. No space is desired between it and # the current token. return False - if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in left.subtypes or - format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in right.subtypes): + if (subtypes.DEFAULT_OR_NAMED_ASSIGN in left.subtypes and + subtypes.TYPED_NAME not in right.subtypes): + # A named argument or default parameter shouldn't have spaces around it. + return style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN') + if (subtypes.DEFAULT_OR_NAMED_ASSIGN in right.subtypes and + subtypes.TYPED_NAME not in left.subtypes): # A named argument or default parameter shouldn't have spaces around it. 
return style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN') - if (format_token.Subtype.VARARGS_LIST in left.subtypes or - format_token.Subtype.VARARGS_LIST in right.subtypes): + if (subtypes.VARARGS_LIST in left.subtypes or + subtypes.VARARGS_LIST in right.subtypes): return False - if (format_token.Subtype.VARARGS_STAR in left.subtypes or - format_token.Subtype.KWARGS_STAR_STAR in left.subtypes): + if (subtypes.VARARGS_STAR in left.subtypes or + subtypes.KWARGS_STAR_STAR in left.subtypes): # Don't add a space after a vararg's star or a keyword's star-star. return False - if lval == '@' and format_token.Subtype.DECORATOR in left.subtypes: + if lval == '@' and subtypes.DECORATOR in left.subtypes: # Decorators shouldn't be separated from the 'at' sign. return False - if left.is_keyword and rval == '.' or lval == '.' and right.is_keyword: + if left.is_keyword and rval == '.': + # Add space between keywords and dots. + return lval not in {'None', 'print'} + if lval == '.' and right.is_keyword: # Add space between keywords and dots. - return lval != 'None' + return rval not in {'None', 'print'} if lval == '.' or rval == '.': # Don't place spaces between dots. return False @@ -327,53 +433,75 @@ def _SpaceRequiredBetween(left, right): (lval == '{' and rval == '}')): # Empty objects shouldn't be separated by spaces. 
return False + if not is_line_disabled and (left.OpensScope() or right.ClosesScope()): + if (style.GetOrDefault('SPACES_AROUND_DICT_DELIMITERS', False) and ( + (lval == '{' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or + (rval == '}' and + _IsDictListTupleDelimiterTok(right, is_opening=False)))): + return True + if (style.GetOrDefault('SPACES_AROUND_LIST_DELIMITERS', False) and ( + (lval == '[' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or + (rval == ']' and + _IsDictListTupleDelimiterTok(right, is_opening=False)))): + return True + if (style.GetOrDefault('SPACES_AROUND_TUPLE_DELIMITERS', False) and ( + (lval == '(' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or + (rval == ')' and + _IsDictListTupleDelimiterTok(right, is_opening=False)))): + return True if (lval in pytree_utils.OPENING_BRACKETS and rval in pytree_utils.OPENING_BRACKETS): - # Nested objects' opening brackets shouldn't be separated. - return False + # Nested objects' opening brackets shouldn't be separated, unless enabled + # by SPACE_INSIDE_BRACKETS. + return style.Get('SPACE_INSIDE_BRACKETS') if (lval in pytree_utils.CLOSING_BRACKETS and rval in pytree_utils.CLOSING_BRACKETS): - # Nested objects' closing brackets shouldn't be separated. - return False + # Nested objects' closing brackets shouldn't be separated, unless enabled + # by SPACE_INSIDE_BRACKETS. + return style.Get('SPACE_INSIDE_BRACKETS') if lval in pytree_utils.CLOSING_BRACKETS and rval in '([': # A call, set, dictionary, or subscript that has a call or subscript after # it shouldn't have a space between them. return False if lval in pytree_utils.OPENING_BRACKETS and _IsIdNumberStringToken(right): - # Don't separate the opening bracket from the first item. - return False + # Don't separate the opening bracket from the first item, unless enabled + # by SPACE_INSIDE_BRACKETS. 
+ return style.Get('SPACE_INSIDE_BRACKETS') if left.is_name and rval in '([': # Don't separate a call or array access from the name. return False if rval in pytree_utils.CLOSING_BRACKETS: - # Don't separate the closing bracket from the last item. + # Don't separate the closing bracket from the last item, unless enabled + # by SPACE_INSIDE_BRACKETS. # FIXME(morbo): This might be too permissive. - return False + return style.Get('SPACE_INSIDE_BRACKETS') if lval == 'print' and rval == '(': # Special support for the 'print' function. return False if lval in pytree_utils.OPENING_BRACKETS and _IsUnaryOperator(right): - # Don't separate a unary operator from the opening bracket. - return False + # Don't separate a unary operator from the opening bracket, unless enabled + # by SPACE_INSIDE_BRACKETS. + return style.Get('SPACE_INSIDE_BRACKETS') if (lval in pytree_utils.OPENING_BRACKETS and - (format_token.Subtype.VARARGS_STAR in right.subtypes or - format_token.Subtype.KWARGS_STAR_STAR in right.subtypes)): - # Don't separate a '*' or '**' from the opening bracket. - return False + (subtypes.VARARGS_STAR in right.subtypes or + subtypes.KWARGS_STAR_STAR in right.subtypes)): + # Don't separate a '*' or '**' from the opening bracket, unless enabled + # by SPACE_INSIDE_BRACKETS. + return style.Get('SPACE_INSIDE_BRACKETS') if rval == ';': # Avoid spaces before a semicolon. (Why is there a semicolon?!) return False if lval == '(' and rval == 'await': # Special support for the 'await' keyword. Don't separate the 'await' - # keyword from an opening paren. - return False + # keyword from an opening paren, unless enabled by SPACE_INSIDE_BRACKETS. 
+ return style.Get('SPACE_INSIDE_BRACKETS') return True def _MustBreakBefore(prev_token, cur_token): """Return True if a line break is required before the current token.""" if prev_token.is_comment or (prev_token.previous_token and - prev_token.is_pseudo_paren and + prev_token.is_pseudo and prev_token.previous_token.is_comment): # Must break if the previous token was a comment. return True @@ -383,8 +511,7 @@ def _MustBreakBefore(prev_token, cur_token): # reasonable assumption, because otherwise they should have written them # all on the same line, or with a '+'. return True - return pytree_utils.GetNodeAnnotation( - cur_token.node, pytree_utils.Annotation.MUST_SPLIT, default=False) + return cur_token.must_break_before def _CanBreakBefore(prev_token, cur_token): @@ -415,15 +542,16 @@ def _CanBreakBefore(prev_token, cur_token): if prev_token.is_name and cval == '[': # Don't break in the middle of an array dereference. return False - if prev_token.is_name and cval == '.': - # Don't break before the '.' in a dotted name. - return False if cur_token.is_comment and prev_token.lineno == cur_token.lineno: # Don't break a comment at the end of the line. return False - if format_token.Subtype.UNARY_OPERATOR in prev_token.subtypes: + if subtypes.UNARY_OPERATOR in prev_token.subtypes: # Don't break after a unary token. 
return False + if not style.Get('ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS'): + if (subtypes.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes or + subtypes.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes): + return False return True @@ -458,9 +586,36 @@ def IsSurroundedByBrackets(tok): return None +def _IsDictListTupleDelimiterTok(tok, is_opening): + assert tok + + if tok.matching_bracket is None: + return False + + if is_opening: + open_tok = tok + close_tok = tok.matching_bracket + else: + open_tok = tok.matching_bracket + close_tok = tok + + # There must be something in between the tokens + if open_tok.next_token == close_tok: + return False + + assert open_tok.next_token.node + assert open_tok.next_token.node.parent + + return open_tok.next_token.node.parent.type in [ + python_symbols.dictsetmaker, + python_symbols.listmaker, + python_symbols.testlist_gexp, + ] + + _LOGICAL_OPERATORS = frozenset({'and', 'or'}) _BITWISE_OPERATORS = frozenset({'&', '|', '^'}) -_TERM_OPERATORS = frozenset({'*', '/', '%', '//'}) +_ARITHMETIC_OPERATORS = frozenset({'+', '-', '*', '/', '%', '//', '@'}) def _SplitPenalty(prev_token, cur_token): @@ -499,21 +654,20 @@ def _SplitPenalty(prev_token, cur_token): if cval in _BITWISE_OPERATORS: return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR') - if (format_token.Subtype.COMP_FOR in cur_token.subtypes or - format_token.Subtype.COMP_IF in cur_token.subtypes): + if (subtypes.COMP_FOR in cur_token.subtypes or + subtypes.COMP_IF in cur_token.subtypes): # We don't mind breaking before the 'for' or 'if' of a list comprehension. return 0 - if format_token.Subtype.UNARY_OPERATOR in prev_token.subtypes: + if subtypes.UNARY_OPERATOR in prev_token.subtypes: # Try not to break after a unary operator. return style.Get('SPLIT_PENALTY_AFTER_UNARY_OPERATOR') if pval == ',': # Breaking after a comma is fine, if need be. return 0 - if prev_token.is_binary_op: - # We would rather not split after an equality operator. 
- return 20 - if (format_token.Subtype.VARARGS_STAR in prev_token.subtypes or - format_token.Subtype.KWARGS_STAR_STAR in prev_token.subtypes): + if pval == '**' or cval == '**': + return split_penalty.STRONGLY_CONNECTED + if (subtypes.VARARGS_STAR in prev_token.subtypes or + subtypes.KWARGS_STAR_STAR in prev_token.subtypes): # Don't split after a varargs * or kwargs **. return split_penalty.UNBREAKABLE if prev_token.OpensScope() and cval != '(': @@ -525,8 +679,8 @@ def _SplitPenalty(prev_token, cur_token): if cval == '=': # Don't split before an assignment. return split_penalty.UNBREAKABLE - if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes or - format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes): + if (subtypes.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes or + subtypes.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes): # Don't break before or after an default or named assignment. return split_penalty.UNBREAKABLE if cval == '==': @@ -535,6 +689,4 @@ def _SplitPenalty(prev_token, cur_token): if cur_token.ClosesScope(): # Give a slight penalty for splitting before the closing scope. return 100 - if pval in _TERM_OPERATORS or cval in _TERM_OPERATORS: - return 50 return 0 diff --git a/yapf/yapflib/object_state.py b/yapf/yapflib/object_state.py index dded7c4..07925ef 100644 --- a/yapf/yapflib/object_state.py +++ b/yapf/yapflib/object_state.py @@ -22,6 +22,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from yapf.yapflib import format_token +from yapf.yapflib import py3compat +from yapf.yapflib import style +from yapf.yapflib import subtypes + class ComprehensionState(object): """Maintains the state of list comprehension formatting decisions. @@ -32,10 +37,12 @@ class ComprehensionState(object): Attributes: expr_token: The first token in the comprehension. for_token: The first 'for' token of the comprehension. 
+ opening_bracket: The opening bracket of the list comprehension. + closing_bracket: The closing bracket of the list comprehension. has_split_at_for: Whether there is a newline immediately before the - for_token. + for_token. has_interior_split: Whether there is a newline within the comprehension. - That is, a split somewhere after expr_token or before closing_bracket. + That is, a split somewhere after expr_token or before closing_bracket. """ def __init__(self, expr_token): @@ -78,3 +85,149 @@ class ComprehensionState(object): def __hash__(self, *args, **kwargs): return hash((self.expr_token, self.for_token, self.has_split_at_for, self.has_interior_split)) + + +class ParameterListState(object): + """Maintains the state of function parameter list formatting decisions. + + Attributes: + opening_bracket: The opening bracket of the parameter list. + closing_bracket: The closing bracket of the parameter list. + has_typed_return: True if the function definition has a typed return. + ends_in_comma: True if the parameter list ends in a comma. + last_token: Returns the last token of the function declaration. + has_default_values: True if the parameters have default values. + has_split_before_first_param: Whether there is a newline before the first + parameter. + opening_column: The position of the opening parameter before a newline. + parameters: A list of parameter objects (Parameter). + split_before_closing_bracket: Split before the closing bracket. Sometimes + needed if the indentation would collide. 
+ """ + + def __init__(self, opening_bracket, newline, opening_column): + self.opening_bracket = opening_bracket + self.has_split_before_first_param = newline + self.opening_column = opening_column + self.parameters = opening_bracket.parameters + self.split_before_closing_bracket = False + + @property + def closing_bracket(self): + return self.opening_bracket.matching_bracket + + @property + def has_typed_return(self): + return self.closing_bracket.next_token.value == '->' + + @property + @py3compat.lru_cache() + def has_default_values(self): + return any(param.has_default_value for param in self.parameters) + + @property + @py3compat.lru_cache() + def ends_in_comma(self): + if not self.parameters: + return False + return self.parameters[-1].last_token.next_token.value == ',' + + @property + @py3compat.lru_cache() + def last_token(self): + token = self.opening_bracket.matching_bracket + while not token.is_comment and token.next_token: + token = token.next_token + return token + + @py3compat.lru_cache() + def LastParamFitsOnLine(self, indent): + """Return true if the last parameter fits on a single line.""" + if not self.has_typed_return: + return False + if not self.parameters: + return True + total_length = self.last_token.total_length + last_param = self.parameters[-1].first_token + total_length -= last_param.total_length - len(last_param.value) + return total_length + indent <= style.Get('COLUMN_LIMIT') + + @py3compat.lru_cache() + def SplitBeforeClosingBracket(self, indent): + """Return true if there's a split before the closing bracket.""" + if style.Get('DEDENT_CLOSING_BRACKETS'): + return True + if self.ends_in_comma: + return True + if not self.parameters: + return False + total_length = self.last_token.total_length + last_param = self.parameters[-1].first_token + total_length -= last_param.total_length - len(last_param.value) + return total_length + indent > style.Get('COLUMN_LIMIT') + + def Clone(self): + clone = ParameterListState(self.opening_bracket, + 
self.has_split_before_first_param, + self.opening_column) + clone.split_before_closing_bracket = self.split_before_closing_bracket + clone.parameters = [param.Clone() for param in self.parameters] + return clone + + def __repr__(self): + return ('[opening_bracket::%s, has_split_before_first_param::%s, ' + 'opening_column::%d]' % + (self.opening_bracket, self.has_split_before_first_param, + self.opening_column)) + + def __eq__(self, other): + return hash(self) == hash(other) + + def __ne__(self, other): + return not self == other + + def __hash__(self, *args, **kwargs): + return hash( + (self.opening_bracket, self.has_split_before_first_param, + self.opening_column, (hash(param) for param in self.parameters))) + + +class Parameter(object): + """A parameter in a parameter list. + + Attributes: + first_token: (format_token.FormatToken) First token of parameter. + last_token: (format_token.FormatToken) Last token of parameter. + has_default_value: (boolean) True if the parameter has a default value + """ + + def __init__(self, first_token, last_token): + self.first_token = first_token + self.last_token = last_token + + @property + @py3compat.lru_cache() + def has_default_value(self): + """Returns true if the parameter has a default value.""" + tok = self.first_token + while tok != self.last_token: + if subtypes.DEFAULT_OR_NAMED_ASSIGN in tok.subtypes: + return True + tok = tok.matching_bracket if tok.OpensScope() else tok.next_token + return False + + def Clone(self): + return Parameter(self.first_token, self.last_token) + + def __repr__(self): + return '[first_token::%s, last_token:%s]' % (self.first_token, + self.last_token) + + def __eq__(self, other): + return hash(self) == hash(other) + + def __ne__(self, other): + return not self == other + + def __hash__(self, *args, **kwargs): + return hash((self.first_token, self.last_token)) diff --git a/yapf/yapflib/py3compat.py b/yapf/yapflib/py3compat.py index c66d6c6..8f15476 100644 --- a/yapf/yapflib/py3compat.py +++ 
b/yapf/yapflib/py3compat.py @@ -13,20 +13,23 @@ # limitations under the License. """Utilities for Python2 / Python3 compatibility.""" +import codecs import io import os import sys PY3 = sys.version_info[0] >= 3 PY36 = sys.version_info[0] >= 3 and sys.version_info[1] >= 6 +PY37 = sys.version_info[0] >= 3 and sys.version_info[1] >= 7 +PY38 = sys.version_info[0] >= 3 and sys.version_info[1] >= 8 if PY3: StringIO = io.StringIO BytesIO = io.BytesIO - import codecs + import codecs # noqa: F811 - def open_with_encoding(filename, mode, encoding, newline=''): # pylint: disable=unused-argument + def open_with_encoding(filename, mode, encoding, newline=''): # pylint: disable=unused-argument # noqa return codecs.open(filename, mode=mode, encoding=encoding) import functools @@ -59,13 +62,13 @@ else: return fake_wrapper - range = xrange + range = xrange # noqa: F821 from itertools import ifilter raw_input = raw_input import ConfigParser as configparser - CONFIGPARSER_BOOLEAN_STATES = configparser.ConfigParser._boolean_states # pylint: disable=protected-access + CONFIGPARSER_BOOLEAN_STATES = configparser.ConfigParser._boolean_states # pylint: disable=protected-access # noqa def EncodeAndWriteToStdout(s, encoding='utf-8'): @@ -116,3 +119,13 @@ class ConfigParser(configparser.ConfigParser): def read_file(self, fp, source=None): self.readfp(fp, filename=source) + + +def removeBOM(source): + """Remove any Byte-order-Mark bytes from the beginning of a file.""" + bom = codecs.BOM_UTF8 + if PY3: + bom = bom.decode('utf-8') + if source.startswith(bom): + return source[len(bom):] + return source diff --git a/yapf/yapflib/pytree_unwrapper.py b/yapf/yapflib/pytree_unwrapper.py index 0d371ae..1b05b0e 100644 --- a/yapf/yapflib/pytree_unwrapper.py +++ b/yapf/yapflib/pytree_unwrapper.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree. +"""PyTreeUnwrapper - produces a list of logical lines from a pytree. -[for a description of what an unwrapped line is, see unwrapped_line.py] +[for a description of what a logical line is, see logical_line.py] This is a pytree visitor that goes over a parse tree and produces a list of -UnwrappedLine containers from it, each with its own depth and containing all -the tokens that could fit on the line if there were no maximal line-length +LogicalLine containers from it, each with its own depth and containing all the +tokens that could fit on the line if there were no maximal line-length limitations. Note: a precondition to running this visitor and obtaining correct results is @@ -31,27 +31,30 @@ For most uses, the convenience function UnwrapPyTree should be sufficient. from lib2to3 import pytree from lib2to3.pgen2 import token as grammar_token +from yapf.yapflib import format_token +from yapf.yapflib import logical_line +from yapf.yapflib import object_state from yapf.yapflib import pytree_utils from yapf.yapflib import pytree_visitor from yapf.yapflib import split_penalty from yapf.yapflib import style -from yapf.yapflib import unwrapped_line +from yapf.yapflib import subtypes def UnwrapPyTree(tree): - """Create and return a list of unwrapped lines from the given pytree. + """Create and return a list of logical lines from the given pytree. Arguments: - tree: the top-level pytree node to unwrap. + tree: the top-level pytree node to unwrap.. Returns: - A list of UnwrappedLine objects. + A list of LogicalLine objects. """ unwrapper = PyTreeUnwrapper() unwrapper.Visit(tree) - uwlines = unwrapper.GetUnwrappedLines() - uwlines.sort(key=lambda x: x.lineno) - return uwlines + llines = unwrapper.GetLogicalLines() + llines.sort(key=lambda x: x.lineno) + return llines # Grammar tokens considered as whitespace for the purpose of unwrapping. 
@@ -77,39 +80,40 @@ class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): """ def __init__(self): - # A list of all unwrapped lines finished visiting so far. - self._unwrapped_lines = [] + # A list of all logical lines finished visiting so far. + self._logical_lines = [] - # Builds up a "current" unwrapped line while visiting pytree nodes. Some - # nodes will finish a line and start a new one. - self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0) + # Builds up a "current" logical line while visiting pytree nodes. Some nodes + # will finish a line and start a new one. + self._cur_logical_line = logical_line.LogicalLine(0) # Current indentation depth. self._cur_depth = 0 - def GetUnwrappedLines(self): + def GetLogicalLines(self): """Fetch the result of the tree walk. Note: only call this after visiting the whole tree. Returns: - A list of UnwrappedLine objects. + A list of LogicalLine objects. """ # Make sure the last line that was being populated is flushed. self._StartNewLine() - return self._unwrapped_lines + return self._logical_lines def _StartNewLine(self): """Finish current line and start a new one. - Place the currently accumulated line into the _unwrapped_lines list and + Place the currently accumulated line into the _logical_lines list and start a new one. 
""" - if self._cur_unwrapped_line.tokens: - self._unwrapped_lines.append(self._cur_unwrapped_line) - _MatchBrackets(self._cur_unwrapped_line) - _AdjustSplitPenalty(self._cur_unwrapped_line) - self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth) + if self._cur_logical_line.tokens: + self._logical_lines.append(self._cur_logical_line) + _MatchBrackets(self._cur_logical_line) + _IdentifyParameterLists(self._cur_logical_line) + _AdjustSplitPenalty(self._cur_logical_line) + self._cur_logical_line = logical_line.LogicalLine(self._cur_depth) _STMT_TYPES = frozenset({ 'if_stmt', @@ -148,7 +152,7 @@ class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): """Helper for visiting compound statements. Python compound statements serve as containers for other statements. Thus, - when we encounter a new compound statement we start a new unwrapped line. + when we encounter a new compound statement, we start a new logical line. Arguments: node: the node to visit. @@ -202,7 +206,7 @@ class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): for child in node.children: index += 1 self.Visit(child) - if pytree_utils.NodeName(child) == 'ASYNC': + if child.type == grammar_token.ASYNC: break for child in node.children[index].children: self.Visit(child) @@ -218,16 +222,17 @@ class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): for child in node.children: index += 1 self.Visit(child) - if pytree_utils.NodeName(child) == 'ASYNC': + if child.type == grammar_token.ASYNC: break for child in node.children[index].children: + if child.type == grammar_token.NAME and child.value == 'else': + self._StartNewLine() self.Visit(child) def Visit_decorator(self, node): # pylint: disable=invalid-name for child in node.children: self.Visit(child) - if (pytree_utils.NodeName(child) == 'COMMENT' and - child == node.children[0]): + if child.type == grammar_token.COMMENT and child == node.children[0]: self._StartNewLine() def Visit_decorators(self, node): # pylint: disable=invalid-name @@ -280,7 
+285,7 @@ class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): def DefaultLeafVisit(self, leaf): """Default visitor for tree leaves. - A tree leaf is always just gets appended to the current unwrapped line. + A tree leaf is always just gets appended to the current logical line. Arguments: leaf: the leaf to visit. @@ -289,13 +294,13 @@ class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): self._StartNewLine() elif leaf.type != grammar_token.COMMENT or leaf.value.strip(): # Add non-whitespace tokens and comments that aren't empty. - self._cur_unwrapped_line.AppendNode(leaf) + self._cur_logical_line.AppendNode(leaf) _BRACKET_MATCH = {')': '(', '}': '{', ']': '['} -def _MatchBrackets(uwline): +def _MatchBrackets(line): """Visit the node and match the brackets. For every open bracket ('[', '{', or '('), find the associated closing bracket @@ -303,10 +308,10 @@ def _MatchBrackets(uwline): or close bracket. Arguments: - uwline: (UnwrappedLine) An unwrapped line. + line: (LogicalLine) A logical line. """ bracket_stack = [] - for token in uwline.tokens: + for token in line.tokens: if token.value in pytree_utils.OPENING_BRACKETS: bracket_stack.append(token) elif token.value in pytree_utils.CLOSING_BRACKETS: @@ -320,17 +325,50 @@ def _MatchBrackets(uwline): token.container_opening = bracket -def _AdjustSplitPenalty(uwline): +def _IdentifyParameterLists(line): + """Visit the node to create a state for parameter lists. + + For instance, a parameter is considered an "object" with its first and last + token uniquely identifying the object. + + Arguments: + line: (LogicalLine) A logical line. + """ + func_stack = [] + param_stack = [] + for tok in line.tokens: + # Identify parameter list objects. + if subtypes.FUNC_DEF in tok.subtypes: + assert tok.next_token.value == '(' + func_stack.append(tok.next_token) + continue + + if func_stack and tok.value == ')': + if tok == func_stack[-1].matching_bracket: + func_stack.pop() + continue + + # Identify parameter objects. 
+ if subtypes.PARAMETER_START in tok.subtypes: + param_stack.append(tok) + + # Not "elif", a parameter could be a single token. + if param_stack and subtypes.PARAMETER_STOP in tok.subtypes: + start = param_stack.pop() + func_stack[-1].parameters.append(object_state.Parameter(start, tok)) + + +def _AdjustSplitPenalty(line): """Visit the node and adjust the split penalties if needed. A token shouldn't be split if it's not within a bracket pair. Mark any token that's not within a bracket pair as "unbreakable". Arguments: - uwline: (UnwrappedLine) An unwrapped line. + line: (LogicalLine) An logical line. """ bracket_level = 0 - for index, token in enumerate(uwline.tokens): + for index, token in enumerate(line.tokens): if index and not bracket_level: pytree_utils.SetNodeAnnotation(token.node, pytree_utils.Annotation.SPLIT_PENALTY, @@ -348,8 +386,7 @@ def _DetermineMustSplitAnnotation(node): if not _ContainsComments(node): token = next(node.parent.leaves()) if token.value == '(': - if sum(1 for ch in node.children - if pytree_utils.NodeName(ch) == 'COMMA') < 2: + if sum(1 for ch in node.children if ch.type == grammar_token.COMMA) < 2: return if (not isinstance(node.children[-1], pytree.Leaf) or node.children[-1].value != ','): diff --git a/yapf/yapflib/pytree_utils.py b/yapf/yapflib/pytree_utils.py index 999ba88..8762032 100644 --- a/yapf/yapflib/pytree_utils.py +++ b/yapf/yapflib/pytree_utils.py @@ -25,6 +25,7 @@ the lib2to3 library. """ import ast +import os from lib2to3 import pygram from lib2to3 import pytree @@ -108,6 +109,9 @@ def ParseCodeToTree(code): """ # This function is tiny, but the incantation for invoking the parser correctly # is sufficiently magical to be worth abstracting away. + if not code.endswith(os.linesep): + code += os.linesep + try: # Try to parse using a Python 3 grammar, which is more permissive (print and # exec are not keywords). 
@@ -219,6 +223,18 @@ def _InsertNodeAt(new_node, target, after=False): _NODE_ANNOTATION_PREFIX = '_yapf_annotation_' +def CopyYapfAnnotations(src, dst): + """Copy all YAPF annotations from the source node to the destination node. + + Arguments: + src: the source node. + dst: the destination node. + """ + for annotation in dir(src): + if annotation.startswith(_NODE_ANNOTATION_PREFIX): + setattr(dst, annotation, getattr(src, annotation, None)) + + def GetNodeAnnotation(node, annotation, default=None): """Get annotation value from a node. diff --git a/yapf/yapflib/pytree_visitor.py b/yapf/yapflib/pytree_visitor.py index 49da056..a39331c 100644 --- a/yapf/yapflib/pytree_visitor.py +++ b/yapf/yapflib/pytree_visitor.py @@ -19,7 +19,7 @@ and Leaf types. This module implements a visitor pattern for such trees. It also exports a basic "dumping" visitor that dumps a textual representation of a pytree into a stream. - PyTreeVisitor: a generic visitor pattern fo pytrees. + PyTreeVisitor: a generic visitor pattern for pytrees. PyTreeDumper: a configurable "dumper" for displaying pytrees. DumpPyTree(): a convenience function to dump a pytree. """ diff --git a/yapf/yapflib/reformatter.py b/yapf/yapflib/reformatter.py index 6539e68..b6e6a13 100644 --- a/yapf/yapflib/reformatter.py +++ b/yapf/yapflib/reformatter.py @@ -13,9 +13,8 @@ # limitations under the License. """Decide what the format for the code should be. -The `unwrapped_line.UnwrappedLine`s are now ready to be formatted. -UnwrappedLines that can be merged together are. The best formatting is returned -as a string. +The `logical_line.LogicalLine`s are now ready to be formatted. LogicalLines that +can be merged together are. The best formatting is returned as a string. Reformat(): the main function exported by this module. """ @@ -36,11 +35,11 @@ from yapf.yapflib import style from yapf.yapflib import verifier -def Reformat(uwlines, verify=False, lines=None): - """Reformat the unwrapped lines. 
+def Reformat(llines, verify=False, lines=None): + """Reformat the logical lines. Arguments: - uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format. + llines: (list of logical_line.LogicalLine) Lines we want to format. verify: (bool) True if reformatted code should be verified for syntax. lines: (set of int) The lines which can be modified or None if there is no line range restriction. @@ -49,66 +48,81 @@ def Reformat(uwlines, verify=False, lines=None): A string representing the reformatted code. """ final_lines = [] - prev_uwline = None # The previous line. + prev_line = None # The previous line. indent_width = style.Get('INDENT_WIDTH') - for uwline in _SingleOrMergedLines(uwlines): - first_token = uwline.first - _FormatFirstToken(first_token, uwline.depth, prev_uwline, final_lines) + for lline in _SingleOrMergedLines(llines): + first_token = lline.first + _FormatFirstToken(first_token, lline.depth, prev_line, final_lines) - indent_amt = indent_width * uwline.depth - state = format_decision_state.FormatDecisionState(uwline, indent_amt) + indent_amt = indent_width * lline.depth + state = format_decision_state.FormatDecisionState(lline, indent_amt) state.MoveStateToNextToken() - if not uwline.disable: - if uwline.first.is_comment: - uwline.first.node.value = uwline.first.node.value.rstrip() - elif uwline.last.is_comment: - uwline.last.node.value = uwline.last.node.value.rstrip() - if prev_uwline and prev_uwline.disable: + if not lline.disable: + if lline.first.is_comment: + lline.first.node.value = lline.first.node.value.rstrip() + elif lline.last.is_comment: + lline.last.node.value = lline.last.node.value.rstrip() + if prev_line and prev_line.disable: # Keep the vertical spacing between a disabled and enabled formatting # region. 
- _RetainRequiredVerticalSpacingBetweenTokens(uwline.first, - prev_uwline.last, lines) - if any(tok.is_comment for tok in uwline.tokens): - _RetainVerticalSpacingBeforeComments(uwline) - - if (_LineContainsI18n(uwline) or uwline.disable or - _LineHasContinuationMarkers(uwline)): - _RetainHorizontalSpacing(uwline) - _RetainRequiredVerticalSpacing(uwline, prev_uwline, lines) + _RetainRequiredVerticalSpacingBetweenTokens(lline.first, prev_line.last, + lines) + if any(tok.is_comment for tok in lline.tokens): + _RetainVerticalSpacingBeforeComments(lline) + + if lline.disable or _LineHasContinuationMarkers(lline): + _RetainHorizontalSpacing(lline) + _RetainRequiredVerticalSpacing(lline, prev_line, lines) _EmitLineUnformatted(state) - elif _CanPlaceOnSingleLine(uwline) and not any(tok.must_split - for tok in uwline.tokens): - # The unwrapped line fits on one line. + + elif (_LineContainsPylintDisableLineTooLong(lline) or + _LineContainsI18n(lline)): + # Don't modify vertical spacing, but fix any horizontal spacing issues. + _RetainRequiredVerticalSpacing(lline, prev_line, lines) + _EmitLineUnformatted(state) + + elif _CanPlaceOnSingleLine(lline) and not any(tok.must_break_before + for tok in lline.tokens): + # The logical line fits on one line. while state.next_token: state.AddTokenToState(newline=False, dry_run=False) - else: - if not _AnalyzeSolutionSpace(state): - # Failsafe mode. If there isn't a solution to the line, then just emit - # it as is. - state = format_decision_state.FormatDecisionState(uwline, indent_amt) - state.MoveStateToNextToken() - _RetainHorizontalSpacing(uwline) - _RetainRequiredVerticalSpacing(uwline, prev_uwline, None) - _EmitLineUnformatted(state) - - final_lines.append(uwline) - prev_uwline = uwline + + elif not _AnalyzeSolutionSpace(state): + # Failsafe mode. If there isn't a solution to the line, then just emit + # it as is. 
+ state = format_decision_state.FormatDecisionState(lline, indent_amt) + state.MoveStateToNextToken() + _RetainHorizontalSpacing(lline) + _RetainRequiredVerticalSpacing(lline, prev_line, None) + _EmitLineUnformatted(state) + + final_lines.append(lline) + prev_line = lline + + _AlignTrailingComments(final_lines) return _FormatFinalLines(final_lines, verify) -def _RetainHorizontalSpacing(uwline): +def _RetainHorizontalSpacing(line): """Retain all horizontal spacing between tokens.""" - for tok in uwline.tokens: - tok.RetainHorizontalSpacing(uwline.first.column, uwline.depth) + for tok in line.tokens: + tok.RetainHorizontalSpacing(line.first.column, line.depth) -def _RetainRequiredVerticalSpacing(cur_uwline, prev_uwline, lines): +def _RetainRequiredVerticalSpacing(cur_line, prev_line, lines): + """Retain all vertical spacing between lines.""" prev_tok = None - if prev_uwline is not None: - prev_tok = prev_uwline.last - for cur_tok in cur_uwline.tokens: + if prev_line is not None: + prev_tok = prev_line.last + + if cur_line.disable: + # After the first token we are acting on a single line. So if it is + # disabled we must not reformat. 
+ lines = set() + + for cur_tok in cur_line.tokens: _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines) prev_tok = cur_tok @@ -120,7 +134,7 @@ def _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines): if prev_tok.is_string: prev_lineno = prev_tok.lineno + prev_tok.value.count('\n') - elif prev_tok.is_pseudo_paren: + elif prev_tok.is_pseudo: if not prev_tok.previous_token.is_multiline_string: prev_lineno = prev_tok.previous_token.lineno else: @@ -133,14 +147,14 @@ def _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines): else: cur_lineno = cur_tok.lineno - if prev_tok.value.endswith('\\'): + if not prev_tok.is_comment and prev_tok.value.endswith('\\'): prev_lineno += prev_tok.value.count('\n') required_newlines = cur_lineno - prev_lineno if cur_tok.is_comment and not prev_tok.is_comment: # Don't adjust between a comment and non-comment. pass - elif lines and (cur_lineno in lines or prev_lineno in lines): + elif lines and lines.intersection(range(prev_lineno, cur_lineno + 1)): desired_newlines = cur_tok.whitespace_prefix.count('\n') whitespace_lines = range(prev_lineno + 1, cur_lineno) deletable_lines = len(lines.intersection(whitespace_lines)) @@ -150,10 +164,10 @@ def _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines): cur_tok.AdjustNewlinesBefore(required_newlines) -def _RetainVerticalSpacingBeforeComments(uwline): +def _RetainVerticalSpacingBeforeComments(line): """Retain vertical spacing before comments.""" prev_token = None - for tok in uwline.tokens: + for tok in line.tokens: if tok.is_comment and prev_token: if tok.lineno - tok.value.count('\n') - prev_token.lineno > 1: tok.AdjustNewlinesBefore(ONE_BLANK_LINE) @@ -173,7 +187,6 @@ def _EmitLineUnformatted(state): state: (format_decision_state.FormatDecisionState) The format decision state. 
""" - prev_lineno = None while state.next_token: previous_token = state.next_token.previous_token previous_lineno = previous_token.lineno @@ -184,67 +197,200 @@ def _EmitLineUnformatted(state): if previous_token.is_continuation: newline = False else: - newline = ( - prev_lineno is not None and state.next_token.lineno > previous_lineno) + newline = state.next_token.lineno > previous_lineno - prev_lineno = state.next_token.lineno state.AddTokenToState(newline=newline, dry_run=False) -def _LineContainsI18n(uwline): +def _LineContainsI18n(line): """Return true if there are i18n comments or function calls in the line. I18n comments and pseudo-function calls are closely related. They cannot be moved apart without breaking i18n. Arguments: - uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted. + line: (logical_line.LogicalLine) The line currently being formatted. Returns: True if the line contains i18n comments or function calls. False otherwise. """ if style.Get('I18N_COMMENT'): - for tok in uwline.tokens: + for tok in line.tokens: if tok.is_comment and re.match(style.Get('I18N_COMMENT'), tok.value): # Contains an i18n comment. 
return True if style.Get('I18N_FUNCTION_CALL'): - length = len(uwline.tokens) - index = 0 - while index < length - 1: - if (uwline.tokens[index + 1].value == '(' and - uwline.tokens[index].value in style.Get('I18N_FUNCTION_CALL')): + length = len(line.tokens) + for index in range(length - 1): + if (line.tokens[index + 1].value == '(' and + line.tokens[index].value in style.Get('I18N_FUNCTION_CALL')): return True - index += 1 - return False -def _LineHasContinuationMarkers(uwline): +def _LineContainsPylintDisableLineTooLong(line): + """Return true if there is a "pylint: disable=line-too-long" comment.""" + return re.search(r'\bpylint:\s+disable=line-too-long\b', line.last.value) + + +def _LineHasContinuationMarkers(line): """Return true if the line has continuation markers in it.""" - return any(tok.is_continuation for tok in uwline.tokens) + return any(tok.is_continuation for tok in line.tokens) -def _CanPlaceOnSingleLine(uwline): - """Determine if the unwrapped line can go on a single line. +def _CanPlaceOnSingleLine(line): + """Determine if the logical line can go on a single line. Arguments: - uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted. + line: (logical_line.LogicalLine) The line currently being formatted. Returns: True if the line can or should be added to a single line. False otherwise. 
""" - indent_amt = style.Get('INDENT_WIDTH') * uwline.depth - last = uwline.last + token_names = [x.name for x in line.tokens] + if (style.Get('FORCE_MULTILINE_DICT') and 'LBRACE' in token_names): + return False + indent_amt = style.Get('INDENT_WIDTH') * line.depth + last = line.last last_index = -1 - if last.is_pylint_comment or last.is_pytype_comment: + if (last.is_pylint_comment or last.is_pytype_comment or + last.is_copybara_comment): last = last.previous_token last_index = -2 if last is None: return True return (last.total_length + indent_amt <= style.Get('COLUMN_LIMIT') and - not any(tok.is_comment for tok in uwline.tokens[:last_index])) + not any(tok.is_comment for tok in line.tokens[:last_index])) + + +def _AlignTrailingComments(final_lines): + """Align trailing comments to the same column.""" + final_lines_index = 0 + while final_lines_index < len(final_lines): + line = final_lines[final_lines_index] + assert line.tokens + + processed_content = False + + for tok in line.tokens: + if (tok.is_comment and isinstance(tok.spaces_required_before, list) and + tok.value.startswith('#')): + # All trailing comments and comments that appear on a line by themselves + # in this block should be indented at the same level. The block is + # terminated by an empty line or EOF. Enumerate through each line in + # the block and calculate the max line length. Once complete, use the + # first col value greater than that value and create the necessary for + # each line accordingly. + all_pc_line_lengths = [] # All pre-comment line lengths + max_line_length = 0 + + while True: + # EOF + if final_lines_index + len(all_pc_line_lengths) == len(final_lines): + break + + this_line = final_lines[final_lines_index + len(all_pc_line_lengths)] + + # Blank line - note that content is preformatted so we don't need to + # worry about spaces/tabs; a blank line will always be '\n\n'. 
+ assert this_line.tokens + if (all_pc_line_lengths and + this_line.tokens[0].formatted_whitespace_prefix.startswith('\n\n') + ): + break + + if this_line.disable: + all_pc_line_lengths.append([]) + continue + + # Calculate the length of each line in this logical line. + line_content = '' + pc_line_lengths = [] + + for line_tok in this_line.tokens: + whitespace_prefix = line_tok.formatted_whitespace_prefix + + newline_index = whitespace_prefix.rfind('\n') + if newline_index != -1: + max_line_length = max(max_line_length, len(line_content)) + line_content = '' + + whitespace_prefix = whitespace_prefix[newline_index + 1:] + + if line_tok.is_comment: + pc_line_lengths.append(len(line_content)) + else: + line_content += '{}{}'.format(whitespace_prefix, line_tok.value) + + if pc_line_lengths: + max_line_length = max(max_line_length, max(pc_line_lengths)) + + all_pc_line_lengths.append(pc_line_lengths) + + # Calculate the aligned column value + max_line_length += 2 + + aligned_col = None + for potential_col in tok.spaces_required_before: + if potential_col > max_line_length: + aligned_col = potential_col + break + + if aligned_col is None: + aligned_col = max_line_length + + # Update the comment token values based on the aligned values + for all_pc_line_lengths_index, pc_line_lengths in enumerate( + all_pc_line_lengths): + if not pc_line_lengths: + continue + + this_line = final_lines[final_lines_index + all_pc_line_lengths_index] + + pc_line_length_index = 0 + for line_tok in this_line.tokens: + if line_tok.is_comment: + assert pc_line_length_index < len(pc_line_lengths) + assert pc_line_lengths[pc_line_length_index] < aligned_col + + # Note that there may be newlines embedded in the comments, so + # we need to apply a whitespace prefix to each line. 
+ whitespace = ' ' * ( + aligned_col - pc_line_lengths[pc_line_length_index] - 1) + pc_line_length_index += 1 + + line_content = [] + + for comment_line_index, comment_line in enumerate( + line_tok.value.split('\n')): + line_content.append('{}{}'.format(whitespace, + comment_line.strip())) + + if comment_line_index == 0: + whitespace = ' ' * (aligned_col - 1) + + line_content = '\n'.join(line_content) + + # Account for initial whitespace already slated for the + # beginning of the line. + existing_whitespace_prefix = \ + line_tok.formatted_whitespace_prefix.lstrip('\n') + + if line_content.startswith(existing_whitespace_prefix): + line_content = line_content[len(existing_whitespace_prefix):] + + line_tok.value = line_content + + assert pc_line_length_index == len(pc_line_lengths) + + final_lines_index += len(all_pc_line_lengths) + + processed_content = True + break + + if not processed_content: + final_lines_index += 1 def _FormatFinalLines(final_lines, verify): @@ -253,15 +399,14 @@ def _FormatFinalLines(final_lines, verify): for line in final_lines: formatted_line = [] for tok in line.tokens: - if not tok.is_pseudo_paren: - formatted_line.append(tok.whitespace_prefix) + if not tok.is_pseudo: + formatted_line.append(tok.formatted_whitespace_prefix) formatted_line.append(tok.value) - else: - if (not tok.next_token.whitespace_prefix.startswith('\n') and + elif (not tok.next_token.whitespace_prefix.startswith('\n') and not tok.next_token.whitespace_prefix.startswith(' ')): - if (tok.previous_token.value == ':' or - tok.next_token.value not in ',}])'): - formatted_line.append(' ') + if (tok.previous_token.value == ':' or + tok.next_token.value not in ',}])'): + formatted_line.append(' ') formatted_code.append(''.join(formatted_line)) if verify: @@ -340,10 +485,13 @@ def _AnalyzeSolutionSpace(initial_state): if count > 10000: node.state.ignore_stack_for_comparison = True - if node.state in seen: - continue - + # Unconditionally add the state and check if it was present 
to avoid having + # to hash it twice in the common case (state hashing is expensive). + before_seen_count = len(seen) seen.add(node.state) + # If seen didn't change size, the state was already present. + if before_seen_count == len(seen): + continue # FIXME(morbo): Add a 'decision' element? @@ -409,24 +557,38 @@ def _ReconstructPath(initial_state, current): initial_state.AddTokenToState(newline=node.newline, dry_run=False) -def _FormatFirstToken(first_token, indent_depth, prev_uwline, final_lines): - """Format the first token in the unwrapped line. +NESTED_DEPTH = [] + - Add a newline and the required indent before the first token of the unwrapped +def _FormatFirstToken(first_token, indent_depth, prev_line, final_lines): + """Format the first token in the logical line. + + Add a newline and the required indent before the first token of the logical line. Arguments: - first_token: (format_token.FormatToken) The first token in the unwrapped - line. + first_token: (format_token.FormatToken) The first token in the logical line. indent_depth: (int) The line's indentation depth. - prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line - previous to this line. - final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines - that have already been processed. + prev_line: (list of logical_line.LogicalLine) The logical line previous to + this line. + final_lines: (list of logical_line.LogicalLine) The logical lines that have + already been processed. 
""" + global NESTED_DEPTH + while NESTED_DEPTH and NESTED_DEPTH[-1] > indent_depth: + NESTED_DEPTH.pop() + + first_nested = False + if _IsClassOrDef(first_token): + if not NESTED_DEPTH: + NESTED_DEPTH = [indent_depth] + elif NESTED_DEPTH[-1] < indent_depth: + first_nested = True + NESTED_DEPTH.append(indent_depth) + first_token.AddWhitespacePrefix( - _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, - final_lines), + _CalculateNumberOfNewlines(first_token, indent_depth, prev_line, + final_lines, first_nested), indent_level=indent_depth) @@ -435,70 +597,82 @@ ONE_BLANK_LINE = 2 TWO_BLANK_LINES = 3 -def _IsClassOrDef(uwline): - if uwline.first.value in {'class', 'def'}: +def _IsClassOrDef(tok): + if tok.value in {'class', 'def', '@'}: return True - - return [t.value for t in uwline.tokens[:2]] == ['async', 'def'] + return (tok.next_token and tok.value == 'async' and + tok.next_token.value == 'def') -def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, - final_lines): +def _CalculateNumberOfNewlines(first_token, indent_depth, prev_line, + final_lines, first_nested): """Calculate the number of newlines we need to add. Arguments: - first_token: (format_token.FormatToken) The first token in the unwrapped + first_token: (format_token.FormatToken) The first token in the logical line. indent_depth: (int) The line's indentation depth. - prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line - previous to this line. - final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines - that have already been processed. + prev_line: (list of logical_line.LogicalLine) The logical line previous to + this line. + final_lines: (list of logical_line.LogicalLine) The logical lines that have + already been processed. + first_nested: (boolean) Whether this is the first nested class or function. Returns: The number of newlines needed before the first token. """ # TODO(morbo): Special handling for imports. 
# TODO(morbo): Create a knob that can tune these. - if prev_uwline is None: + if prev_line is None: # The first line in the file. Don't add blank lines. # FIXME(morbo): Is this correct? if first_token.newlines is not None: - pytree_utils.SetNodeAnnotation(first_token.node, - pytree_utils.Annotation.NEWLINES, None) + first_token.newlines = None return 0 if first_token.is_docstring: - if (prev_uwline.first.value == 'class' and + if (prev_line.first.value == 'class' and style.Get('BLANK_LINE_BEFORE_CLASS_DOCSTRING')): # Enforce a blank line before a class's docstring. return ONE_BLANK_LINE - elif (prev_uwline.first.value.startswith('#') and + elif (prev_line.first.value.startswith('#') and style.Get('BLANK_LINE_BEFORE_MODULE_DOCSTRING')): # Enforce a blank line before a module's docstring. return ONE_BLANK_LINE # The docstring shouldn't have a newline before it. return NO_BLANK_LINES - prev_last_token = prev_uwline.last + if first_token.is_name and not indent_depth: + if prev_line.first.value in {'from', 'import'}: + # Support custom number of blank lines between top-level imports and + # variable definitions. + return 1 + style.Get( + 'BLANK_LINES_BETWEEN_TOP_LEVEL_IMPORTS_AND_VARIABLES') + + prev_last_token = prev_line.last if prev_last_token.is_docstring: if (not indent_depth and first_token.value in {'class', 'def', 'async'}): # Separate a class or function from the module-level docstring with # appropriate number of blank lines. 
return 1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION') + if (first_nested and + not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF') and + _IsClassOrDef(first_token)): + first_token.newlines = None + return NO_BLANK_LINES if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token, prev_last_token): return NO_BLANK_LINES else: return ONE_BLANK_LINE - if first_token.value in {'class', 'def', 'async', '@'}: + if _IsClassOrDef(first_token): # TODO(morbo): This can go once the blank line calculator is more # sophisticated. if not indent_depth: # This is a top-level class or function. is_inline_comment = prev_last_token.whitespace_prefix.count('\n') == 0 - if (not prev_uwline.disable and prev_last_token.is_comment and + if (not prev_line.disable and prev_last_token.is_comment and not is_inline_comment): # This token follows a non-inline comment. if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token, @@ -516,13 +690,12 @@ def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, prev_last_token.AdjustNewlinesBefore( 1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION')) if first_token.newlines is not None: - pytree_utils.SetNodeAnnotation( - first_token.node, pytree_utils.Annotation.NEWLINES, None) + first_token.newlines = None return NO_BLANK_LINES - elif _IsClassOrDef(prev_uwline): - if not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'): - pytree_utils.SetNodeAnnotation(first_token.node, - pytree_utils.Annotation.NEWLINES, None) + elif _IsClassOrDef(prev_line.first): + if first_nested and not style.Get( + 'BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'): + first_token.newlines = None return NO_BLANK_LINES # Calculate how many newlines were between the original lines. We want to @@ -542,11 +715,11 @@ def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, return NO_BLANK_LINES -def _SingleOrMergedLines(uwlines): +def _SingleOrMergedLines(lines): """Generate the lines we want to format. 
Arguments: - uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format. + lines: (list of logical_line.LogicalLine) Lines we want to format. Yields: Either a single line, if the current line cannot be merged with the @@ -554,38 +727,37 @@ def _SingleOrMergedLines(uwlines): """ index = 0 last_was_merged = False - while index < len(uwlines): - if uwlines[index].disable: - uwline = uwlines[index] + while index < len(lines): + if lines[index].disable: + line = lines[index] index += 1 - while index < len(uwlines): - column = uwline.last.column + 2 - if uwlines[index].lineno != uwline.lineno: + while index < len(lines): + column = line.last.column + 2 + if lines[index].lineno != line.lineno: break - if uwline.last.value != ':': + if line.last.value != ':': leaf = pytree.Leaf( - type=token.SEMI, value=';', context=('', (uwline.lineno, column))) - uwline.AppendToken(format_token.FormatToken(leaf)) - for tok in uwlines[index].tokens: - uwline.AppendToken(tok) + type=token.SEMI, value=';', context=('', (line.lineno, column))) + line.AppendToken(format_token.FormatToken(leaf)) + for tok in lines[index].tokens: + line.AppendToken(tok) index += 1 - yield uwline - elif line_joiner.CanMergeMultipleLines(uwlines[index:], last_was_merged): + yield line + elif line_joiner.CanMergeMultipleLines(lines[index:], last_was_merged): # TODO(morbo): This splice is potentially very slow. Come up with a more # performance-friendly way of determining if two lines can be merged. - next_uwline = uwlines[index + 1] - for tok in next_uwline.tokens: - uwlines[index].AppendToken(tok) - if (len(next_uwline.tokens) == 1 and - next_uwline.first.is_multiline_string): + next_line = lines[index + 1] + for tok in next_line.tokens: + lines[index].AppendToken(tok) + if (len(next_line.tokens) == 1 and next_line.first.is_multiline_string): # This may be a multiline shebang. In that case, we want to retain the # formatting. Otherwise, it could mess up the shell script's syntax. 
- uwlines[index].disable = True - yield uwlines[index] + lines[index].disable = True + yield lines[index] index += 2 last_was_merged = True else: - yield uwlines[index] + yield lines[index] index += 1 last_was_merged = False @@ -602,9 +774,8 @@ def _NoBlankLinesBeforeCurrentToken(text, cur_token, prev_token): Arguments: text: (unicode) The text of the docstring or comment before the current token. - cur_token: (format_token.FormatToken) The current token in the unwrapped - line. - prev_token: (format_token.FormatToken) The previous token in the unwrapped + cur_token: (format_token.FormatToken) The current token in the logical line. + prev_token: (format_token.FormatToken) The previous token in the logical line. Returns: diff --git a/yapf/yapflib/split_penalty.py b/yapf/yapflib/split_penalty.py index 416eda3..643ae24 100644 --- a/yapf/yapflib/split_penalty.py +++ b/yapf/yapflib/split_penalty.py @@ -16,21 +16,24 @@ import re from lib2to3 import pytree +from lib2to3.pgen2 import token as grammar_token from yapf.yapflib import format_token from yapf.yapflib import py3compat from yapf.yapflib import pytree_utils from yapf.yapflib import pytree_visitor from yapf.yapflib import style +from yapf.yapflib import subtypes # TODO(morbo): Document the annotations in a centralized place. E.g., the # README file. UNBREAKABLE = 1000 * 1000 -NAMED_ASSIGN = 11000 +NAMED_ASSIGN = 15000 DOTTED_NAME = 4000 VERY_STRONGLY_CONNECTED = 3500 STRONGLY_CONNECTED = 3000 CONNECTED = 500 +TOGETHER = 100 OR_TEST = 1000 AND_TEST = 1100 @@ -46,7 +49,8 @@ TERM = 2000 FACTOR = 2100 POWER = 2200 ATOM = 2300 -ONE_ELEMENT_ARGUMENT = 2500 +ONE_ELEMENT_ARGUMENT = 500 +SUBSCRIPT = 6000 def ComputeSplitPenalties(tree): @@ -64,6 +68,10 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): Split penalties are attached as annotations to tokens. """ + def Visit(self, node): + if not hasattr(node, 'is_pseudo'): # Ignore pseudo tokens. 
+ super(_SplitPenaltyAssigner, self).Visit(node) + def Visit_import_as_names(self, node): # pyline: disable=invalid-name # import_as_names ::= import_as_name (',' import_as_name)* [','] self.DefaultNodeVisit(node) @@ -117,15 +125,15 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): allow_multiline_lambdas = style.Get('ALLOW_MULTILINE_LAMBDAS') if not allow_multiline_lambdas: for child in node.children: - if pytree_utils.NodeName(child) == 'COMMENT': + if child.type == grammar_token.COMMENT: if re.search(r'pylint:.*disable=.*\bg-long-lambda', child.value): allow_multiline_lambdas = True break if allow_multiline_lambdas: - _SetStronglyConnected(node) + _SetExpressionPenalty(node, STRONGLY_CONNECTED) else: - self._SetUnbreakableOnChildren(node) + _SetExpressionPenalty(node, VERY_STRONGLY_CONNECTED) def Visit_parameters(self, node): # pylint: disable=invalid-name # parameters ::= '(' [typedargslist] ')' @@ -133,18 +141,25 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): # Can't break before the opening paren of a parameter list. _SetUnbreakable(node.children[0]) - if not style.Get('DEDENT_CLOSING_BRACKETS'): + if not (style.Get('INDENT_CLOSING_BRACKETS') or + style.Get('DEDENT_CLOSING_BRACKETS')): _SetStronglyConnected(node.children[-1]) def Visit_arglist(self, node): # pylint: disable=invalid-name # arglist ::= argument (',' argument)* [','] + if node.children[0].type == grammar_token.STAR: + # Python 3 treats a star expression as a specific expression type. + # Process it in that method. 
+ self.Visit_star_expr(node) + return + self.DefaultNodeVisit(node) - index = 1 - while index < len(node.children): + + for index in py3compat.range(1, len(node.children)): child = node.children[index] if isinstance(child, pytree.Leaf) and child.value == ',': _SetUnbreakable(child) - index += 1 + for child in node.children: if pytree_utils.NodeName(child) == 'atom': _IncreasePenalty(child, CONNECTED) @@ -152,32 +167,34 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): def Visit_argument(self, node): # pylint: disable=invalid-name # argument ::= test [comp_for] | test '=' test # Really [keyword '='] test self.DefaultNodeVisit(node) - index = 1 - while index < len(node.children) - 1: + + for index in py3compat.range(1, len(node.children) - 1): child = node.children[index] if isinstance(child, pytree.Leaf) and child.value == '=': _SetSplitPenalty( pytree_utils.FirstLeafNode(node.children[index]), NAMED_ASSIGN) _SetSplitPenalty( pytree_utils.FirstLeafNode(node.children[index + 1]), NAMED_ASSIGN) - index += 1 def Visit_tname(self, node): # pylint: disable=invalid-name # tname ::= NAME [':' test] self.DefaultNodeVisit(node) - index = 1 - while index < len(node.children) - 1: + + for index in py3compat.range(1, len(node.children) - 1): child = node.children[index] if isinstance(child, pytree.Leaf) and child.value == ':': _SetSplitPenalty( pytree_utils.FirstLeafNode(node.children[index]), NAMED_ASSIGN) _SetSplitPenalty( pytree_utils.FirstLeafNode(node.children[index + 1]), NAMED_ASSIGN) - index += 1 def Visit_dotted_name(self, node): # pylint: disable=invalid-name # dotted_name ::= NAME ('.' 
NAME)* - self._SetUnbreakableOnChildren(node) + for child in node.children: + self.Visit(child) + start = 2 if hasattr(node.children[0], 'is_pseudo') else 1 + for i in py3compat.range(start, len(node.children)): + _SetUnbreakable(node.children[i]) def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name # dictsetmaker ::= ( (test ':' test @@ -185,7 +202,7 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): # (test (comp_for | (',' test)* [','])) ) for child in node.children: self.Visit(child) - if pytree_utils.NodeName(child) == 'COLON': + if child.type == grammar_token.COLON: # This is a key to a dictionary. We don't want to split the key if at # all possible. _SetStronglyConnected(child) @@ -193,8 +210,11 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): def Visit_trailer(self, node): # pylint: disable=invalid-name # trailer ::= '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME if node.children[0].value == '.': - self._SetUnbreakableOnChildren(node) - _SetSplitPenalty(node.children[1], DOTTED_NAME) + before = style.Get('SPLIT_BEFORE_DOT') + _SetSplitPenalty(node.children[0], + VERY_STRONGLY_CONNECTED if before else DOTTED_NAME) + _SetSplitPenalty(node.children[1], + DOTTED_NAME if before else VERY_STRONGLY_CONNECTED) elif len(node.children) == 2: # Don't split an empty argument list if at all possible. 
_SetSplitPenalty(node.children[1], VERY_STRONGLY_CONNECTED) @@ -211,7 +231,7 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): _SetSplitPenalty( pytree_utils.FirstLeafNode(node.children[1]), ONE_ELEMENT_ARGUMENT) - elif (pytree_utils.NodeName(node.children[0]) == 'LSQB' and + elif (node.children[0].type == grammar_token.LSQB and len(node.children[1].children) > 2 and (name.endswith('_test') or name.endswith('_expr'))): _SetStronglyConnected(node.children[1].children[0]) @@ -230,13 +250,23 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): pytree_utils.FirstLeafNode(node.children[1].children[2]), 0) # Don't split the ending bracket of a subscript list. - _SetVeryStronglyConnected(node.children[-1]) + _RecAnnotate(node.children[-1], pytree_utils.Annotation.SPLIT_PENALTY, + VERY_STRONGLY_CONNECTED) elif name not in { 'arglist', 'argument', 'term', 'or_test', 'and_test', 'comparison', 'atom', 'power' }: # Don't split an argument list with one element if at all possible. - _SetStronglyConnected(node.children[1], node.children[2]) + stypes = pytree_utils.GetNodeAnnotation( + pytree_utils.FirstLeafNode(node), pytree_utils.Annotation.SUBTYPE) + if stypes and subtypes.SUBSCRIPT_BRACKET in stypes: + _IncreasePenalty(node, SUBSCRIPT) + + # Bump up the split penalty for the first part of a subscript. We + # would rather not split there. + _IncreasePenalty(node.children[1], CONNECTED) + else: + _SetStronglyConnected(node.children[1], node.children[2]) if name == 'arglist': _SetStronglyConnected(node.children[-1]) @@ -253,7 +283,9 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): pytree_utils.NodeName(node.children[1]) == 'trailer'): # children[1] itself is a whole trailer: we don't want to # mark all of it as unbreakable, only its first token: (, [ or . 
- _SetUnbreakable(node.children[1].children[0]) + first = pytree_utils.FirstLeafNode(node.children[1]) + if first.value != '.': + _SetUnbreakable(node.children[1].children[0]) # A special case when there are more trailers in the sequence. Given: # atom tr1 tr2 @@ -265,20 +297,20 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): while prev_trailer_idx < len(node.children) - 1: cur_trailer_idx = prev_trailer_idx + 1 cur_trailer = node.children[cur_trailer_idx] - if pytree_utils.NodeName(cur_trailer) == 'trailer': - # Now we know we have two trailers one after the other - prev_trailer = node.children[prev_trailer_idx] - if prev_trailer.children[-1].value != ')': - # Set the previous node unbreakable if it's not a function call: - # atom tr1() tr2 - # It may be necessary (though undesirable) to split up a previous - # function call's parentheses to the next line. - _SetStronglyConnected(prev_trailer.children[-1]) - _SetStronglyConnected(cur_trailer.children[0]) - prev_trailer_idx = cur_trailer_idx - else: + if pytree_utils.NodeName(cur_trailer) != 'trailer': break + # Now we know we have two trailers one after the other + prev_trailer = node.children[prev_trailer_idx] + if prev_trailer.children[-1].value != ')': + # Set the previous node unbreakable if it's not a function call: + # atom tr1() tr2 + # It may be necessary (though undesirable) to split up a previous + # function call's parentheses to the next line. + _SetStronglyConnected(prev_trailer.children[-1]) + _SetStronglyConnected(cur_trailer.children[0]) + prev_trailer_idx = cur_trailer_idx + # We don't want to split before the last ')' of a function call. This also # takes care of the special case of: # atom tr1 tr2 ... 
trn @@ -288,16 +320,17 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): break if trailer.children[0].value in '([': if len(trailer.children) > 2: - subtypes = pytree_utils.GetNodeAnnotation( + stypes = pytree_utils.GetNodeAnnotation( trailer.children[0], pytree_utils.Annotation.SUBTYPE) - if subtypes and format_token.Subtype.SUBSCRIPT_BRACKET in subtypes: + if stypes and subtypes.SUBSCRIPT_BRACKET in stypes: _SetStronglyConnected( pytree_utils.FirstLeafNode(trailer.children[1])) last_child_node = pytree_utils.LastLeafNode(trailer) if last_child_node.value.strip().startswith('#'): last_child_node = last_child_node.prev_sibling - if not style.Get('DEDENT_CLOSING_BRACKETS'): + if not (style.Get('INDENT_CLOSING_BRACKETS') or + style.Get('DEDENT_CLOSING_BRACKETS')): last = pytree_utils.LastLeafNode(last_child_node.prev_sibling) if last.value != ',': if last_child_node.value == ']': @@ -310,9 +343,15 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): # split the two. _SetStronglyConnected(trailer.children[-1]) - # If the original source has a "builder" style calls, then we should allow - # the reformatter to retain that. 
- _AllowBuilderStyleCalls(node) + def Visit_subscriptlist(self, node): # pylint: disable=invalid-name + # subscriptlist ::= subscript (',' subscript)* [','] + self.DefaultNodeVisit(node) + _SetSplitPenalty(pytree_utils.FirstLeafNode(node), 0) + prev_child = None + for child in node.children: + if prev_child and prev_child.type == grammar_token.COMMA: + _SetSplitPenalty(pytree_utils.FirstLeafNode(child), 0) + prev_child = child def Visit_subscript(self, node): # pylint: disable=invalid-name # subscript ::= test | [test] ':' [test] [sliceop] @@ -325,6 +364,10 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): _SetStronglyConnected(*node.children[1:]) self.DefaultNodeVisit(node) + def Visit_old_comp_for(self, node): # pylint: disable=invalid-name + # Python 3.7 + self.Visit_comp_for(node) + def Visit_comp_if(self, node): # pylint: disable=invalid-name # comp_if ::= 'if' old_test [comp_iter] _SetSplitPenalty(node.children[0], @@ -332,6 +375,15 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): _SetStronglyConnected(*node.children[1:]) self.DefaultNodeVisit(node) + def Visit_old_comp_if(self, node): # pylint: disable=invalid-name + # Python 3.7 + self.Visit_comp_if(node) + + def Visit_test(self, node): # pylint: disable=invalid-name + # test ::= or_test ['if' or_test 'else' test] | lambdef + _IncreasePenalty(node, OR_TEST) + self.DefaultNodeVisit(node) + def Visit_or_test(self, node): # pylint: disable=invalid-name # or_test ::= and_test ('or' and_test)* self.DefaultNodeVisit(node) @@ -369,8 +421,7 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): # comparison ::= expr (comp_op expr)* self.DefaultNodeVisit(node) if len(node.children) == 3 and _StronglyConnectedCompOp(node): - _SetSplitPenalty( - pytree_utils.FirstLeafNode(node.children[1]), STRONGLY_CONNECTED) + _IncreasePenalty(node.children[1], VERY_STRONGLY_CONNECTED) _SetSplitPenalty( pytree_utils.FirstLeafNode(node.children[2]), STRONGLY_CONNECTED) else: @@ -385,27 +436,19 @@ class 
_SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): # expr ::= xor_expr ('|' xor_expr)* self.DefaultNodeVisit(node) _IncreasePenalty(node, EXPR) - index = 1 - while index < len(node.children) - 1: - child = node.children[index] - if isinstance(child, pytree.Leaf) and child.value == '|': - if style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'): - _SetSplitPenalty(child, style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')) - else: - _SetSplitPenalty( - pytree_utils.FirstLeafNode(node.children[index + 1]), - style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')) - index += 1 + _SetBitwiseOperandPenalty(node, '|') def Visit_xor_expr(self, node): # pylint: disable=invalid-name # xor_expr ::= and_expr ('^' and_expr)* self.DefaultNodeVisit(node) _IncreasePenalty(node, XOR_EXPR) + _SetBitwiseOperandPenalty(node, '^') def Visit_and_expr(self, node): # pylint: disable=invalid-name # and_expr ::= shift_expr ('&' shift_expr)* self.DefaultNodeVisit(node) _IncreasePenalty(node, AND_EXPR) + _SetBitwiseOperandPenalty(node, '&') def Visit_shift_expr(self, node): # pylint: disable=invalid-name # shift_expr ::= arith_expr (('<<'|'>>') arith_expr)* @@ -418,14 +461,7 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): # arith_expr ::= term (('+'|'-') term)* self.DefaultNodeVisit(node) _IncreasePenalty(node, ARITH_EXPR) - - index = 1 - while index < len(node.children) - 1: - child = node.children[index] - if pytree_utils.NodeName(child) in self._ARITH_OPS: - next_node = pytree_utils.FirstLeafNode(node.children[index + 1]) - _SetSplitPenalty(next_node, ARITH_EXPR) - index += 1 + _SetExpressionOperandPenalty(node, self._ARITH_OPS) _TERM_OPS = frozenset({'STAR', 'AT', 'SLASH', 'PERCENT', 'DOUBLESLASH'}) @@ -433,14 +469,7 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): # term ::= factor (('*'|'@'|'/'|'%'|'//') factor)* self.DefaultNodeVisit(node) _IncreasePenalty(node, TERM) - - index = 1 - while index < len(node.children) - 1: - child = node.children[index] - if pytree_utils.NodeName(child) 
in self._TERM_OPS: - next_node = pytree_utils.FirstLeafNode(node.children[index + 1]) - _SetSplitPenalty(next_node, TERM) - index += 1 + _SetExpressionOperandPenalty(node, self._TERM_OPS) def Visit_factor(self, node): # pyline: disable=invalid-name # factor ::= ('+'|'-'|'~') factor | power @@ -452,7 +481,8 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): # '[' [listmaker] ']' | # '{' [dictsetmaker] '}') self.DefaultNodeVisit(node) - if node.children[0].value == '(': + if (node.children[0].value == '(' and + not hasattr(node.children[0], 'is_pseudo')): if node.children[-1].value == ')': if pytree_utils.NodeName(node.parent) == 'if_stmt': _SetSplitPenalty(node.children[-1], STRONGLY_CONNECTED) @@ -473,20 +503,9 @@ class _SplitPenaltyAssigner(pytree_visitor.PyTreeVisitor): prev_was_comma = True else: if prev_was_comma: - _SetSplitPenalty(pytree_utils.FirstLeafNode(child), 0) + _SetSplitPenalty(pytree_utils.FirstLeafNode(child), TOGETHER) prev_was_comma = False - ############################################################################ - # Helper methods that set the annotations. 
- - def _SetUnbreakableOnChildren(self, node): - """Set an UNBREAKABLE penalty annotation on children of node.""" - for child in node.children: - self.Visit(child) - start = 2 if hasattr(node.children[0], 'is_pseudo') else 1 - for i in py3compat.range(start, len(node.children)): - _SetUnbreakable(node.children[i]) - def _SetUnbreakable(node): """Set an UNBREAKABLE penalty annotation for the given node.""" @@ -500,13 +519,6 @@ def _SetStronglyConnected(*nodes): STRONGLY_CONNECTED) -def _SetVeryStronglyConnected(*nodes): - """Set a VERY_STRONGLY_CONNECTED penalty annotation for the given nodes.""" - for node in nodes: - _RecAnnotate(node, pytree_utils.Annotation.SPLIT_PENALTY, - VERY_STRONGLY_CONNECTED) - - def _SetExpressionPenalty(node, penalty): """Set a penalty annotation on children nodes.""" @@ -528,6 +540,30 @@ def _SetExpressionPenalty(node, penalty): RecExpression(node, pytree_utils.FirstLeafNode(node)) +def _SetBitwiseOperandPenalty(node, op): + for index in py3compat.range(1, len(node.children) - 1): + child = node.children[index] + if isinstance(child, pytree.Leaf) and child.value == op: + if style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'): + _SetSplitPenalty(child, style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')) + else: + _SetSplitPenalty( + pytree_utils.FirstLeafNode(node.children[index + 1]), + style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')) + + +def _SetExpressionOperandPenalty(node, ops): + for index in py3compat.range(1, len(node.children) - 1): + child = node.children[index] + if pytree_utils.NodeName(child) in ops: + if style.Get('SPLIT_BEFORE_ARITHMETIC_OPERATOR'): + _SetSplitPenalty(child, style.Get('SPLIT_PENALTY_ARITHMETIC_OPERATOR')) + else: + _SetSplitPenalty( + pytree_utils.FirstLeafNode(node.children[index + 1]), + style.Get('SPLIT_PENALTY_ARITHMETIC_OPERATOR')) + + def _IncreasePenalty(node, amt): """Increase a penalty annotation on children nodes.""" @@ -536,7 +572,7 @@ def _IncreasePenalty(node, amt): return if isinstance(node, pytree.Leaf): - if 
node.value in {'(', 'for', 'if'}: + if node.value in {'(', 'for'}: return penalty = pytree_utils.GetNodeAnnotation( node, pytree_utils.Annotation.SPLIT_PENALTY, default=0) @@ -570,10 +606,13 @@ def _RecAnnotate(tree, annotate_name, annotate_value): def _StronglyConnectedCompOp(op): if (len(op.children[1].children) == 2 and - pytree_utils.NodeName(op.children[1]) == 'comp_op' and - pytree_utils.FirstLeafNode(op.children[1]).value == 'not' and - pytree_utils.LastLeafNode(op.children[1]).value == 'in'): - return True + pytree_utils.NodeName(op.children[1]) == 'comp_op'): + if (pytree_utils.FirstLeafNode(op.children[1]).value == 'not' and + pytree_utils.LastLeafNode(op.children[1]).value == 'in'): + return True + if (pytree_utils.FirstLeafNode(op.children[1]).value == 'is' and + pytree_utils.LastLeafNode(op.children[1]).value == 'not'): + return True if (isinstance(op.children[1], pytree.Leaf) and op.children[1].value in {'==', 'in'}): return True @@ -590,23 +629,3 @@ def _DecrementSplitPenalty(node, amt): def _SetSplitPenalty(node, penalty): pytree_utils.SetNodeAnnotation(node, pytree_utils.Annotation.SPLIT_PENALTY, penalty) - - -def _AllowBuilderStyleCalls(node): - """Allow splitting before '.' 
if it's a builder style function call.""" - - def RecGetLeaves(node): - if isinstance(node, pytree.Leaf): - return [node] - children = [] - for child in node.children: - children += RecGetLeaves(child) - return children - - list_of_children = RecGetLeaves(node) - prev_child = None - for child in list_of_children: - if child.value == '.': - if prev_child.lineno != child.lineno: - _SetSplitPenalty(child, 0) - prev_child = child diff --git a/yapf/yapflib/style.py b/yapf/yapflib/style.py index 6144246..233a64e 100644 --- a/yapf/yapflib/style.py +++ b/yapf/yapflib/style.py @@ -31,6 +31,11 @@ def Get(setting_name): return _style[setting_name] +def GetOrDefault(setting_name, default_value): + """Get a style setting or default value if the setting does not exist.""" + return _style.get(setting_name, default_value) + + def Help(): """Return dict mapping style names to help strings.""" return _STYLE_HELP @@ -59,8 +64,31 @@ _STYLE_HELP = dict( 'this is the second element of a tuple'): value, }"""), + ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS=textwrap.dedent("""\ + Allow splitting before a default / named assignment in an argument list. + """), ALLOW_SPLIT_BEFORE_DICT_VALUE=textwrap.dedent("""\ Allow splits before the dictionary value."""), + ARITHMETIC_PRECEDENCE_INDICATION=textwrap.dedent("""\ + Let spacing indicate operator precedence. For example: + + a = 1 * 2 + 3 / 4 + b = 1 / 2 - 3 * 4 + c = (1 + 2) * (3 - 4) + d = (1 - 2) / (3 + 4) + e = 1 * 2 - 3 + f = 1 + 2 + 3 + 4 + + will be formatted as follows to indicate precedence: + + a = 1*2 + 3/4 + b = 1/2 - 3*4 + c = (1+2) * (3-4) + d = (1-2) / (3+4) + e = 1*2 - 3 + f = 1 + 2 + 3 + 4 + + """), BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=textwrap.dedent("""\ Insert a blank line before a 'def' or 'class' immediately nested within another 'def' or 'class'. 
For example: @@ -76,6 +104,9 @@ _STYLE_HELP = dict( BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION=textwrap.dedent("""\ Number of blank lines surrounding top-level function and class definitions."""), + BLANK_LINES_BETWEEN_TOP_LEVEL_IMPORTS_AND_VARIABLES=textwrap.dedent("""\ + Number of blank lines between top-level imports and variable + definitions."""), COALESCE_BRACKETS=textwrap.dedent("""\ Do not split consecutive brackets. Only relevant when dedent_closing_brackets is set. For example: @@ -100,16 +131,11 @@ _STYLE_HELP = dict( - SPACE: Use spaces for continuation alignment. This is default behavior. - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns - (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs) for continuation - alignment. - - LESS: Slightly left if cannot vertically align continuation lines with - indent characters. - - VALIGN-RIGHT: Vertically align continuation lines with indent - characters. Slightly right (one more indent character) if cannot - vertically align continuation lines with indent characters. - - For options FIXED, and VALIGN-RIGHT are only available when USE_TABS is - enabled."""), + (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs or + CONTINUATION_INDENT_WIDTH spaces) for continuation alignment. + - VALIGN-RIGHT: Vertically align continuation lines to multiple of + INDENT_WIDTH columns. 
Slightly right (one tab or a few spaces) if + cannot vertically align continuation lines with indent characters."""), CONTINUATION_INDENT_WIDTH=textwrap.dedent("""\ Indent width used for line continuations."""), DEDENT_CLOSING_BRACKETS=textwrap.dedent("""\ @@ -128,12 +154,20 @@ _STYLE_HELP = dict( transform=Transformation.AVERAGE(window=timedelta(seconds=60)), start_ts=now()-timedelta(days=3), end_ts=now(), - ) # <--- this bracket is dedented and on a separate line"""), + ) # <--- this bracket is dedented and on a separate line + """), DISABLE_ENDING_COMMA_HEURISTIC=textwrap.dedent("""\ Disable the heuristic which places each list element on a separate line if the list is comma-terminated."""), EACH_DICT_ENTRY_ON_SEPARATE_LINE=textwrap.dedent("""\ Place each dictionary entry onto its own line."""), + FORCE_MULTILINE_DICT=textwrap.dedent("""\ + Require multiline dictionary even if it would normally fit on one line. + For example: + + config = { + 'key1': 'value1' + }"""), I18N_COMMENT=textwrap.dedent("""\ The regex for an i18n comment. The presence of this comment stops reformatting of that line, because the comments are required to be @@ -142,6 +176,24 @@ _STYLE_HELP = dict( The i18n function call names. The presence of this function stops reformattting on that line, because the string it has cannot be moved away from the i18n comment."""), + INDENT_CLOSING_BRACKETS=textwrap.dedent("""\ + Put closing brackets on a separate line, indented, if the bracketed + expression can't fit in a single line. Applies to all kinds of brackets, + including function definitions and calls. 
For example: + + config = { + 'key1': 'value1', + 'key2': 'value2', + } # <--- this bracket is indented and on a separate line + + time_series = self.remote_client.query_entity_counters( + entity='dev3246.region1', + key='dns.query_latency_tcp', + transform=Transformation.AVERAGE(window=timedelta(seconds=60)), + start_ts=now()-timedelta(days=3), + end_ts=now(), + ) # <--- this bracket is indented and on a separate line + """), INDENT_DICTIONARY_VALUE=textwrap.dedent("""\ Indent the dictionary value if it cannot fit on the same line as the dictionary key. For example: @@ -151,9 +203,12 @@ _STYLE_HELP = dict( 'value1', 'key2': value1 + value2, - }"""), + } + """), INDENT_WIDTH=textwrap.dedent("""\ The number of columns to use for indentation."""), + INDENT_BLANK_LINES=textwrap.dedent("""\ + Indent blank lines."""), JOIN_MULTIPLE_LINES=textwrap.dedent("""\ Join short lines into one line. E.g., single line 'if' statements."""), NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS=textwrap.dedent("""\ @@ -161,25 +216,112 @@ _STYLE_HELP = dict( 1 + 2 * 3 - 4 / 5 - will be formatted as follows when configured with *,/: + will be formatted as follows when configured with "*,/": 1 + 2*3 - 4/5 - """), SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET=textwrap.dedent("""\ Insert a space between the ending comma and closing bracket of a list, etc."""), + SPACE_INSIDE_BRACKETS=textwrap.dedent("""\ + Use spaces inside brackets, braces, and parentheses. For example: + + method_call( 1 ) + my_dict[ 3 ][ 1 ][ get_index( *args, **kwargs ) ] + my_set = { 1, 2, 3 } + """), SPACES_AROUND_POWER_OPERATOR=textwrap.dedent("""\ Use spaces around the power operator."""), SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN=textwrap.dedent("""\ Use spaces around default or named assigns."""), + SPACES_AROUND_DICT_DELIMITERS=textwrap.dedent("""\ + Adds a space after the opening '{' and before the ending '}' dict + delimiters. 
+ + {1: 2} + + will be formatted as: + + { 1: 2 } + """), + SPACES_AROUND_LIST_DELIMITERS=textwrap.dedent("""\ + Adds a space after the opening '[' and before the ending ']' list + delimiters. + + [1, 2] + + will be formatted as: + + [ 1, 2 ] + """), + SPACES_AROUND_SUBSCRIPT_COLON=textwrap.dedent("""\ + Use spaces around the subscript / slice operator. For example: + + my_list[1 : 10 : 2] + """), + SPACES_AROUND_TUPLE_DELIMITERS=textwrap.dedent("""\ + Adds a space after the opening '(' and before the ending ')' tuple + delimiters. + + (1, 2, 3) + + will be formatted as: + + ( 1, 2, 3 ) + """), SPACES_BEFORE_COMMENT=textwrap.dedent("""\ - The number of spaces required before a trailing comment."""), + The number of spaces required before a trailing comment. + This can be a single value (representing the number of spaces + before each trailing comment) or list of values (representing + alignment column values; trailing comments within a block will + be aligned to the first column value that is greater than the maximum + line length within the block). 
For example: + + With spaces_before_comment=5: + + 1 + 1 # Adding values + + will be formatted as: + + 1 + 1 # Adding values <-- 5 spaces between the end of the + # statement and comment + + With spaces_before_comment=15, 20: + + 1 + 1 # Adding values + two + two # More adding + + longer_statement # This is a longer statement + short # This is a shorter statement + + a_very_long_statement_that_extends_beyond_the_final_column # Comment + short # This is a shorter statement + + will be formatted as: + + 1 + 1 # Adding values <-- end of line comments in block + # aligned to col 15 + two + two # More adding + + longer_statement # This is a longer statement <-- end of line + # comments in block aligned to col 20 + short # This is a shorter statement + + a_very_long_statement_that_extends_beyond_the_final_column # Comment <-- the end of line comments are aligned based on the line length + short # This is a shorter statement + + """), # noqa SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED=textwrap.dedent("""\ Split before arguments if the argument list is terminated by a comma."""), SPLIT_ALL_COMMA_SEPARATED_VALUES=textwrap.dedent("""\ Split before arguments"""), + SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES=textwrap.dedent("""\ + Split before arguments, but do not split all subexpressions recursively + (unless needed)."""), + SPLIT_BEFORE_ARITHMETIC_OPERATOR=textwrap.dedent("""\ + Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@' + rather than after."""), SPLIT_BEFORE_BITWISE_OPERATOR=textwrap.dedent("""\ Set to True to prefer splitting before '&', '|' or '^' rather than after."""), @@ -194,6 +336,16 @@ _STYLE_HELP = dict( variable: 'Hello world, have a nice day!' for variable in bar if variable != 42 }"""), + SPLIT_BEFORE_DOT=textwrap.dedent("""\ + Split before the '.' 
if we need to split a longer expression: + + foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d)) + + would reformat to something like: + + foo = ('This is a really long string: {}, {}, {}, {}' + .format(a, b, c, d)) + """), # noqa SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN=textwrap.dedent("""\ Split after the opening paren which surrounds an expression if it doesn't fit on a single line. @@ -226,6 +378,9 @@ _STYLE_HELP = dict( The penalty for splitting right after the opening bracket."""), SPLIT_PENALTY_AFTER_UNARY_OPERATOR=textwrap.dedent("""\ The penalty for splitting the line after a unary operator."""), + SPLIT_PENALTY_ARITHMETIC_OPERATOR=textwrap.dedent("""\ + The penalty of splitting the line around the '+', '-', '*', '/', '//', + ``%``, and '@' operators."""), SPLIT_PENALTY_BEFORE_IF_EXPR=textwrap.dedent("""\ The penalty for splitting right before an if expression."""), SPLIT_PENALTY_BITWISE_OPERATOR=textwrap.dedent("""\ @@ -237,7 +392,7 @@ _STYLE_HELP = dict( SPLIT_PENALTY_EXCESS_CHARACTER=textwrap.dedent("""\ The penalty for characters over the column limit."""), SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT=textwrap.dedent("""\ - The penalty incurred by adding a line split to the unwrapped line. The + The penalty incurred by adding a line split to the logical line. The more line splits added the higher the penalty."""), SPLIT_PENALTY_IMPORT_NAMES=textwrap.dedent("""\ The penalty of splitting a list of "import as" names. 
For example: @@ -250,7 +405,7 @@ _STYLE_HELP = dict( from a_very_long_or_indented_module_name_yada_yad import ( long_argument_1, long_argument_2, long_argument_3) - """), + """), # noqa SPLIT_PENALTY_LOGICAL_OPERATOR=textwrap.dedent("""\ The penalty of splitting the line around the 'and' and 'or' operators."""), @@ -261,48 +416,64 @@ _STYLE_HELP = dict( def CreatePEP8Style(): + """Create the PEP8 formatting style.""" return dict( ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=True, ALLOW_MULTILINE_LAMBDAS=False, ALLOW_MULTILINE_DICTIONARY_KEYS=False, + ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS=True, ALLOW_SPLIT_BEFORE_DICT_VALUE=True, - BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=False, + ARITHMETIC_PRECEDENCE_INDICATION=False, + BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=True, BLANK_LINE_BEFORE_CLASS_DOCSTRING=False, BLANK_LINE_BEFORE_MODULE_DOCSTRING=False, BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION=2, + BLANK_LINES_BETWEEN_TOP_LEVEL_IMPORTS_AND_VARIABLES=1, COALESCE_BRACKETS=False, COLUMN_LIMIT=79, CONTINUATION_ALIGN_STYLE='SPACE', CONTINUATION_INDENT_WIDTH=4, DEDENT_CLOSING_BRACKETS=False, + INDENT_CLOSING_BRACKETS=False, DISABLE_ENDING_COMMA_HEURISTIC=False, EACH_DICT_ENTRY_ON_SEPARATE_LINE=True, + FORCE_MULTILINE_DICT=False, I18N_COMMENT='', I18N_FUNCTION_CALL='', INDENT_DICTIONARY_VALUE=False, INDENT_WIDTH=4, + INDENT_BLANK_LINES=False, JOIN_MULTIPLE_LINES=True, + NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS=set(), SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET=True, + SPACE_INSIDE_BRACKETS=False, SPACES_AROUND_POWER_OPERATOR=False, - NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS=set(), SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN=False, + SPACES_AROUND_DICT_DELIMITERS=False, + SPACES_AROUND_LIST_DELIMITERS=False, + SPACES_AROUND_SUBSCRIPT_COLON=False, + SPACES_AROUND_TUPLE_DELIMITERS=False, SPACES_BEFORE_COMMENT=2, SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED=False, SPLIT_ALL_COMMA_SEPARATED_VALUES=False, + SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES=False, + 
SPLIT_BEFORE_ARITHMETIC_OPERATOR=False, SPLIT_BEFORE_BITWISE_OPERATOR=True, SPLIT_BEFORE_CLOSING_BRACKET=True, SPLIT_BEFORE_DICT_SET_GENERATOR=True, + SPLIT_BEFORE_DOT=False, SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN=False, SPLIT_BEFORE_FIRST_ARGUMENT=False, SPLIT_BEFORE_LOGICAL_OPERATOR=True, SPLIT_BEFORE_NAMED_ASSIGNS=True, SPLIT_COMPLEX_COMPREHENSION=False, - SPLIT_PENALTY_AFTER_OPENING_BRACKET=30, + SPLIT_PENALTY_AFTER_OPENING_BRACKET=300, SPLIT_PENALTY_AFTER_UNARY_OPERATOR=10000, + SPLIT_PENALTY_ARITHMETIC_OPERATOR=300, SPLIT_PENALTY_BEFORE_IF_EXPR=0, SPLIT_PENALTY_BITWISE_OPERATOR=300, SPLIT_PENALTY_COMPREHENSION=80, - SPLIT_PENALTY_EXCESS_CHARACTER=4500, + SPLIT_PENALTY_EXCESS_CHARACTER=7000, SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT=30, SPLIT_PENALTY_IMPORT_NAMES=0, SPLIT_PENALTY_LOGICAL_OPERATOR=300, @@ -311,13 +482,15 @@ def CreatePEP8Style(): def CreateGoogleStyle(): + """Create the Google formatting style.""" style = CreatePEP8Style() style['ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'] = False - style['BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'] = True style['COLUMN_LIMIT'] = 80 + style['INDENT_DICTIONARY_VALUE'] = True style['INDENT_WIDTH'] = 4 style['I18N_COMMENT'] = r'#\..*' style['I18N_FUNCTION_CALL'] = ['N_', '_'] + style['JOIN_MULTIPLE_LINES'] = False style['SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET'] = False style['SPLIT_BEFORE_BITWISE_OPERATOR'] = False style['SPLIT_BEFORE_DICT_SET_GENERATOR'] = False @@ -327,22 +500,26 @@ def CreateGoogleStyle(): return style -def CreateChromiumStyle(): +def CreateYapfStyle(): + """Create the YAPF formatting style.""" style = CreateGoogleStyle() style['ALLOW_MULTILINE_DICTIONARY_KEYS'] = True - style['INDENT_DICTIONARY_VALUE'] = True + style['ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS'] = False style['INDENT_WIDTH'] = 2 - style['JOIN_MULTIPLE_LINES'] = False style['SPLIT_BEFORE_BITWISE_OPERATOR'] = True + style['SPLIT_BEFORE_DOT'] = True style['SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN'] = True return style def 
CreateFacebookStyle(): + """Create the Facebook formatting style.""" style = CreatePEP8Style() style['ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'] = False + style['BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'] = False style['COLUMN_LIMIT'] = 80 style['DEDENT_CLOSING_BRACKETS'] = True + style['INDENT_CLOSING_BRACKETS'] = False style['INDENT_DICTIONARY_VALUE'] = True style['JOIN_MULTIPLE_LINES'] = False style['SPACES_BEFORE_COMMENT'] = 2 @@ -356,16 +533,16 @@ def CreateFacebookStyle(): _STYLE_NAME_TO_FACTORY = dict( pep8=CreatePEP8Style, - chromium=CreateChromiumStyle, google=CreateGoogleStyle, facebook=CreateFacebookStyle, + yapf=CreateYapfStyle, ) _DEFAULT_STYLE_TO_FACTORY = [ - (CreateChromiumStyle(), CreateChromiumStyle), (CreateFacebookStyle(), CreateFacebookStyle), (CreateGoogleStyle(), CreateGoogleStyle), (CreatePEP8Style(), CreatePEP8Style), + (CreateYapfStyle(), CreateYapfStyle), ] @@ -380,7 +557,7 @@ def _ContinuationAlignStyleStringConverter(s): """Option value converter for a continuation align style string.""" accepted_styles = ('SPACE', 'FIXED', 'VALIGN-RIGHT') if s: - r = s.upper() + r = s.strip('"\'').replace('_', '-').upper() if r not in accepted_styles: raise ValueError('unknown continuation align style: %r' % (s,)) else: @@ -395,7 +572,9 @@ def _StringListConverter(s): def _StringSetConverter(s): """Option value converter for a comma-separated set of strings.""" - return set(part.strip() for part in s.split(',')) + if len(s) > 2 and s[0] in '"\'': + s = s[1:-1] + return {part.strip() for part in s.split(',')} def _BoolConverter(s): @@ -403,6 +582,22 @@ def _BoolConverter(s): return py3compat.CONFIGPARSER_BOOLEAN_STATES[s.lower()] +def _IntListConverter(s): + """Option value converter for a comma-separated list of integers.""" + s = s.strip() + if s.startswith('[') and s.endswith(']'): + s = s[1:-1] + + return [int(part.strip()) for part in s.split(',') if part.strip()] + + +def _IntOrIntListConverter(s): + """Option value converter for an integer or list 
of integers.""" + if len(s) > 2 and s[0] in '"\'': + s = s[1:-1] + return _IntListConverter(s) if ',' in s else int(s) + + # Different style options need to have their values interpreted differently when # read from the config file. This dict maps an option name to a "converter" # function that accepts the string read for the option's value from the file and @@ -414,33 +609,47 @@ _STYLE_OPTION_VALUE_CONVERTER = dict( ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT=_BoolConverter, ALLOW_MULTILINE_LAMBDAS=_BoolConverter, ALLOW_MULTILINE_DICTIONARY_KEYS=_BoolConverter, + ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS=_BoolConverter, ALLOW_SPLIT_BEFORE_DICT_VALUE=_BoolConverter, + ARITHMETIC_PRECEDENCE_INDICATION=_BoolConverter, BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF=_BoolConverter, BLANK_LINE_BEFORE_CLASS_DOCSTRING=_BoolConverter, BLANK_LINE_BEFORE_MODULE_DOCSTRING=_BoolConverter, BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION=int, + BLANK_LINES_BETWEEN_TOP_LEVEL_IMPORTS_AND_VARIABLES=int, COALESCE_BRACKETS=_BoolConverter, COLUMN_LIMIT=int, CONTINUATION_ALIGN_STYLE=_ContinuationAlignStyleStringConverter, CONTINUATION_INDENT_WIDTH=int, DEDENT_CLOSING_BRACKETS=_BoolConverter, + INDENT_CLOSING_BRACKETS=_BoolConverter, DISABLE_ENDING_COMMA_HEURISTIC=_BoolConverter, EACH_DICT_ENTRY_ON_SEPARATE_LINE=_BoolConverter, + FORCE_MULTILINE_DICT=_BoolConverter, I18N_COMMENT=str, I18N_FUNCTION_CALL=_StringListConverter, INDENT_DICTIONARY_VALUE=_BoolConverter, INDENT_WIDTH=int, + INDENT_BLANK_LINES=_BoolConverter, JOIN_MULTIPLE_LINES=_BoolConverter, NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS=_StringSetConverter, SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET=_BoolConverter, + SPACE_INSIDE_BRACKETS=_BoolConverter, SPACES_AROUND_POWER_OPERATOR=_BoolConverter, SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN=_BoolConverter, - SPACES_BEFORE_COMMENT=int, + SPACES_AROUND_DICT_DELIMITERS=_BoolConverter, + SPACES_AROUND_LIST_DELIMITERS=_BoolConverter, + SPACES_AROUND_SUBSCRIPT_COLON=_BoolConverter, + 
SPACES_AROUND_TUPLE_DELIMITERS=_BoolConverter, + SPACES_BEFORE_COMMENT=_IntOrIntListConverter, SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED=_BoolConverter, SPLIT_ALL_COMMA_SEPARATED_VALUES=_BoolConverter, + SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES=_BoolConverter, + SPLIT_BEFORE_ARITHMETIC_OPERATOR=_BoolConverter, SPLIT_BEFORE_BITWISE_OPERATOR=_BoolConverter, SPLIT_BEFORE_CLOSING_BRACKET=_BoolConverter, SPLIT_BEFORE_DICT_SET_GENERATOR=_BoolConverter, + SPLIT_BEFORE_DOT=_BoolConverter, SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN=_BoolConverter, SPLIT_BEFORE_FIRST_ARGUMENT=_BoolConverter, SPLIT_BEFORE_LOGICAL_OPERATOR=_BoolConverter, @@ -448,6 +657,7 @@ _STYLE_OPTION_VALUE_CONVERTER = dict( SPLIT_COMPLEX_COMPREHENSION=_BoolConverter, SPLIT_PENALTY_AFTER_OPENING_BRACKET=int, SPLIT_PENALTY_AFTER_UNARY_OPERATOR=int, + SPLIT_PENALTY_ARITHMETIC_OPERATOR=int, SPLIT_PENALTY_BEFORE_IF_EXPR=int, SPLIT_PENALTY_BITWISE_OPERATOR=int, SPLIT_PENALTY_COMPREHENSION=int, @@ -489,6 +699,7 @@ def CreateStyleFromConfig(style_config): if not def_style: return _style return _GLOBAL_STYLE_FACTORY() + if isinstance(style_config, dict): config = _CreateConfigParserFromConfigDict(style_config) elif isinstance(style_config, py3compat.basestring): @@ -519,8 +730,12 @@ def _CreateConfigParserFromConfigString(config_string): "Invalid style dict syntax: '{}'.".format(config_string)) config = py3compat.ConfigParser() config.add_section('style') - for key, value in re.findall(r'([a-zA-Z0-9_]+)\s*[:=]\s*([a-zA-Z0-9_]+)', - config_string): + for key, value, _ in re.findall( + r'([a-zA-Z0-9_]+)\s*[:=]\s*' + r'(?:' + r'((?P<quote>[\'"]).*?(?P=quote)|' + r'[a-zA-Z0-9_]+)' + r')', config_string): # yapf: disable config.set('style', key, value) return config @@ -533,19 +748,40 @@ def _CreateConfigParserFromConfigFile(config_filename): '"{0}" is not a valid style or file path'.format(config_filename)) with open(config_filename) as style_file: config = py3compat.ConfigParser() + if 
config_filename.endswith(PYPROJECT_TOML): + try: + import toml + except ImportError: + raise errors.YapfError( + "toml package is needed for using pyproject.toml as a " + "configuration file") + + pyproject_toml = toml.load(style_file) + style_dict = pyproject_toml.get("tool", {}).get("yapf", None) + if style_dict is None: + raise StyleConfigError( + 'Unable to find section [tool.yapf] in {0}'.format(config_filename)) + config.add_section('style') + for k, v in style_dict.items(): + config.set('style', k, str(v)) + return config + config.read_file(style_file) if config_filename.endswith(SETUP_CONFIG): if not config.has_section('yapf'): raise StyleConfigError( 'Unable to find section [yapf] in {0}'.format(config_filename)) - elif config_filename.endswith(LOCAL_STYLE): - if not config.has_section('style'): - raise StyleConfigError( - 'Unable to find section [style] in {0}'.format(config_filename)) - else: + return config + + if config_filename.endswith(LOCAL_STYLE): if not config.has_section('style'): raise StyleConfigError( 'Unable to find section [style] in {0}'.format(config_filename)) + return config + + if not config.has_section('style'): + raise StyleConfigError( + 'Unable to find section [style] in {0}'.format(config_filename)) return config @@ -608,6 +844,10 @@ LOCAL_STYLE = '.style.yapf' # specified in the '[yapf]' section. SETUP_CONFIG = 'setup.cfg' +# Style definition by local pyproject.toml file. Style should be specified +# in the '[tool.yapf]' section. +PYPROJECT_TOML = 'pyproject.toml' + # TODO(eliben): For now we're preserving the global presence of a style dict. # Refactor this so that the style is passed around through yapf rather than # being global. diff --git a/yapf/yapflib/subtype_assigner.py b/yapf/yapflib/subtype_assigner.py index 8bf3d8d..7b45586 100644 --- a/yapf/yapflib/subtype_assigner.py +++ b/yapf/yapflib/subtype_assigner.py @@ -11,28 +11,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -"""Subtype assigner for lib2to3 trees. +"""Subtype assigner for format tokens. -This module assigns extra type information to the lib2to3 trees. This -information is more specific than whether something is an operator or an -identifier. For instance, it can specify if a node in the tree is part of a -subscript. +This module assigns extra type information to format tokens. This information is +more specific than whether something is an operator or an identifier. For +instance, it can specify if a node in the tree is part of a subscript. AssignSubtypes(): the main function exported by this module. Annotations: - subtype: The subtype of a pytree token. See 'format_token' module for a list - of subtypes. + subtype: The subtype of a pytree token. See 'subtypes' module for a list of + subtypes. """ from lib2to3 import pytree -from lib2to3.pgen2 import token +from lib2to3.pgen2 import token as grammar_token from lib2to3.pygram import python_symbols as syms from yapf.yapflib import format_token from yapf.yapflib import pytree_utils from yapf.yapflib import pytree_visitor from yapf.yapflib import style +from yapf.yapflib import subtypes def AssignSubtypes(tree): @@ -47,10 +47,10 @@ def AssignSubtypes(tree): # Map tokens in argument lists to their respective subtype. 
_ARGLIST_TOKEN_TO_SUBTYPE = { - '=': format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN, - ':': format_token.Subtype.TYPED_NAME, - '*': format_token.Subtype.VARARGS_STAR, - '**': format_token.Subtype.KWARGS_STAR_STAR, + '=': subtypes.DEFAULT_OR_NAMED_ASSIGN, + ':': subtypes.TYPED_NAME, + '*': subtypes.VARARGS_STAR, + '**': subtypes.KWARGS_STAR_STAR, } @@ -73,35 +73,31 @@ class _SubtypeAssigner(pytree_visitor.PyTreeVisitor): for child in node.children: if pytree_utils.NodeName(child) == 'comp_for': comp_for = True - _AppendFirstLeafTokenSubtype(child, - format_token.Subtype.DICT_SET_GENERATOR) - elif pytree_utils.NodeName(child) in ('COLON', 'DOUBLESTAR'): + _AppendFirstLeafTokenSubtype(child, subtypes.DICT_SET_GENERATOR) + elif child.type in (grammar_token.COLON, grammar_token.DOUBLESTAR): dict_maker = True if not comp_for and dict_maker: last_was_colon = False unpacking = False for child in node.children: - if pytree_utils.NodeName(child) == 'DOUBLESTAR': - _AppendFirstLeafTokenSubtype(child, - format_token.Subtype.KWARGS_STAR_STAR) + if child.type == grammar_token.DOUBLESTAR: + _AppendFirstLeafTokenSubtype(child, subtypes.KWARGS_STAR_STAR) if last_was_colon: if style.Get('INDENT_DICTIONARY_VALUE'): _InsertPseudoParentheses(child) else: - _AppendFirstLeafTokenSubtype(child, - format_token.Subtype.DICTIONARY_VALUE) + _AppendFirstLeafTokenSubtype(child, subtypes.DICTIONARY_VALUE) elif (isinstance(child, pytree.Node) or (not child.value.startswith('#') and child.value not in '{:,')): # Mark the first leaf of a key entry as a DICTIONARY_KEY. We # normally want to split before them if the dictionary cannot exist # on a single line. 
if not unpacking or pytree_utils.FirstLeafNode(child).value == '**': - _AppendFirstLeafTokenSubtype(child, - format_token.Subtype.DICTIONARY_KEY) - _AppendSubtypeRec(child, format_token.Subtype.DICTIONARY_KEY_PART) - last_was_colon = pytree_utils.NodeName(child) == 'COLON' - if pytree_utils.NodeName(child) == 'DOUBLESTAR': + _AppendFirstLeafTokenSubtype(child, subtypes.DICTIONARY_KEY) + _AppendSubtypeRec(child, subtypes.DICTIONARY_KEY_PART) + last_was_colon = child.type == grammar_token.COLON + if child.type == grammar_token.DOUBLESTAR: unpacking = True elif last_was_colon: unpacking = False @@ -112,28 +108,28 @@ class _SubtypeAssigner(pytree_visitor.PyTreeVisitor): for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == '=': - _AppendTokenSubtype(child, format_token.Subtype.ASSIGN_OPERATOR) + _AppendTokenSubtype(child, subtypes.ASSIGN_OPERATOR) def Visit_or_test(self, node): # pylint: disable=invalid-name # or_test ::= and_test ('or' and_test)* for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == 'or': - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) def Visit_and_test(self, node): # pylint: disable=invalid-name # and_test ::= not_test ('and' not_test)* for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == 'and': - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) def Visit_not_test(self, node): # pylint: disable=invalid-name # not_test ::= 'not' not_test | comparison for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == 'not': - _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.UNARY_OPERATOR) def Visit_comparison(self, node): # pylint: disable=invalid-name # comparison ::= expr (comp_op expr)* 
@@ -142,95 +138,104 @@ class _SubtypeAssigner(pytree_visitor.PyTreeVisitor): self.Visit(child) if (isinstance(child, pytree.Leaf) and child.value in {'<', '>', '==', '>=', '<=', '<>', '!=', 'in', 'is'}): - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) elif pytree_utils.NodeName(child) == 'comp_op': for grandchild in child.children: - _AppendTokenSubtype(grandchild, format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(grandchild, subtypes.BINARY_OPERATOR) def Visit_star_expr(self, node): # pylint: disable=invalid-name # star_expr ::= '*' expr for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == '*': - _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR) - _AppendTokenSubtype(child, format_token.Subtype.VARARGS_STAR) + _AppendTokenSubtype(child, subtypes.UNARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.VARARGS_STAR) def Visit_expr(self, node): # pylint: disable=invalid-name # expr ::= xor_expr ('|' xor_expr)* for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == '|': - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) def Visit_xor_expr(self, node): # pylint: disable=invalid-name # xor_expr ::= and_expr ('^' and_expr)* for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == '^': - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) def Visit_and_expr(self, node): # pylint: disable=invalid-name # and_expr ::= shift_expr ('&' shift_expr)* for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == '&': - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) def Visit_shift_expr(self, node): # pylint: 
disable=invalid-name # shift_expr ::= arith_expr (('<<'|'>>') arith_expr)* for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value in {'<<', '>>'}: - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) def Visit_arith_expr(self, node): # pylint: disable=invalid-name # arith_expr ::= term (('+'|'-') term)* for child in node.children: self.Visit(child) - if isinstance(child, pytree.Leaf) and child.value in '+-': - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + if _IsAExprOperator(child): + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) + + if _IsSimpleExpression(node): + for child in node.children: + if _IsAExprOperator(child): + _AppendTokenSubtype(child, subtypes.SIMPLE_EXPRESSION) def Visit_term(self, node): # pylint: disable=invalid-name - # term ::= factor (('*'|'/'|'%'|'//') factor)* + # term ::= factor (('*'|'/'|'%'|'//'|'@') factor)* for child in node.children: self.Visit(child) - if (isinstance(child, pytree.Leaf) and - child.value in {'*', '/', '%', '//'}): - _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR) + if _IsMExprOperator(child): + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) + + if _IsSimpleExpression(node): + for child in node.children: + if _IsMExprOperator(child): + _AppendTokenSubtype(child, subtypes.SIMPLE_EXPRESSION) def Visit_factor(self, node): # pylint: disable=invalid-name # factor ::= ('+'|'-'|'~') factor | power for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value in '+-~': - _AppendTokenSubtype(child, format_token.Subtype.UNARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.UNARY_OPERATOR) def Visit_power(self, node): # pylint: disable=invalid-name # power ::= atom trailer* ['**' factor] for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == '**': - _AppendTokenSubtype(child, 
format_token.Subtype.BINARY_OPERATOR) + _AppendTokenSubtype(child, subtypes.BINARY_OPERATOR) def Visit_trailer(self, node): # pylint: disable=invalid-name for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value in '[]': - _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_BRACKET) + _AppendTokenSubtype(child, subtypes.SUBSCRIPT_BRACKET) def Visit_subscript(self, node): # pylint: disable=invalid-name # subscript ::= test | [test] ':' [test] [sliceop] for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == ':': - _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_COLON) + _AppendTokenSubtype(child, subtypes.SUBSCRIPT_COLON) def Visit_sliceop(self, node): # pylint: disable=invalid-name # sliceop ::= ':' [test] for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == ':': - _AppendTokenSubtype(child, format_token.Subtype.SUBSCRIPT_COLON) + _AppendTokenSubtype(child, subtypes.SUBSCRIPT_COLON) def Visit_argument(self, node): # pylint: disable=invalid-name # argument ::= @@ -243,32 +248,39 @@ class _SubtypeAssigner(pytree_visitor.PyTreeVisitor): # | '*' test (',' argument)* [',' '**' test] # | '**' test) self._ProcessArgLists(node) - _SetArgListSubtype(node, format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN, - format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST) + _SetArgListSubtype(node, subtypes.DEFAULT_OR_NAMED_ASSIGN, + subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST) def Visit_tname(self, node): # pylint: disable=invalid-name self._ProcessArgLists(node) - _SetArgListSubtype(node, format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN, - format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST) + _SetArgListSubtype(node, subtypes.DEFAULT_OR_NAMED_ASSIGN, + subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST) def Visit_decorator(self, node): # pylint: disable=invalid-name # decorator ::= # '@' dotted_name [ '(' [arglist] ')' ] NEWLINE for child in node.children: 
if isinstance(child, pytree.Leaf) and child.value == '@': - _AppendTokenSubtype(child, subtype=format_token.Subtype.DECORATOR) + _AppendTokenSubtype(child, subtype=subtypes.DECORATOR) self.Visit(child) def Visit_funcdef(self, node): # pylint: disable=invalid-name # funcdef ::= # 'def' NAME parameters ['->' test] ':' suite for child in node.children: - if pytree_utils.NodeName(child) == 'NAME' and child.value != 'def': - _AppendTokenSubtype(child, format_token.Subtype.FUNC_DEF) + if child.type == grammar_token.NAME and child.value != 'def': + _AppendTokenSubtype(child, subtypes.FUNC_DEF) break for child in node.children: self.Visit(child) + def Visit_parameters(self, node): # pylint: disable=invalid-name + # parameters ::= '(' [typedargslist] ')' + self._ProcessArgLists(node) + if len(node.children) > 2: + _AppendFirstLeafTokenSubtype(node.children[1], subtypes.PARAMETER_START) + _AppendLastLeafTokenSubtype(node.children[-2], subtypes.PARAMETER_STOP) + def Visit_typedargslist(self, node): # pylint: disable=invalid-name # typedargslist ::= # ((tfpdef ['=' test] ',')* @@ -276,20 +288,32 @@ class _SubtypeAssigner(pytree_visitor.PyTreeVisitor): # | '**' tname) # | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) self._ProcessArgLists(node) - _SetArgListSubtype(node, format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN, - format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST) + _SetArgListSubtype(node, subtypes.DEFAULT_OR_NAMED_ASSIGN, + subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST) tname = False - for child in node.children: + if not node.children: + return + + _AppendFirstLeafTokenSubtype(node.children[0], subtypes.PARAMETER_START) + _AppendLastLeafTokenSubtype(node.children[-1], subtypes.PARAMETER_STOP) + + tname = pytree_utils.NodeName(node.children[0]) == 'tname' + for i in range(1, len(node.children)): + prev_child = node.children[i - 1] + child = node.children[i] + if prev_child.type == grammar_token.COMMA: + _AppendFirstLeafTokenSubtype(child, subtypes.PARAMETER_START) + 
elif child.type == grammar_token.COMMA: + _AppendLastLeafTokenSubtype(prev_child, subtypes.PARAMETER_STOP) + if pytree_utils.NodeName(child) == 'tname': tname = True - _SetArgListSubtype(child, format_token.Subtype.TYPED_NAME, - format_token.Subtype.TYPED_NAME_ARG_LIST) - if not isinstance(child, pytree.Leaf): - continue - if child.value == ',': + _SetArgListSubtype(child, subtypes.TYPED_NAME, + subtypes.TYPED_NAME_ARG_LIST) + elif child.type == grammar_token.COMMA: tname = False - elif child.value == '=' and tname: - _AppendTokenSubtype(child, subtype=format_token.Subtype.TYPED_NAME) + elif child.type == grammar_token.EQUAL and tname: + _AppendTokenSubtype(child, subtype=subtypes.TYPED_NAME) tname = False def Visit_varargslist(self, node): # pylint: disable=invalid-name @@ -302,24 +326,32 @@ class _SubtypeAssigner(pytree_visitor.PyTreeVisitor): for child in node.children: self.Visit(child) if isinstance(child, pytree.Leaf) and child.value == '=': - _AppendTokenSubtype(child, format_token.Subtype.VARARGS_LIST) + _AppendTokenSubtype(child, subtypes.VARARGS_LIST) def Visit_comp_for(self, node): # pylint: disable=invalid-name # comp_for ::= 'for' exprlist 'in' testlist_safe [comp_iter] - _AppendSubtypeRec(node, format_token.Subtype.COMP_FOR) + _AppendSubtypeRec(node, subtypes.COMP_FOR) # Mark the previous node as COMP_EXPR unless this is a nested comprehension # as these will have the outer comprehension as their previous node. 
attr = pytree_utils.GetNodeAnnotation(node.parent, pytree_utils.Annotation.SUBTYPE) - if not attr or format_token.Subtype.COMP_FOR not in attr: - _AppendSubtypeRec(node.parent.children[0], format_token.Subtype.COMP_EXPR) + if not attr or subtypes.COMP_FOR not in attr: + _AppendSubtypeRec(node.parent.children[0], subtypes.COMP_EXPR) self.DefaultNodeVisit(node) + def Visit_old_comp_for(self, node): # pylint: disable=invalid-name + # Python 3.7 + self.Visit_comp_for(node) + def Visit_comp_if(self, node): # pylint: disable=invalid-name # comp_if ::= 'if' old_test [comp_iter] - _AppendSubtypeRec(node, format_token.Subtype.COMP_IF) + _AppendSubtypeRec(node, subtypes.COMP_IF) self.DefaultNodeVisit(node) + def Visit_old_comp_if(self, node): # pylint: disable=invalid-name + # Python 3.7 + self.Visit_comp_if(node) + def _ProcessArgLists(self, node): """Common method for processing argument lists.""" for child in node.children: @@ -327,8 +359,7 @@ class _SubtypeAssigner(pytree_visitor.PyTreeVisitor): if isinstance(child, pytree.Leaf): _AppendTokenSubtype( child, - subtype=_ARGLIST_TOKEN_TO_SUBTYPE.get(child.value, - format_token.Subtype.NONE)) + subtype=_ARGLIST_TOKEN_TO_SUBTYPE.get(child.value, subtypes.NONE)) def _SetArgListSubtype(node, node_subtype, list_subtype): @@ -337,20 +368,24 @@ def _SetArgListSubtype(node, node_subtype, list_subtype): def HasSubtype(node): """Return True if the arg list has a named assign subtype.""" if isinstance(node, pytree.Leaf): - if node_subtype in pytree_utils.GetNodeAnnotation( - node, pytree_utils.Annotation.SUBTYPE, set()): - return True - return False - has_subtype = False - for child in node.children: - if pytree_utils.NodeName(child) != 'arglist': - has_subtype |= HasSubtype(child) - return has_subtype + return node_subtype in pytree_utils.GetNodeAnnotation( + node, pytree_utils.Annotation.SUBTYPE, set()) - if HasSubtype(node): for child in node.children: - if pytree_utils.NodeName(child) != 'COMMA': - 
_AppendFirstLeafTokenSubtype(child, list_subtype) + node_name = pytree_utils.NodeName(child) + if node_name not in {'atom', 'arglist', 'power'}: + if HasSubtype(child): + return True + + return False + + if not HasSubtype(node): + return + + for child in node.children: + node_name = pytree_utils.NodeName(child) + if node_name not in {'atom', 'COMMA'}: + _AppendFirstLeafTokenSubtype(child, list_subtype) def _AppendTokenSubtype(node, subtype): @@ -367,6 +402,14 @@ def _AppendFirstLeafTokenSubtype(node, subtype): _AppendFirstLeafTokenSubtype(node.children[0], subtype) +def _AppendLastLeafTokenSubtype(node, subtype): + """Append the last leaf token's subtypes.""" + if isinstance(node, pytree.Leaf): + _AppendTokenSubtype(node, subtype) + return + _AppendLastLeafTokenSubtype(node.children[-1], subtype) + + def _AppendSubtypeRec(node, subtype, force=True): """Append the leafs in the node to the given subtype.""" if isinstance(node, pytree.Leaf): @@ -380,18 +423,24 @@ def _InsertPseudoParentheses(node): """Insert pseudo parentheses so that dicts can be formatted correctly.""" comment_node = None if isinstance(node, pytree.Node): - if node.children[-1].type == token.COMMENT: + if node.children[-1].type == grammar_token.COMMENT: comment_node = node.children[-1].clone() node.children[-1].remove() first = pytree_utils.FirstLeafNode(node) last = pytree_utils.LastLeafNode(node) - if first == last and first.type == token.COMMENT: + if first == last and first.type == grammar_token.COMMENT: # A comment was inserted before the value, which is a pytree.Leaf. # Encompass the dictionary's value into an ATOM node. 
last = first.next_sibling - new_node = pytree.Node(syms.atom, [first.clone(), last.clone()]) + last_clone = last.clone() + new_node = pytree.Node(syms.atom, [first.clone(), last_clone]) + for orig_leaf, clone_leaf in zip(last.leaves(), last_clone.leaves()): + pytree_utils.CopyYapfAnnotations(orig_leaf, clone_leaf) + if hasattr(orig_leaf, 'is_pseudo'): + clone_leaf.is_pseudo = orig_leaf.is_pseudo + node.replace(new_node) node = new_node last.remove() @@ -400,17 +449,19 @@ def _InsertPseudoParentheses(node): last = pytree_utils.LastLeafNode(node) lparen = pytree.Leaf( - token.LPAR, u'(', context=('', (first.get_lineno(), first.column - 1))) + grammar_token.LPAR, + u'(', + context=('', (first.get_lineno(), first.column - 1))) last_lineno = last.get_lineno() - if last.type == token.STRING and '\n' in last.value: + if last.type == grammar_token.STRING and '\n' in last.value: last_lineno += last.value.count('\n') - if last.type == token.STRING and '\n' in last.value: + if last.type == grammar_token.STRING and '\n' in last.value: last_column = len(last.value.split('\n')[-1]) + 1 else: last_column = last.column + len(last.value) + 1 rparen = pytree.Leaf( - token.RPAR, u')', context=('', (last_lineno, last_column))) + grammar_token.RPAR, u')', context=('', (last_lineno, last_column))) lparen.is_pseudo = True rparen.is_pseudo = True @@ -420,9 +471,25 @@ def _InsertPseudoParentheses(node): node.append_child(rparen) if comment_node: node.append_child(comment_node) - _AppendFirstLeafTokenSubtype(node, format_token.Subtype.DICTIONARY_VALUE) + _AppendFirstLeafTokenSubtype(node, subtypes.DICTIONARY_VALUE) else: clone = node.clone() + for orig_leaf, clone_leaf in zip(node.leaves(), clone.leaves()): + pytree_utils.CopyYapfAnnotations(orig_leaf, clone_leaf) new_node = pytree.Node(syms.atom, [lparen, clone, rparen]) node.replace(new_node) - _AppendFirstLeafTokenSubtype(clone, format_token.Subtype.DICTIONARY_VALUE) + _AppendFirstLeafTokenSubtype(clone, subtypes.DICTIONARY_VALUE) + + 
+def _IsAExprOperator(node): + return isinstance(node, pytree.Leaf) and node.value in {'+', '-'} + + +def _IsMExprOperator(node): + return isinstance(node, + pytree.Leaf) and node.value in {'*', '/', '%', '//', '@'} + + +def _IsSimpleExpression(node): + """A node with only leafs as children.""" + return all(isinstance(child, pytree.Leaf) for child in node.children) diff --git a/yapf/yapflib/subtypes.py b/yapf/yapflib/subtypes.py new file mode 100644 index 0000000..b4b7efe --- /dev/null +++ b/yapf/yapflib/subtypes.py @@ -0,0 +1,40 @@ +# Copyright 2021 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Token subtypes used to improve formatting.""" + +NONE = 0 +UNARY_OPERATOR = 1 +BINARY_OPERATOR = 2 +SUBSCRIPT_COLON = 3 +SUBSCRIPT_BRACKET = 4 +DEFAULT_OR_NAMED_ASSIGN = 5 +DEFAULT_OR_NAMED_ASSIGN_ARG_LIST = 6 +VARARGS_LIST = 7 +VARARGS_STAR = 8 +KWARGS_STAR_STAR = 9 +ASSIGN_OPERATOR = 10 +DICTIONARY_KEY = 11 +DICTIONARY_KEY_PART = 12 +DICTIONARY_VALUE = 13 +DICT_SET_GENERATOR = 14 +COMP_EXPR = 15 +COMP_FOR = 16 +COMP_IF = 17 +FUNC_DEF = 18 +DECORATOR = 19 +TYPED_NAME = 20 +TYPED_NAME_ARG_LIST = 21 +SIMPLE_EXPRESSION = 22 +PARAMETER_START = 23 +PARAMETER_STOP = 24 diff --git a/yapf/yapflib/yapf_api.py b/yapf/yapflib/yapf_api.py index 282dea3..09c31bc 100644 --- a/yapf/yapflib/yapf_api.py +++ b/yapf/yapflib/yapf_api.py @@ -37,10 +37,12 @@ import re import sys from lib2to3.pgen2 import parse +from lib2to3.pgen2 import tokenize from yapf.yapflib import blank_line_calculator from yapf.yapflib import comment_splicer from yapf.yapflib import continuation_splicer +from yapf.yapflib import errors from yapf.yapflib import file_resources from yapf.yapflib import identify_container from yapf.yapflib import py3compat @@ -63,9 +65,18 @@ def FormatFile(filename, Arguments: filename: (unicode) The file to reformat. + style_config: (string) Either a style name or a path to a file that contains + formatting style settings. If None is specified, use the default style + as set in style.DEFAULT_STYLE_FACTORY + lines: (list of tuples of integers) A list of tuples of lines, [start, end], + that we want to format. The lines are 1-based indexed. It can be used by + third-party code (e.g., IDEs) when reformatting a snippet of code rather + than a whole file. + print_diff: (bool) Instead of returning the reformatted source, return a + diff that turns the formatted source into reformatter source. + verify: (bool) True if reformatted code should be verified for syntax. in_place: (bool) If True, write the reformatted code back to the file. logger: (io streamer) A stream to output logging. 
- remaining arguments: see comment at the top of this module. Returns: Tuple of (reformatted_code, encoding, changed). reformatted_code is None if @@ -91,7 +102,7 @@ def FormatFile(filename, verify=verify) if reformatted_source.rstrip('\n'): lines = reformatted_source.rstrip('\n').split('\n') - reformatted_source = newline.join(line for line in lines) + newline + reformatted_source = newline.join(iter(lines)) + newline if in_place: if original_source and original_source != reformatted_source: file_resources.WriteReformattedCode(filename, reformatted_source, @@ -101,6 +112,45 @@ def FormatFile(filename, return reformatted_source, encoding, changed +def FormatTree(tree, style_config=None, lines=None, verify=False): + """Format a parsed lib2to3 pytree. + + This provides an alternative entry point to YAPF. + + Arguments: + tree: (pytree.Node) The root of the pytree to format. + style_config: (string) Either a style name or a path to a file that contains + formatting style settings. If None is specified, use the default style + as set in style.DEFAULT_STYLE_FACTORY + lines: (list of tuples of integers) A list of tuples of lines, [start, end], + that we want to format. The lines are 1-based indexed. It can be used by + third-party code (e.g., IDEs) when reformatting a snippet of code rather + than a whole file. + verify: (bool) True if reformatted code should be verified for syntax. + + Returns: + The source formatted according to the given formatting style. + """ + _CheckPythonVersion() + style.SetGlobalStyle(style.CreateStyleFromConfig(style_config)) + + # Run passes on the tree, modifying it in place. 
+ comment_splicer.SpliceComments(tree) + continuation_splicer.SpliceContinuations(tree) + subtype_assigner.AssignSubtypes(tree) + identify_container.IdentifyContainers(tree) + split_penalty.ComputeSplitPenalties(tree) + blank_line_calculator.CalculateBlankLines(tree) + + llines = pytree_unwrapper.UnwrapPyTree(tree) + for lline in llines: + lline.CalculateFormattingInformation() + + lines = _LineRangesToSet(lines) + _MarkLinesToFormat(llines, lines) + return reformatter.Reformat(_SplitSemicolons(llines), verify, lines) + + def FormatCode(unformatted_source, filename='<unknown>', style_config=None, @@ -114,39 +164,29 @@ def FormatCode(unformatted_source, Arguments: unformatted_source: (unicode) The code to format. filename: (unicode) The name of the file being reformatted. - remaining arguments: see comment at the top of this module. + style_config: (string) Either a style name or a path to a file that contains + formatting style settings. If None is specified, use the default style + as set in style.DEFAULT_STYLE_FACTORY + lines: (list of tuples of integers) A list of tuples of lines, [start, end], + that we want to format. The lines are 1-based indexed. It can be used by + third-party code (e.g., IDEs) when reformatting a snippet of code rather + than a whole file. + print_diff: (bool) Instead of returning the reformatted source, return a + diff that turns the formatted source into reformatter source. + verify: (bool) True if reformatted code should be verified for syntax. Returns: Tuple of (reformatted_source, changed). reformatted_source conforms to the desired formatting style. changed is True if the source changed. 
""" - _CheckPythonVersion() - style.SetGlobalStyle(style.CreateStyleFromConfig(style_config)) - if not unformatted_source.endswith('\n'): - unformatted_source += '\n' - try: tree = pytree_utils.ParseCodeToTree(unformatted_source) - except parse.ParseError as e: - e.msg = filename + ': ' + e.msg - raise + except Exception as e: + e.filename = filename + raise errors.YapfError(errors.FormatErrorMsg(e)) - # Run passes on the tree, modifying it in place. - comment_splicer.SpliceComments(tree) - continuation_splicer.SpliceContinuations(tree) - subtype_assigner.AssignSubtypes(tree) - identify_container.IdentifyContainers(tree) - split_penalty.ComputeSplitPenalties(tree) - blank_line_calculator.CalculateBlankLines(tree) - - uwlines = pytree_unwrapper.UnwrapPyTree(tree) - for uwl in uwlines: - uwl.CalculateFormattingInformation() - - lines = _LineRangesToSet(lines) - _MarkLinesToFormat(uwlines, lines) - reformatted_source = reformatter.Reformat( - _SplitSemicolons(uwlines), verify, lines) + reformatted_source = FormatTree( + tree, style_config=style_config, lines=lines, verify=verify) if unformatted_source == reformatted_source: return '' if print_diff else reformatted_source, False @@ -155,7 +195,7 @@ def FormatCode(unformatted_source, unformatted_source, reformatted_source, filename=filename) if print_diff: - return code_diff, code_diff.strip() != '' # pylint: disable=g-explicit-bool-comparison + return code_diff, code_diff.strip() != '' # pylint: disable=g-explicit-bool-comparison # noqa return reformatted_source, True @@ -198,16 +238,24 @@ def ReadFile(filename, logger=None): line_ending = file_resources.LineEnding(lines) source = '\n'.join(line.rstrip('\r\n') for line in lines) + '\n' return source, line_ending, encoding - except IOError as err: # pragma: no cover + except IOError as e: # pragma: no cover + if logger: + logger(e) + e.args = (e.args[0], (filename, e.args[1][1], e.args[1][2], e.args[1][3])) + raise + except UnicodeDecodeError as e: # pragma: no cover if 
logger: - logger(err) + logger('Could not parse %s! Consider excluding this file with --exclude.', + filename) + logger(e) + e.args = (e.args[0], (filename, e.args[1][1], e.args[1][2], e.args[1][3])) raise -def _SplitSemicolons(uwlines): +def _SplitSemicolons(lines): res = [] - for uwline in uwlines: - res.extend(uwline.Split()) + for line in lines: + res.extend(line.Split()) return res @@ -228,25 +276,27 @@ def _LineRangesToSet(line_ranges): return line_set -def _MarkLinesToFormat(uwlines, lines): +def _MarkLinesToFormat(llines, lines): """Skip sections of code that we shouldn't reformat.""" if lines: - for uwline in uwlines: + for uwline in llines: uwline.disable = not lines.intersection( range(uwline.lineno, uwline.last.lineno + 1)) # Now go through the lines and disable any lines explicitly marked as # disabled. index = 0 - while index < len(uwlines): - uwline = uwlines[index] + while index < len(llines): + uwline = llines[index] if uwline.is_comment: if _DisableYAPF(uwline.first.value.strip()): index += 1 - while index < len(uwlines): - uwline = uwlines[index] - if uwline.is_comment and _EnableYAPF(uwline.first.value.strip()): - break + while index < len(llines): + uwline = llines[index] + line = uwline.first.value.strip() + if uwline.is_comment and _EnableYAPF(line): + if not _DisableYAPF(line): + break uwline.disable = True index += 1 elif re.search(DISABLE_PATTERN, uwline.last.value.strip(), re.IGNORECASE): |