diff options
Diffstat (limited to 'python/helpers/pydev/third_party/pep8/autopep8.py')
-rw-r--r-- | python/helpers/pydev/third_party/pep8/autopep8.py | 3687 |
1 files changed, 3687 insertions, 0 deletions
diff --git a/python/helpers/pydev/third_party/pep8/autopep8.py b/python/helpers/pydev/third_party/pep8/autopep8.py new file mode 100644 index 000000000000..224b5c645949 --- /dev/null +++ b/python/helpers/pydev/third_party/pep8/autopep8.py @@ -0,0 +1,3687 @@ +#!/usr/bin/env python +# +# Copyright (C) 2010-2011 Hideo Hattori +# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint +# Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Automatically formats Python code to conform to the PEP 8 style guide. + +Fixes that only need be done once can be added by adding a function of the form +"fix_<code>(source)" to this module. They should return the fixed source code. +These fixes are picked up by apply_global_fixes(). + +Fixes that depend on pep8 should be added as methods to FixPEP8. See the class +documentation for more information. + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import bisect +import codecs +import collections +import copy +import difflib +import fnmatch +import inspect +import io +import itertools +import keyword +import locale +import os +import re +import signal +import sys +import token +import tokenize + +import pep8 + + +try: + unicode +except NameError: + unicode = str + + +__version__ = '1.0.3' + + +CR = '\r' +LF = '\n' +CRLF = '\r\n' + + +PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$') + + +# For generating line shortening candidates. +SHORTEN_OPERATOR_GROUPS = frozenset([ + frozenset([',']), + frozenset(['%']), + frozenset([',', '(', '[', '{']), + frozenset(['%', '(', '[', '{']), + frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']), + frozenset(['%', '+', '-', '*', '/', '//']), +]) + + +DEFAULT_IGNORE = 'E24' +DEFAULT_INDENT_SIZE = 4 + + +# W602 is handled separately due to the need to avoid "with_traceback". +CODE_TO_2TO3 = { + 'E721': ['idioms'], + 'W601': ['has_key'], + 'W603': ['ne'], + 'W604': ['repr'], + 'W690': ['apply', + 'except', + 'exitfunc', + 'import', + 'numliterals', + 'operator', + 'paren', + 'reduce', + 'renames', + 'standarderror', + 'sys_exc', + 'throw', + 'tuple_params', + 'xreadlines']} + + +def check_lib2to3(): + try: + import lib2to3 + except ImportError: + sys.path.append(os.path.join(os.path.dirname(__file__), 'lib2to3')) + import lib2to3 + + +def open_with_encoding(filename, encoding=None, mode='r'): + """Return opened file with a specific encoding.""" + if not encoding: + encoding = detect_encoding(filename) + + return io.open(filename, mode=mode, encoding=encoding, + newline='') # Preserve line endings + + +def detect_encoding(filename): + """Return file encoding.""" + try: + with open(filename, 'rb') as input_file: + check_lib2to3() + from lib2to3.pgen2 import tokenize as lib2to3_tokenize + encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0] + + # Check for correctness of encoding + with open_with_encoding(filename, encoding) as test_file: + test_file.read() + + return encoding + except (LookupError, SyntaxError, UnicodeDecodeError): + return 'latin-1' + + +def readlines_from_file(filename): + """Return contents of file.""" + with open_with_encoding(filename) as input_file: + return input_file.readlines() + + +def extended_blank_lines(logical_line, + blank_lines, + indent_level, + previous_logical): + """Check for missing blank lines after class declaration.""" + if previous_logical.startswith('class '): + if ( + logical_line.startswith(('def ', 'class ', '@')) or + pep8.DOCSTRING_REGEX.match(logical_line) + ): + if indent_level and not blank_lines: + yield (0, 'E309 expected 1 blank line after class declaration') + elif previous_logical.startswith('def '): + if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line): + yield (0, 'E303 too many blank lines ({0})'.format(blank_lines)) + elif pep8.DOCSTRING_REGEX.match(previous_logical): + # Missing blank line between class docstring and method declaration. + if ( + indent_level and + not blank_lines and + logical_line.startswith(('def ')) and + '(self' in logical_line + ): + yield (0, 'E301 expected 1 blank line, found 0') +pep8.register_check(extended_blank_lines) + + +def continued_indentation(logical_line, tokens, indent_level, indent_char, + noqa): + """Override pep8's function to provide indentation information.""" + first_row = tokens[0][2][0] + nrows = 1 + tokens[-1][2][0] - first_row + if noqa or nrows == 1: + return + + # indent_next tells us whether the next block is indented. Assuming + # that it is indented by 4 spaces, then we should not allow 4-space + # indents on the final continuation line. In turn, some other + # indents are allowed to have an extra 4 spaces. + indent_next = logical_line.endswith(':') + + row = depth = 0 + valid_hangs = ( + (DEFAULT_INDENT_SIZE,) + if indent_char != '\t' else (DEFAULT_INDENT_SIZE, + 2 * DEFAULT_INDENT_SIZE) + ) + + # Remember how many brackets were opened on each line. + parens = [0] * nrows + + # Relative indents of physical lines. + rel_indent = [0] * nrows + + # For each depth, collect a list of opening rows. + open_rows = [[0]] + # For each depth, memorize the hanging indentation. + hangs = [None] + + # Visual indents. + indent_chances = {} + last_indent = tokens[0][2] + indent = [last_indent[1]] + + last_token_multiline = None + line = None + last_line = '' + last_line_begins_with_multiline = False + for token_type, text, start, end, line in tokens: + + newline = row < start[0] - first_row + if newline: + row = start[0] - first_row + newline = (not last_token_multiline and + token_type not in (tokenize.NL, tokenize.NEWLINE)) + last_line_begins_with_multiline = last_token_multiline + + if newline: + # This is the beginning of a continuation line. + last_indent = start + + # Record the initial indent. + rel_indent[row] = pep8.expand_indent(line) - indent_level + + # Identify closing bracket. + close_bracket = (token_type == tokenize.OP and text in ']})') + + # Is the indent relative to an opening bracket line? + for open_row in reversed(open_rows[depth]): + hang = rel_indent[row] - rel_indent[open_row] + hanging_indent = hang in valid_hangs + if hanging_indent: + break + if hangs[depth]: + hanging_indent = (hang == hangs[depth]) + + visual_indent = (not close_bracket and hang > 0 and + indent_chances.get(start[1])) + + if close_bracket and indent[depth]: + # Closing bracket for visual indent. + if start[1] != indent[depth]: + yield (start, 'E124 {0}'.format(indent[depth])) + elif close_bracket and not hang: + pass + elif indent[depth] and start[1] < indent[depth]: + # Visual indent is broken. + yield (start, 'E128 {0}'.format(indent[depth])) + elif (hanging_indent or + (indent_next and + rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)): + # Hanging indent is verified. + if close_bracket: + yield (start, 'E123 {0}'.format(indent_level + + rel_indent[open_row])) + hangs[depth] = hang + elif visual_indent is True: + # Visual indent is verified. + indent[depth] = start[1] + elif visual_indent in (text, unicode): + # Ignore token lined up with matching one from a previous line. + pass + else: + one_indented = (indent_level + rel_indent[open_row] + + DEFAULT_INDENT_SIZE) + # Indent is broken. + if hang <= 0: + error = ('E122', one_indented) + elif indent[depth]: + error = ('E127', indent[depth]) + elif hang > DEFAULT_INDENT_SIZE: + error = ('E126', one_indented) + else: + hangs[depth] = hang + error = ('E121', one_indented) + + yield (start, '{0} {1}'.format(*error)) + + # Look for visual indenting. + if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT) + and not indent[depth]): + indent[depth] = start[1] + indent_chances[start[1]] = True + # Deal with implicit string concatenation. + elif (token_type in (tokenize.STRING, tokenize.COMMENT) or + text in ('u', 'ur', 'b', 'br')): + indent_chances[start[1]] = unicode + # Special case for the "if" statement because len("if (") is equal to + # 4. + elif not indent_chances and not row and not depth and text == 'if': + indent_chances[end[1] + 1] = True + elif text == ':' and line[end[1]:].isspace(): + open_rows[depth].append(row) + + # Keep track of bracket depth. + if token_type == tokenize.OP: + if text in '([{': + depth += 1 + indent.append(0) + hangs.append(None) + if len(open_rows) == depth: + open_rows.append([]) + open_rows[depth].append(row) + parens[row] += 1 + elif text in ')]}' and depth > 0: + # Parent indents should not be more than this one. + prev_indent = indent.pop() or last_indent[1] + hangs.pop() + for d in range(depth): + if indent[d] > prev_indent: + indent[d] = 0 + for ind in list(indent_chances): + if ind >= prev_indent: + del indent_chances[ind] + del open_rows[depth + 1:] + depth -= 1 + if depth: + indent_chances[indent[depth]] = True + for idx in range(row, -1, -1): + if parens[idx]: + parens[idx] -= 1 + break + assert len(indent) == depth + 1 + if ( + start[1] not in indent_chances and + # This is for purposes of speeding up E121 (GitHub #90). + not last_line.rstrip().endswith(',') + ): + # Allow to line up tokens. + indent_chances[start[1]] = text + + last_token_multiline = (start[0] != end[0]) + if last_token_multiline: + rel_indent[end[0] - first_row] = rel_indent[row] + + last_line = line + + if ( + indent_next and + not last_line_begins_with_multiline and + pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE + ): + pos = (start[0], indent[0] + 4) + yield (pos, 'E125 {0}'.format(indent_level + + 2 * DEFAULT_INDENT_SIZE)) +del pep8._checks['logical_line'][pep8.continued_indentation] +pep8.register_check(continued_indentation) + + +class FixPEP8(object): + + """Fix invalid code. + + Fixer methods are prefixed "fix_". The _fix_source() method looks for these + automatically. + + The fixer method can take either one or two arguments (in addition to + self). The first argument is "result", which is the error information from + pep8. The second argument, "logical", is required only for logical-line + fixes. + + The fixer method can return the list of modified lines or None. An empty + list would mean that no changes were made. None would mean that only the + line reported in the pep8 error was modified. Note that the modified line + numbers that are returned are indexed at 1. This typically would correspond + with the line number reported in the pep8 error information. + + [fixed method list] + - e121,e122,e123,e124,e125,e126,e127,e128,e129 + - e201,e202,e203 + - e211 + - e221,e222,e223,e224,e225 + - e231 + - e251 + - e261,e262 + - e271,e272,e273,e274 + - e301,e302,e303 + - e401 + - e502 + - e701,e702 + - e711 + - w291 + + """ + + def __init__(self, filename, + options, + contents=None, + long_line_ignore_cache=None): + self.filename = filename + if contents is None: + self.source = readlines_from_file(filename) + else: + sio = io.StringIO(contents) + self.source = sio.readlines() + self.options = options + self.indent_word = _get_indentword(''.join(self.source)) + + self.long_line_ignore_cache = ( + set() if long_line_ignore_cache is None + else long_line_ignore_cache) + + # Many fixers are the same even though pep8 categorizes them + # differently. + self.fix_e115 = self.fix_e112 + self.fix_e116 = self.fix_e113 + self.fix_e121 = self._fix_reindent + self.fix_e122 = self._fix_reindent + self.fix_e123 = self._fix_reindent + self.fix_e124 = self._fix_reindent + self.fix_e126 = self._fix_reindent + self.fix_e127 = self._fix_reindent + self.fix_e128 = self._fix_reindent + self.fix_e129 = self._fix_reindent + self.fix_e202 = self.fix_e201 + self.fix_e203 = self.fix_e201 + self.fix_e211 = self.fix_e201 + self.fix_e221 = self.fix_e271 + self.fix_e222 = self.fix_e271 + self.fix_e223 = self.fix_e271 + self.fix_e226 = self.fix_e225 + self.fix_e227 = self.fix_e225 + self.fix_e228 = self.fix_e225 + self.fix_e241 = self.fix_e271 + self.fix_e242 = self.fix_e224 + self.fix_e261 = self.fix_e262 + self.fix_e272 = self.fix_e271 + self.fix_e273 = self.fix_e271 + self.fix_e274 = self.fix_e271 + self.fix_e309 = self.fix_e301 + self.fix_e501 = ( + self.fix_long_line_logically if + options and (options.aggressive >= 2 or options.experimental) else + self.fix_long_line_physically) + self.fix_e703 = self.fix_e702 + + self._ws_comma_done = False + + def _fix_source(self, results): + try: + (logical_start, logical_end) = _find_logical(self.source) + logical_support = True + except (SyntaxError, tokenize.TokenError): # pragma: no cover + logical_support = False + + completed_lines = set() + for result in sorted(results, key=_priority_key): + if result['line'] in completed_lines: + continue + + fixed_methodname = 'fix_' + result['id'].lower() + if hasattr(self, fixed_methodname): + fix = getattr(self, fixed_methodname) + + line_index = result['line'] - 1 + original_line = self.source[line_index] + + is_logical_fix = len(inspect.getargspec(fix).args) > 2 + if is_logical_fix: + logical = None + if logical_support: + logical = _get_logical(self.source, + result, + logical_start, + logical_end) + if logical and set(range( + logical[0][0] + 1, + logical[1][0] + 1)).intersection( + completed_lines): + continue + + modified_lines = fix(result, logical) + else: + modified_lines = fix(result) + + if modified_lines is None: + # Force logical fixes to report what they modified. + assert not is_logical_fix + + if self.source[line_index] == original_line: + modified_lines = [] + + if modified_lines: + completed_lines.update(modified_lines) + elif modified_lines == []: # Empty list means no fix + if self.options.verbose >= 2: + print( + '---> Not fixing {f} on line {l}'.format( + f=result['id'], l=result['line']), + file=sys.stderr) + else: # We assume one-line fix when None. + completed_lines.add(result['line']) + else: + if self.options.verbose >= 3: + print( + "---> '{0}' is not defined.".format(fixed_methodname), + file=sys.stderr) + + info = result['info'].strip() + print('---> {0}:{1}:{2}:{3}'.format(self.filename, + result['line'], + result['column'], + info), + file=sys.stderr) + + def fix(self): + """Return a version of the source code with PEP 8 violations fixed.""" + pep8_options = { + 'ignore': self.options.ignore, + 'select': self.options.select, + 'max_line_length': self.options.max_line_length, + } + results = _execute_pep8(pep8_options, self.source) + + if self.options.verbose: + progress = {} + for r in results: + if r['id'] not in progress: + progress[r['id']] = set() + progress[r['id']].add(r['line']) + print('---> {n} issue(s) to fix {progress}'.format( + n=len(results), progress=progress), file=sys.stderr) + + if self.options.line_range: + start, end = self.options.line_range + results = [r for r in results + if start <= r['line'] <= end] + + self._fix_source(filter_results(source=''.join(self.source), + results=results, + aggressive=self.options.aggressive)) + + if self.options.line_range: + # If number of lines has changed then change line_range. + count = sum(sline.count('\n') + for sline in self.source[start - 1:end]) + self.options.line_range[1] = start + count - 1 + + return ''.join(self.source) + + def _fix_reindent(self, result): + """Fix a badly indented line. + + This is done by adding or removing from its initial indent only. + + """ + num_indent_spaces = int(result['info'].split()[1]) + line_index = result['line'] - 1 + target = self.source[line_index] + + self.source[line_index] = ' ' * num_indent_spaces + target.lstrip() + + def fix_e112(self, result): + """Fix under-indented comments.""" + line_index = result['line'] - 1 + target = self.source[line_index] + + if not target.lstrip().startswith('#'): + # Don't screw with invalid syntax. + return [] + + self.source[line_index] = self.indent_word + target + + def fix_e113(self, result): + """Fix over-indented comments.""" + line_index = result['line'] - 1 + target = self.source[line_index] + + indent = _get_indentation(target) + stripped = target.lstrip() + + if not stripped.startswith('#'): + # Don't screw with invalid syntax. + return [] + + self.source[line_index] = indent[1:] + stripped + + def fix_e125(self, result): + """Fix indentation undistinguish from the next logical line.""" + num_indent_spaces = int(result['info'].split()[1]) + line_index = result['line'] - 1 + target = self.source[line_index] + + spaces_to_add = num_indent_spaces - len(_get_indentation(target)) + indent = len(_get_indentation(target)) + modified_lines = [] + + while len(_get_indentation(self.source[line_index])) >= indent: + self.source[line_index] = (' ' * spaces_to_add + + self.source[line_index]) + modified_lines.append(1 + line_index) # Line indexed at 1. + line_index -= 1 + + return modified_lines + + def fix_e201(self, result): + """Remove extraneous whitespace.""" + line_index = result['line'] - 1 + target = self.source[line_index] + offset = result['column'] - 1 + + if is_probably_part_of_multiline(target): + return [] + + fixed = fix_whitespace(target, + offset=offset, + replacement='') + + self.source[line_index] = fixed + + def fix_e224(self, result): + """Remove extraneous whitespace around operator.""" + target = self.source[result['line'] - 1] + offset = result['column'] - 1 + fixed = target[:offset] + target[offset:].replace('\t', ' ') + self.source[result['line'] - 1] = fixed + + def fix_e225(self, result): + """Fix missing whitespace around operator.""" + target = self.source[result['line'] - 1] + offset = result['column'] - 1 + fixed = target[:offset] + ' ' + target[offset:] + + # Only proceed if non-whitespace characters match. + # And make sure we don't break the indentation. + if ( + fixed.replace(' ', '') == target.replace(' ', '') and + _get_indentation(fixed) == _get_indentation(target) + ): + self.source[result['line'] - 1] = fixed + else: + return [] + + def fix_e231(self, result): + """Add missing whitespace.""" + # Optimize for comma case. This will fix all commas in the full source + # code in one pass. Don't do this more than once. If it fails the first + # time, there is no point in trying again. + if ',' in result['info'] and not self._ws_comma_done: + self._ws_comma_done = True + original = ''.join(self.source) + new = refactor(original, ['ws_comma']) + if original.strip() != new.strip(): + self.source = [new] + return range(1, 1 + len(original)) + + line_index = result['line'] - 1 + target = self.source[line_index] + offset = result['column'] + fixed = target[:offset] + ' ' + target[offset:] + self.source[line_index] = fixed + + def fix_e251(self, result): + """Remove whitespace around parameter '=' sign.""" + line_index = result['line'] - 1 + target = self.source[line_index] + + # This is necessary since pep8 sometimes reports columns that goes + # past the end of the physical line. This happens in cases like, + # foo(bar\n=None) + c = min(result['column'] - 1, + len(target) - 1) + + if target[c].strip(): + fixed = target + else: + fixed = target[:c].rstrip() + target[c:].lstrip() + + # There could be an escaped newline + # + # def foo(a=\ + # 1) + if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')): + self.source[line_index] = fixed.rstrip('\n\r \t\\') + self.source[line_index + 1] = self.source[line_index + 1].lstrip() + return [line_index + 1, line_index + 2] # Line indexed at 1 + + self.source[result['line'] - 1] = fixed + + def fix_e262(self, result): + """Fix spacing after comment hash.""" + target = self.source[result['line'] - 1] + offset = result['column'] + + code = target[:offset].rstrip(' \t#') + comment = target[offset:].lstrip(' \t#') + + fixed = code + (' # ' + comment if comment.strip() else '\n') + + self.source[result['line'] - 1] = fixed + + def fix_e271(self, result): + """Fix extraneous whitespace around keywords.""" + line_index = result['line'] - 1 + target = self.source[line_index] + offset = result['column'] - 1 + + if is_probably_part_of_multiline(target): + return [] + + fixed = fix_whitespace(target, + offset=offset, + replacement=' ') + + if fixed == target: + return [] + else: + self.source[line_index] = fixed + + def fix_e301(self, result): + """Add missing blank line.""" + cr = '\n' + self.source[result['line'] - 1] = cr + self.source[result['line'] - 1] + + def fix_e302(self, result): + """Add missing 2 blank lines.""" + add_linenum = 2 - int(result['info'].split()[-1]) + cr = '\n' * add_linenum + self.source[result['line'] - 1] = cr + self.source[result['line'] - 1] + + def fix_e303(self, result): + """Remove extra blank lines.""" + delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2 + delete_linenum = max(1, delete_linenum) + + # We need to count because pep8 reports an offset line number if there + # are comments. + cnt = 0 + line = result['line'] - 2 + modified_lines = [] + while cnt < delete_linenum and line >= 0: + if not self.source[line].strip(): + self.source[line] = '' + modified_lines.append(1 + line) # Line indexed at 1 + cnt += 1 + line -= 1 + + return modified_lines + + def fix_e304(self, result): + """Remove blank line following function decorator.""" + line = result['line'] - 2 + if not self.source[line].strip(): + self.source[line] = '' + + def fix_e401(self, result): + """Put imports on separate lines.""" + line_index = result['line'] - 1 + target = self.source[line_index] + offset = result['column'] - 1 + + if not target.lstrip().startswith('import'): + return [] + + indentation = re.split(pattern=r'\bimport\b', + string=target, maxsplit=1)[0] + fixed = (target[:offset].rstrip('\t ,') + '\n' + + indentation + 'import ' + target[offset:].lstrip('\t ,')) + self.source[line_index] = fixed + + def fix_long_line_logically(self, result, logical): + """Try to make lines fit within --max-line-length characters.""" + if ( + not logical or + len(logical[2]) == 1 or + self.source[result['line'] - 1].lstrip().startswith('#') + ): + return self.fix_long_line_physically(result) + + start_line_index = logical[0][0] + end_line_index = logical[1][0] + logical_lines = logical[2] + + previous_line = get_item(self.source, start_line_index - 1, default='') + next_line = get_item(self.source, end_line_index + 1, default='') + + single_line = join_logical_line(''.join(logical_lines)) + + try: + fixed = self.fix_long_line( + target=single_line, + previous_line=previous_line, + next_line=next_line, + original=''.join(logical_lines)) + except (SyntaxError, tokenize.TokenError): + return self.fix_long_line_physically(result) + + if fixed: + for line_index in range(start_line_index, end_line_index + 1): + self.source[line_index] = '' + self.source[start_line_index] = fixed + return range(start_line_index + 1, end_line_index + 1) + else: + return [] + + def fix_long_line_physically(self, result): + """Try to make lines fit within --max-line-length characters.""" + line_index = result['line'] - 1 + target = self.source[line_index] + + previous_line = get_item(self.source, line_index - 1, default='') + next_line = get_item(self.source, line_index + 1, default='') + + try: + fixed = self.fix_long_line( + target=target, + previous_line=previous_line, + next_line=next_line, + original=target) + except (SyntaxError, tokenize.TokenError): + return [] + + if fixed: + self.source[line_index] = fixed + return [line_index + 1] + else: + return [] + + def fix_long_line(self, target, previous_line, + next_line, original): + cache_entry = (target, previous_line, next_line) + if cache_entry in self.long_line_ignore_cache: + return [] + + if target.lstrip().startswith('#'): + # Wrap commented lines. + return shorten_comment( + line=target, + max_line_length=self.options.max_line_length, + last_comment=not next_line.lstrip().startswith('#')) + + fixed = get_fixed_long_line( + target=target, + previous_line=previous_line, + original=original, + indent_word=self.indent_word, + max_line_length=self.options.max_line_length, + aggressive=self.options.aggressive, + experimental=self.options.experimental, + verbose=self.options.verbose) + if fixed and not code_almost_equal(original, fixed): + return fixed + else: + self.long_line_ignore_cache.add(cache_entry) + return None + + def fix_e502(self, result): + """Remove extraneous escape of newline.""" + line_index = result['line'] - 1 + target = self.source[line_index] + self.source[line_index] = target.rstrip('\n\r \t\\') + '\n' + + def fix_e701(self, result): + """Put colon-separated compound statement on separate lines.""" + line_index = result['line'] - 1 + target = self.source[line_index] + c = result['column'] + + fixed_source = (target[:c] + '\n' + + _get_indentation(target) + self.indent_word + + target[c:].lstrip('\n\r \t\\')) + self.source[result['line'] - 1] = fixed_source + return [result['line'], result['line'] + 1] + + def fix_e702(self, result, logical): + """Put semicolon-separated compound statement on separate lines.""" + if not logical: + return [] # pragma: no cover + logical_lines = logical[2] + + line_index = result['line'] - 1 + target = self.source[line_index] + + if target.rstrip().endswith('\\'): + # Normalize '1; \\\n2' into '1; 2'. + self.source[line_index] = target.rstrip('\n \r\t\\') + self.source[line_index + 1] = self.source[line_index + 1].lstrip() + return [line_index + 1, line_index + 2] + + if target.rstrip().endswith(';'): + self.source[line_index] = target.rstrip('\n \r\t;') + '\n' + return [line_index + 1] + + offset = result['column'] - 1 + first = target[:offset].rstrip(';').rstrip() + second = (_get_indentation(logical_lines[0]) + + target[offset:].lstrip(';').lstrip()) + + self.source[line_index] = first + '\n' + second + return [line_index + 1] + + def fix_e711(self, result): + """Fix comparison with None.""" + line_index = result['line'] - 1 + target = self.source[line_index] + offset = result['column'] - 1 + + right_offset = offset + 2 + if right_offset >= len(target): + return [] + + left = target[:offset].rstrip() + center = target[offset:right_offset] + right = target[right_offset:].lstrip() + + if not right.startswith('None'): + return [] + + if center.strip() == '==': + new_center = 'is' + elif center.strip() == '!=': + new_center = 'is not' + else: + return [] + + self.source[line_index] = ' '.join([left, new_center, right]) + + def fix_e712(self, result): + """Fix comparison with boolean.""" + line_index = result['line'] - 1 + target = self.source[line_index] + offset = result['column'] - 1 + + # Handle very easy "not" special cases. + if re.match(r'^\s*if \w+ == False:$', target): + self.source[line_index] = re.sub(r'if (\w+) == False:', + r'if not \1:', target, count=1) + elif re.match(r'^\s*if \w+ != True:$', target): + self.source[line_index] = re.sub(r'if (\w+) != True:', + r'if not \1:', target, count=1) + else: + right_offset = offset + 2 + if right_offset >= len(target): + return [] + + left = target[:offset].rstrip() + center = target[offset:right_offset] + right = target[right_offset:].lstrip() + + # Handle simple cases only. + new_right = None + if center.strip() == '==': + if re.match(r'\bTrue\b', right): + new_right = re.sub(r'\bTrue\b *', '', right, count=1) + elif center.strip() == '!=': + if re.match(r'\bFalse\b', right): + new_right = re.sub(r'\bFalse\b *', '', right, count=1) + + if new_right is None: + return [] + + if new_right[0].isalnum(): + new_right = ' ' + new_right + + self.source[line_index] = left + new_right + + def fix_e713(self, result): + """Fix non-membership check.""" + line_index = result['line'] - 1 + target = self.source[line_index] + + # Handle very easy case only. + if re.match(r'^\s*if not \w+ in \w+:$', target): + self.source[line_index] = re.sub(r'if not (\w+) in (\w+):', + r'if \1 not in \2:', + target, + count=1) + + def fix_w291(self, result): + """Remove trailing whitespace.""" + fixed_line = self.source[result['line'] - 1].rstrip() + self.source[result['line'] - 1] = fixed_line + '\n' + + +def get_fixed_long_line(target, previous_line, original, + indent_word=' ', max_line_length=79, + aggressive=False, experimental=False, verbose=False): + """Break up long line and return result. + + Do this by generating multiple reformatted candidates and then + ranking the candidates to heuristically select the best option. + + """ + indent = _get_indentation(target) + source = target[len(indent):] + assert source.lstrip() == source + + # Check for partial multiline. + tokens = list(generate_tokens(source)) + + candidates = shorten_line( + tokens, source, indent, + indent_word, + max_line_length, + aggressive=aggressive, + experimental=experimental, + previous_line=previous_line) + + # Also sort alphabetically as a tie breaker (for determinism). + candidates = sorted( + sorted(set(candidates).union([target, original])), + key=lambda x: line_shortening_rank(x, + indent_word, + max_line_length, + experimental)) + + if verbose >= 4: + print(('-' * 79 + '\n').join([''] + candidates + ['']), + file=codecs.getwriter('utf-8')(sys.stderr.buffer + if hasattr(sys.stderr, + 'buffer') + else sys.stderr)) + + if candidates: + return candidates[0] + + +def join_logical_line(logical_line): + """Return single line based on logical line input.""" + indentation = _get_indentation(logical_line) + + return indentation + untokenize_without_newlines( + generate_tokens(logical_line.lstrip())) + '\n' + + +def untokenize_without_newlines(tokens): + """Return source code based on tokens.""" + text = '' + last_row = 0 + last_column = -1 + + for t in tokens: + token_string = t[1] + (start_row, start_column) = t[2] + (end_row, end_column) = t[3] + + if start_row > last_row: + last_column = 0 + if ( + (start_column > last_column or token_string == '\n') and + not text.endswith(' ') + ): + text += ' ' + + if token_string != '\n': + text += token_string + + last_row = end_row + last_column = end_column + + return text + + +def _find_logical(source_lines): + # Make a variable which is the index of all the starts of lines. + logical_start = [] + logical_end = [] + last_newline = True + parens = 0 + for t in generate_tokens(''.join(source_lines)): + if t[0] in [tokenize.COMMENT, tokenize.DEDENT, + tokenize.INDENT, tokenize.NL, + tokenize.ENDMARKER]: + continue + if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]: + last_newline = True + logical_end.append((t[3][0] - 1, t[2][1])) + continue + if last_newline and not parens: + logical_start.append((t[2][0] - 1, t[2][1])) + last_newline = False + if t[0] == tokenize.OP: + if t[1] in '([{': + parens += 1 + elif t[1] in '}])': + parens -= 1 + return (logical_start, logical_end) + + +def _get_logical(source_lines, result, logical_start, logical_end): + """Return the logical line corresponding to the result. + + Assumes input is already E702-clean. + + """ + row = result['line'] - 1 + col = result['column'] - 1 + ls = None + le = None + for i in range(0, len(logical_start), 1): + assert logical_end + x = logical_end[i] + if x[0] > row or (x[0] == row and x[1] > col): + le = x + ls = logical_start[i] + break + if ls is None: + return None + original = source_lines[ls[0]:le[0] + 1] + return ls, le, original + + +def get_item(items, index, default=None): + if 0 <= index < len(items): + return items[index] + else: + return default + + +def reindent(source, indent_size): + """Reindent all lines.""" + reindenter = Reindenter(source) + return reindenter.run(indent_size) + + +def code_almost_equal(a, b): + """Return True if code is similar. + + Ignore whitespace when comparing specific line. + + """ + split_a = split_and_strip_non_empty_lines(a) + split_b = split_and_strip_non_empty_lines(b) + + if len(split_a) != len(split_b): + return False + + for index in range(len(split_a)): + if ''.join(split_a[index].split()) != ''.join(split_b[index].split()): + return False + + return True + + +def split_and_strip_non_empty_lines(text): + """Return lines split by newline. + + Ignore empty lines. + + """ + return [line.strip() for line in text.splitlines() if line.strip()] + + +def fix_e265(source, aggressive=False): # pylint: disable=unused-argument + """Format block comments.""" + if '#' not in source: + # Optimization. + return source + + ignored_line_numbers = multiline_string_lines( + source, + include_docstrings=True) | set(commented_out_code_lines(source)) + + fixed_lines = [] + sio = io.StringIO(source) + line_number = 0 + for line in sio.readlines(): + line_number += 1 + if ( + line.lstrip().startswith('#') and + line_number not in ignored_line_numbers + ): + indentation = _get_indentation(line) + line = line.lstrip() + + # Normalize beginning if not a shebang. + if len(line) > 1: + if ( + # Leave multiple spaces like '# ' alone. + (line.count('#') > 1 or line[1].isalnum()) + # Leave stylistic outlined blocks alone. + and not line.rstrip().endswith('#') + ): + line = '# ' + line.lstrip('# \t') + + fixed_lines.append(indentation + line) + else: + fixed_lines.append(line) + + return ''.join(fixed_lines) + + +def refactor(source, fixer_names, ignore=None): + """Return refactored code using lib2to3. + + Skip if ignore string is produced in the refactored code. + + """ + check_lib2to3() + from lib2to3 import pgen2 + try: + new_text = refactor_with_2to3(source, + fixer_names=fixer_names) + except (pgen2.parse.ParseError, + SyntaxError, + UnicodeDecodeError, + UnicodeEncodeError): + return source + + if ignore: + if ignore in new_text and ignore not in source: + return source + + return new_text + + +def code_to_2to3(select, ignore): + fixes = set() + for code, fix in CODE_TO_2TO3.items(): + if code_match(code, select=select, ignore=ignore): + fixes |= set(fix) + return fixes + + +def fix_2to3(source, aggressive=True, select=None, ignore=None): + """Fix various deprecated code (via lib2to3).""" + if not aggressive: + return source + + select = select or [] + ignore = ignore or [] + + return refactor(source, + code_to_2to3(select=select, + ignore=ignore)) + + +def fix_w602(source, aggressive=True): + """Fix deprecated form of raising exception.""" + if not aggressive: + return source + + return refactor(source, ['raise'], + ignore='with_traceback') + + +def find_newline(source): + """Return type of newline used in source. + + Input is a list of lines. + + """ + assert not isinstance(source, unicode) + + counter = collections.defaultdict(int) + for line in source: + if line.endswith(CRLF): + counter[CRLF] += 1 + elif line.endswith(CR): + counter[CR] += 1 + elif line.endswith(LF): + counter[LF] += 1 + + return (sorted(counter, key=counter.get, reverse=True) or [LF])[0] + + +def _get_indentword(source): + """Return indentation type.""" + indent_word = ' ' # Default in case source has no indentation + try: + for t in generate_tokens(source): + if t[0] == token.INDENT: + indent_word = t[1] + break + except (SyntaxError, tokenize.TokenError): + pass + return indent_word + + +def _get_indentation(line): + """Return leading whitespace.""" + if line.strip(): + non_whitespace_index = len(line) - len(line.lstrip()) + return line[:non_whitespace_index] + else: + return '' + + +def get_diff_text(old, new, filename): + """Return text of unified diff between old and new.""" + newline = '\n' + diff = difflib.unified_diff( + old, new, + 'original/' + filename, + 'fixed/' + filename, + lineterm=newline) + + text = '' + for line in diff: + text += line + + # Work around missing newline (http://bugs.python.org/issue2142). + if text and not line.endswith(newline): + text += newline + r'\ No newline at end of file' + newline + + return text + + +def _priority_key(pep8_result): + """Key for sorting PEP8 results. + + Global fixes should be done first. This is important for things like + indentation. + + """ + priority = [ + # Fix multiline colon-based before semicolon based. + 'e701', + # Break multiline statements early. + 'e702', + # Things that make lines longer. + 'e225', 'e231', + # Remove extraneous whitespace before breaking lines. + 'e201', + # Shorten whitespace in comment before resorting to wrapping. + 'e262' + ] + middle_index = 10000 + lowest_priority = [ + # We need to shorten lines last since the logical fixer can get in a + # loop, which causes us to exit early. + 'e501' + ] + key = pep8_result['id'].lower() + try: + return priority.index(key) + except ValueError: + try: + return middle_index + lowest_priority.index(key) + 1 + except ValueError: + return middle_index + + +def shorten_line(tokens, source, indentation, indent_word, max_line_length, + aggressive=False, experimental=False, previous_line=''): + """Separate line at OPERATOR. + + Multiple candidates will be yielded. + + """ + for candidate in _shorten_line(tokens=tokens, + source=source, + indentation=indentation, + indent_word=indent_word, + aggressive=aggressive, + previous_line=previous_line): + yield candidate + + if aggressive: + for key_token_strings in SHORTEN_OPERATOR_GROUPS: + shortened = _shorten_line_at_tokens( + tokens=tokens, + source=source, + indentation=indentation, + indent_word=indent_word, + key_token_strings=key_token_strings, + aggressive=aggressive) + + if shortened is not None and shortened != source: + yield shortened + + if experimental: + for shortened in _shorten_line_at_tokens_new( + tokens=tokens, + source=source, + indentation=indentation, + max_line_length=max_line_length): + + yield shortened + + +def _shorten_line(tokens, source, indentation, indent_word, + aggressive=False, previous_line=''): + """Separate line at OPERATOR. + + The input is expected to be free of newlines except for inside multiline + strings and at the end. + + Multiple candidates will be yielded. + + """ + for (token_type, + token_string, + start_offset, + end_offset) in token_offsets(tokens): + + if ( + token_type == tokenize.COMMENT and + not is_probably_part_of_multiline(previous_line) and + not is_probably_part_of_multiline(source) and + not source[start_offset + 1:].strip().lower().startswith( + ('noqa', 'pragma:', 'pylint:')) + ): + # Move inline comments to previous line. + first = source[:start_offset] + second = source[start_offset:] + yield (indentation + second.strip() + '\n' + + indentation + first.strip() + '\n') + elif token_type == token.OP and token_string != '=': + # Don't break on '=' after keyword as this violates PEP 8. + + assert token_type != token.INDENT + + first = source[:end_offset] + + second_indent = indentation + if first.rstrip().endswith('('): + second_indent += indent_word + elif '(' in first: + second_indent += ' ' * (1 + first.find('(')) + else: + second_indent += indent_word + + second = (second_indent + source[end_offset:].lstrip()) + if ( + not second.strip() or + second.lstrip().startswith('#') + ): + continue + + # Do not begin a line with a comma + if second.lstrip().startswith(','): + continue + # Do end a line with a dot + if first.rstrip().endswith('.'): + continue + if token_string in '+-*/': + fixed = first + ' \\' + '\n' + second + else: + fixed = first + '\n' + second + + # Only fix if syntax is okay. + if check_syntax(normalize_multiline(fixed) + if aggressive else fixed): + yield indentation + fixed + + +# A convenient way to handle tokens. +Token = collections.namedtuple('Token', ['token_type', 'token_string', + 'spos', 'epos', 'line']) + + +class ReformattedLines(object): + + """The reflowed lines of atoms. + + Each part of the line is represented as an "atom." They can be moved + around when need be to get the optimal formatting. + + """ + + ########################################################################### + # Private Classes + + class _Indent(object): + + """Represent an indentation in the atom stream.""" + + def __init__(self, indent_amt): + self._indent_amt = indent_amt + + def emit(self): + return ' ' * self._indent_amt + + @property + def size(self): + return self._indent_amt + + class _Space(object): + + """Represent a space in the atom stream.""" + + def emit(self): + return ' ' + + @property + def size(self): + return 1 + + class _LineBreak(object): + + """Represent a line break in the atom stream.""" + + def emit(self): + return '\n' + + @property + def size(self): + return 0 + + def __init__(self, max_line_length): + self._max_line_length = max_line_length + self._lines = [] + self._bracket_depth = 0 + self._prev_item = None + self._prev_prev_item = None + + def __repr__(self): + return self.emit() + + ########################################################################### + # Public Methods + + def add(self, obj, indent_amt, break_after_open_bracket): + if isinstance(obj, Atom): + self._add_item(obj, indent_amt) + return + + self._add_container(obj, indent_amt, break_after_open_bracket) + + def add_comment(self, item): + num_spaces = 2 + if len(self._lines) > 1: + if isinstance(self._lines[-1], self._Space): + num_spaces -= 1 + if len(self._lines) > 2: + if isinstance(self._lines[-2], self._Space): + num_spaces -= 1 + + while num_spaces > 0: + self._lines.append(self._Space()) + num_spaces -= 1 + self._lines.append(item) + + def add_indent(self, indent_amt): + self._lines.append(self._Indent(indent_amt)) + + def add_line_break(self, indent): + self._lines.append(self._LineBreak()) + self.add_indent(len(indent)) + + def add_line_break_at(self, index, indent_amt): + self._lines.insert(index, self._LineBreak()) + self._lines.insert(index + 1, self._Indent(indent_amt)) + + def add_space_if_needed(self, curr_text, equal=False): + if ( + not self._lines or isinstance( + self._lines[-1], (self._LineBreak, self._Indent, self._Space)) + ): + return + + prev_text = unicode(self._prev_item) + prev_prev_text = ( + unicode(self._prev_prev_item) if self._prev_prev_item else '') + + if ( + # The previous item was a keyword or identifier and the current + # item isn't an operator that doesn't require a space. + ((self._prev_item.is_keyword or self._prev_item.is_string or + self._prev_item.is_name or self._prev_item.is_number) and + (curr_text[0] not in '([{.,:}])' or + (curr_text[0] == '=' and equal))) or + + # Don't place spaces around a '.', unless it's in an 'import' + # statement. + ((prev_prev_text != 'from' and prev_text[-1] != '.' and + curr_text != 'import') and + + # Don't place a space before a colon. + curr_text[0] != ':' and + + # Don't split up ending brackets by spaces. + ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or + + # Put a space after a colon or comma. + prev_text[-1] in ':,' or + + # Put space around '=' if asked to. + (equal and prev_text == '=') or + + # Put spaces around non-unary arithmetic operators. + ((self._prev_prev_item and + (prev_text not in '+-' and + (self._prev_prev_item.is_name or + self._prev_prev_item.is_number or + self._prev_prev_item.is_string)) and + prev_text in ('+', '-', '%', '*', '/', '//', '**'))))) + ): + self._lines.append(self._Space()) + + def previous_item(self): + """Return the previous non-whitespace item.""" + return self._prev_item + + def fits_on_current_line(self, item_extent): + return self.current_size() + item_extent <= self._max_line_length + + def current_size(self): + """The size of the current line minus the indentation.""" + size = 0 + for item in reversed(self._lines): + size += item.size + if isinstance(item, self._LineBreak): + break + + return size + + def line_empty(self): + return (self._lines and + isinstance(self._lines[-1], + (self._LineBreak, self._Indent))) + + def emit(self): + string = '' + for item in self._lines: + if isinstance(item, self._LineBreak): + string = string.rstrip() + string += item.emit() + + return string.rstrip() + '\n' + + ########################################################################### + # Private Methods + + def _add_item(self, item, indent_amt): + """Add an item to the line. + + Reflow the line to get the best formatting after the item is + inserted. The bracket depth indicates if the item is being + inserted inside of a container or not. + + """ + if self._prev_item and self._prev_item.is_string and item.is_string: + # Place consecutive string literals on separate lines. + self._lines.append(self._LineBreak()) + self._lines.append(self._Indent(indent_amt)) + + item_text = unicode(item) + if self._lines and self._bracket_depth: + # Adding the item into a container. + self._prevent_default_initializer_splitting(item, indent_amt) + + if item_text in '.,)]}': + self._split_after_delimiter(item, indent_amt) + + elif self._lines and not self.line_empty(): + # Adding the item outside of a container. + if self.fits_on_current_line(len(item_text)): + self._enforce_space(item) + + else: + # Line break for the new item. + self._lines.append(self._LineBreak()) + self._lines.append(self._Indent(indent_amt)) + + self._lines.append(item) + self._prev_item, self._prev_prev_item = item, self._prev_item + + if item_text in '([{': + self._bracket_depth += 1 + + elif item_text in '}])': + self._bracket_depth -= 1 + assert self._bracket_depth >= 0 + + def _add_container(self, container, indent_amt, break_after_open_bracket): + actual_indent = indent_amt + 1 + + if ( + unicode(self._prev_item) != '=' and + not self.line_empty() and + not self.fits_on_current_line( + container.size + self._bracket_depth + 2) + ): + + if unicode(container)[0] == '(' and self._prev_item.is_name: + # Don't split before the opening bracket of a call. + break_after_open_bracket = True + actual_indent = indent_amt + 4 + elif ( + break_after_open_bracket or + unicode(self._prev_item) not in '([{' + ): + # If the container doesn't fit on the current line and the + # current line isn't empty, place the container on the next + # line. + self._lines.append(self._LineBreak()) + self._lines.append(self._Indent(indent_amt)) + break_after_open_bracket = False + else: + actual_indent = self.current_size() + 1 + break_after_open_bracket = False + + if isinstance(container, (ListComprehension, IfExpression)): + actual_indent = indent_amt + + # Increase the continued indentation only if recursing on a + # container. + container.reflow(self, ' ' * actual_indent, + break_after_open_bracket=break_after_open_bracket) + + def _prevent_default_initializer_splitting(self, item, indent_amt): + """Prevent splitting between a default initializer. + + When there is a default initializer, it's best to keep it all on + the same line. It's nicer and more readable, even if it goes + over the maximum allowable line length. This goes back along the + current line to determine if we have a default initializer, and, + if so, to remove extraneous whitespaces and add a line + break/indent before it if needed. + + """ + if unicode(item) == '=': + # This is the assignment in the initializer. Just remove spaces for + # now. + self._delete_whitespace() + return + + if (not self._prev_item or not self._prev_prev_item or + unicode(self._prev_item) != '='): + return + + self._delete_whitespace() + prev_prev_index = self._lines.index(self._prev_prev_item) + + if ( + isinstance(self._lines[prev_prev_index - 1], self._Indent) or + self.fits_on_current_line(item.size + 1) + ): + # The default initializer is already the only item on this line. + # Don't insert a newline here. + return + + # Replace the space with a newline/indent combo. + if isinstance(self._lines[prev_prev_index - 1], self._Space): + del self._lines[prev_prev_index - 1] + + self.add_line_break_at(self._lines.index(self._prev_prev_item), + indent_amt) + + def _split_after_delimiter(self, item, indent_amt): + """Split the line only after a delimiter.""" + self._delete_whitespace() + + if self.fits_on_current_line(item.size): + return + + last_space = None + for item in reversed(self._lines): + if ( + last_space and + (not isinstance(item, Atom) or not item.is_colon) + ): + break + else: + last_space = None + if isinstance(item, self._Space): + last_space = item + if isinstance(item, (self._LineBreak, self._Indent)): + return + + if not last_space: + return + + self.add_line_break_at(self._lines.index(last_space), indent_amt) + + def _enforce_space(self, item): + """Enforce a space in certain situations. + + There are cases where we will want a space where normally we + wouldn't put one. This just enforces the addition of a space. + + """ + if isinstance(self._lines[-1], + (self._Space, self._LineBreak, self._Indent)): + return + + if not self._prev_item: + return + + item_text = unicode(item) + prev_text = unicode(self._prev_item) + + # Prefer a space around a '.' in an import statement, and between the + # 'import' and '('. + if ( + (item_text == '.' and prev_text == 'from') or + (item_text == 'import' and prev_text == '.') or + (item_text == '(' and prev_text == 'import') + ): + self._lines.append(self._Space()) + + def _delete_whitespace(self): + """Delete all whitespace from the end of the line.""" + while isinstance(self._lines[-1], (self._Space, self._LineBreak, + self._Indent)): + del self._lines[-1] + + +class Atom(object): + + """The smallest unbreakable unit that can be reflowed.""" + + def __init__(self, atom): + self._atom = atom + + def __repr__(self): + return self._atom.token_string + + def __len__(self): + return self.size + + def reflow( + self, reflowed_lines, continued_indent, extent, + break_after_open_bracket=False, + is_list_comp_or_if_expr=False, + next_is_dot=False + ): + if self._atom.token_type == tokenize.COMMENT: + reflowed_lines.add_comment(self) + return + + total_size = extent if extent else self.size + + if self._atom.token_string not in ',:([{}])': + # Some atoms will need an extra 1-sized space token after them. + total_size += 1 + + prev_item = reflowed_lines.previous_item() + if ( + not is_list_comp_or_if_expr and + not reflowed_lines.fits_on_current_line(total_size) and + not (next_is_dot and + reflowed_lines.fits_on_current_line(self.size + 1)) and + not reflowed_lines.line_empty() and + not self.is_colon and + not (prev_item and prev_item.is_name and + unicode(self) == '(') + ): + # Start a new line if there is already something on the line and + # adding this atom would make it go over the max line length. + reflowed_lines.add_line_break(continued_indent) + else: + reflowed_lines.add_space_if_needed(unicode(self)) + + reflowed_lines.add(self, len(continued_indent), + break_after_open_bracket) + + def emit(self): + return self.__repr__() + + @property + def is_keyword(self): + return keyword.iskeyword(self._atom.token_string) + + @property + def is_string(self): + return self._atom.token_type == tokenize.STRING + + @property + def is_name(self): + return self._atom.token_type == tokenize.NAME + + @property + def is_number(self): + return self._atom.token_type == tokenize.NUMBER + + @property + def is_comma(self): + return self._atom.token_string == ',' + + @property + def is_colon(self): + return self._atom.token_string == ':' + + @property + def size(self): + return len(self._atom.token_string) + + +class Container(object): + + """Base class for all container types.""" + + def __init__(self, items): + self._items = items + + def __repr__(self): + string = '' + last_was_keyword = False + + for item in self._items: + if item.is_comma: + string += ', ' + elif item.is_colon: + string += ': ' + else: + item_string = unicode(item) + if ( + string and + (last_was_keyword or + (not string.endswith(tuple('([{,.:}]) ')) and + not item_string.startswith(tuple('([{,.:}])')))) + ): + string += ' ' + string += item_string + + last_was_keyword = item.is_keyword + return string + + def __iter__(self): + for element in self._items: + yield element + + def __getitem__(self, idx): + return self._items[idx] + + def reflow(self, reflowed_lines, continued_indent, + break_after_open_bracket=False): + last_was_container = False + for (index, item) in enumerate(self._items): + next_item = get_item(self._items, index + 1) + + if isinstance(item, Atom): + is_list_comp_or_if_expr = ( + isinstance(self, (ListComprehension, IfExpression))) + item.reflow(reflowed_lines, continued_indent, + self._get_extent(index), + is_list_comp_or_if_expr=is_list_comp_or_if_expr, + next_is_dot=(next_item and + unicode(next_item) == '.')) + if last_was_container and item.is_comma: + reflowed_lines.add_line_break(continued_indent) + last_was_container = False + else: # isinstance(item, Container) + reflowed_lines.add(item, len(continued_indent), + break_after_open_bracket) + last_was_container = not isinstance(item, (ListComprehension, + IfExpression)) + + if ( + break_after_open_bracket and index == 0 and + # Prefer to keep empty containers together instead of + # separating them. + unicode(item) == self.open_bracket and + (not next_item or unicode(next_item) != self.close_bracket) and + (len(self._items) != 3 or not isinstance(next_item, Atom)) + ): + reflowed_lines.add_line_break(continued_indent) + break_after_open_bracket = False + else: + next_next_item = get_item(self._items, index + 2) + if ( + unicode(item) not in ['.', '%', 'in'] and + next_item and not isinstance(next_item, Container) and + unicode(next_item) != ':' and + next_next_item and (not isinstance(next_next_item, Atom) or + unicode(next_item) == 'not') and + not reflowed_lines.line_empty() and + not reflowed_lines.fits_on_current_line( + self._get_extent(index + 1) + 2) + ): + reflowed_lines.add_line_break(continued_indent) + + def _get_extent(self, index): + """The extent of the full element. + + E.g., the length of a function call or keyword. + + """ + extent = 0 + prev_item = get_item(self._items, index - 1) + seen_dot = prev_item and unicode(prev_item) == '.' + while index < len(self._items): + item = get_item(self._items, index) + index += 1 + + if isinstance(item, (ListComprehension, IfExpression)): + break + + if isinstance(item, Container): + if prev_item and prev_item.is_name: + if seen_dot: + extent += 1 + else: + extent += item.size + + prev_item = item + continue + elif (unicode(item) not in ['.', '=', ':', 'not'] and + not item.is_name and not item.is_string): + break + + if unicode(item) == '.': + seen_dot = True + + extent += item.size + prev_item = item + + return extent + + @property + def is_string(self): + return False + + @property + def size(self): + return len(self.__repr__()) + + @property + def is_keyword(self): + return False + + @property + def is_name(self): + return False + + @property + def is_comma(self): + return False + + @property + def is_colon(self): + return False + + @property + def open_bracket(self): + return None + + @property + def close_bracket(self): + return None + + +class Tuple(Container): + + """A high-level representation of a tuple.""" + + @property + def open_bracket(self): + return '(' + + @property + def close_bracket(self): + return ')' + + +class List(Container): + + """A high-level representation of a list.""" + + @property + def open_bracket(self): + return '[' + + @property + def close_bracket(self): + return ']' + + +class DictOrSet(Container): + + """A high-level representation of a dictionary or set.""" + + @property + def open_bracket(self): + return '{' + + @property + def close_bracket(self): + return '}' + + +class ListComprehension(Container): + + """A high-level representation of a list comprehension.""" + + @property + def size(self): + length = 0 + for item in self._items: + if isinstance(item, IfExpression): + break + length += item.size + return length + + +class IfExpression(Container): + + """A high-level representation of an if-expression.""" + + +def _parse_container(tokens, index, for_or_if=None): + """Parse a high-level container, such as a list, tuple, etc.""" + + # Store the opening bracket. + items = [Atom(Token(*tokens[index]))] + index += 1 + + num_tokens = len(tokens) + while index < num_tokens: + tok = Token(*tokens[index]) + + if tok.token_string in ',)]}': + # First check if we're at the end of a list comprehension or + # if-expression. Don't add the ending token as part of the list + # comprehension or if-expression, because they aren't part of those + # constructs. + if for_or_if == 'for': + return (ListComprehension(items), index - 1) + + elif for_or_if == 'if': + return (IfExpression(items), index - 1) + + # We've reached the end of a container. + items.append(Atom(tok)) + + # If not, then we are at the end of a container. + if tok.token_string == ')': + # The end of a tuple. + return (Tuple(items), index) + + elif tok.token_string == ']': + # The end of a list. + return (List(items), index) + + elif tok.token_string == '}': + # The end of a dictionary or set. + return (DictOrSet(items), index) + + elif tok.token_string in '([{': + # A sub-container is being defined. + (container, index) = _parse_container(tokens, index) + items.append(container) + + elif tok.token_string == 'for': + (container, index) = _parse_container(tokens, index, 'for') + items.append(container) + + elif tok.token_string == 'if': + (container, index) = _parse_container(tokens, index, 'if') + items.append(container) + + else: + items.append(Atom(tok)) + + index += 1 + + return (None, None) + + +def _parse_tokens(tokens): + """Parse the tokens. + + This converts the tokens into a form where we can manipulate them + more easily. + + """ + + index = 0 + parsed_tokens = [] + + num_tokens = len(tokens) + while index < num_tokens: + tok = Token(*tokens[index]) + + assert tok.token_type != token.INDENT + if tok.token_type == tokenize.NEWLINE: + # There's only one newline and it's at the end. + break + + if tok.token_string in '([{': + (container, index) = _parse_container(tokens, index) + if not container: + return None + parsed_tokens.append(container) + else: + parsed_tokens.append(Atom(tok)) + + index += 1 + + return parsed_tokens + + +def _reflow_lines(parsed_tokens, indentation, max_line_length, + start_on_prefix_line): + """Reflow the lines so that it looks nice.""" + + if unicode(parsed_tokens[0]) == 'def': + # A function definition gets indented a bit more. + continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE + else: + continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE + + break_after_open_bracket = not start_on_prefix_line + + lines = ReformattedLines(max_line_length) + lines.add_indent(len(indentation.lstrip('\r\n'))) + + if not start_on_prefix_line: + # If splitting after the opening bracket will cause the first element + # to be aligned weirdly, don't try it. + first_token = get_item(parsed_tokens, 0) + second_token = get_item(parsed_tokens, 1) + + if ( + first_token and second_token and + unicode(second_token)[0] == '(' and + len(indentation) + len(first_token) + 1 == len(continued_indent) + ): + return None + + for item in parsed_tokens: + lines.add_space_if_needed(unicode(item), equal=True) + + save_continued_indent = continued_indent + if start_on_prefix_line and isinstance(item, Container): + start_on_prefix_line = False + continued_indent = ' ' * (lines.current_size() + 1) + + item.reflow(lines, continued_indent, break_after_open_bracket) + continued_indent = save_continued_indent + + return lines.emit() + + +def _shorten_line_at_tokens_new(tokens, source, indentation, + max_line_length): + """Shorten the line taking its length into account. + + The input is expected to be free of newlines except for inside + multiline strings and at the end. + + """ + # Yield the original source so to see if it's a better choice than the + # shortened candidate lines we generate here. + yield indentation + source + + parsed_tokens = _parse_tokens(tokens) + + if parsed_tokens: + # Perform two reflows. The first one starts on the same line as the + # prefix. The second starts on the line after the prefix. + fixed = _reflow_lines(parsed_tokens, indentation, max_line_length, + start_on_prefix_line=True) + if fixed and check_syntax(normalize_multiline(fixed.lstrip())): + yield fixed + + fixed = _reflow_lines(parsed_tokens, indentation, max_line_length, + start_on_prefix_line=False) + if fixed and check_syntax(normalize_multiline(fixed.lstrip())): + yield fixed + + +def _shorten_line_at_tokens(tokens, source, indentation, indent_word, + key_token_strings, aggressive): + """Separate line by breaking at tokens in key_token_strings. + + The input is expected to be free of newlines except for inside + multiline strings and at the end. + + """ + offsets = [] + for (index, _t) in enumerate(token_offsets(tokens)): + (token_type, + token_string, + start_offset, + end_offset) = _t + + assert token_type != token.INDENT + + if token_string in key_token_strings: + # Do not break in containers with zero or one items. + unwanted_next_token = { + '(': ')', + '[': ']', + '{': '}'}.get(token_string) + if unwanted_next_token: + if ( + get_item(tokens, + index + 1, + default=[None, None])[1] == unwanted_next_token or + get_item(tokens, + index + 2, + default=[None, None])[1] == unwanted_next_token + ): + continue + + if ( + index > 2 and token_string == '(' and + tokens[index - 1][1] in ',(%[' + ): + # Don't split after a tuple start, or before a tuple start if + # the tuple is in a list. + continue + + if end_offset < len(source) - 1: + # Don't split right before newline. + offsets.append(end_offset) + else: + # Break at adjacent strings. These were probably meant to be on + # separate lines in the first place. + previous_token = get_item(tokens, index - 1) + if ( + token_type == tokenize.STRING and + previous_token and previous_token[0] == tokenize.STRING + ): + offsets.append(start_offset) + + current_indent = None + fixed = None + for line in split_at_offsets(source, offsets): + if fixed: + fixed += '\n' + current_indent + line + + for symbol in '([{': + if line.endswith(symbol): + current_indent += indent_word + else: + # First line. + fixed = line + assert not current_indent + current_indent = indent_word + + assert fixed is not None + + if check_syntax(normalize_multiline(fixed) + if aggressive > 1 else fixed): + return indentation + fixed + else: + return None + + +def token_offsets(tokens): + """Yield tokens and offsets.""" + end_offset = 0 + previous_end_row = 0 + previous_end_column = 0 + for t in tokens: + token_type = t[0] + token_string = t[1] + (start_row, start_column) = t[2] + (end_row, end_column) = t[3] + + # Account for the whitespace between tokens. + end_offset += start_column + if previous_end_row == start_row: + end_offset -= previous_end_column + + # Record the start offset of the token. + start_offset = end_offset + + # Account for the length of the token itself. + end_offset += len(token_string) + + yield (token_type, + token_string, + start_offset, + end_offset) + + previous_end_row = end_row + previous_end_column = end_column + + +def normalize_multiline(line): + """Normalize multiline-related code that will cause syntax error. + + This is for purposes of checking syntax. + + """ + if line.startswith('def ') and line.rstrip().endswith(':'): + return line + ' pass' + elif line.startswith('return '): + return 'def _(): ' + line + elif line.startswith('@'): + return line + 'def _(): pass' + elif line.startswith('class '): + return line + ' pass' + elif line.startswith('if '): + return line + ' pass' + else: + return line + + +def fix_whitespace(line, offset, replacement): + """Replace whitespace at offset and return fixed line.""" + # Replace escaped newlines too + left = line[:offset].rstrip('\n\r \t\\') + right = line[offset:].lstrip('\n\r \t\\') + if right.startswith('#'): + return line + else: + return left + replacement + right + + +def _execute_pep8(pep8_options, source): + """Execute pep8 via python method calls.""" + class QuietReport(pep8.BaseReport): + + """Version of checker that does not print.""" + + def __init__(self, options): + super(QuietReport, self).__init__(options) + self.__full_error_results = [] + + def error(self, line_number, offset, text, _): + """Collect errors.""" + code = super(QuietReport, self).error(line_number, offset, text, _) + if code: + self.__full_error_results.append( + {'id': code, + 'line': line_number, + 'column': offset + 1, + 'info': text}) + + def full_error_results(self): + """Return error results in detail. + + Results are in the form of a list of dictionaries. Each + dictionary contains 'id', 'line', 'column', and 'info'. + + """ + return self.__full_error_results + + checker = pep8.Checker('', lines=source, + reporter=QuietReport, **pep8_options) + checker.check_all() + return checker.report.full_error_results() + + +def _remove_leading_and_normalize(line): + return line.lstrip().rstrip(CR + LF) + '\n' + + +class Reindenter(object): + + """Reindents badly-indented code to uniformly use four-space indentation. + + Released to the public domain, by Tim Peters, 03 October 2000. + + """ + + def __init__(self, input_text): + sio = io.StringIO(input_text) + source_lines = sio.readlines() + + self.string_content_line_numbers = multiline_string_lines(input_text) + + # File lines, rstripped & tab-expanded. Dummy at start is so + # that we can use tokenize's 1-based line numbering easily. + # Note that a line is all-blank iff it is a newline. + self.lines = [] + line_number = 0 + for line in source_lines: + line_number += 1 + # Do not modify if inside a multiline string. + if line_number in self.string_content_line_numbers: + self.lines.append(line) + else: + # Only expand leading tabs. + self.lines.append(_get_indentation(line).expandtabs() + + _remove_leading_and_normalize(line)) + + self.lines.insert(0, None) + self.index = 1 # index into self.lines of next line + self.input_text = input_text + + def run(self, indent_size=DEFAULT_INDENT_SIZE): + """Fix indentation and return modified line numbers. + + Line numbers are indexed at 1. + + """ + if indent_size < 1: + return self.input_text + + try: + stats = _reindent_stats(tokenize.generate_tokens(self.getline)) + except (SyntaxError, tokenize.TokenError): + return self.input_text + # Remove trailing empty lines. + lines = self.lines + while lines and lines[-1] == '\n': + lines.pop() + # Sentinel. + stats.append((len(lines), 0)) + # Map count of leading spaces to # we want. + have2want = {} + # Program after transformation. + after = [] + # Copy over initial empty lines -- there's nothing to do until + # we see a line with *something* on it. + i = stats[0][0] + after.extend(lines[1:i]) + for i in range(len(stats) - 1): + thisstmt, thislevel = stats[i] + nextstmt = stats[i + 1][0] + have = _leading_space_count(lines[thisstmt]) + want = thislevel * indent_size + if want < 0: + # A comment line. + if have: + # An indented comment line. If we saw the same + # indentation before, reuse what it most recently + # mapped to. + want = have2want.get(have, -1) + if want < 0: + # Then it probably belongs to the next real stmt. + for j in range(i + 1, len(stats) - 1): + jline, jlevel = stats[j] + if jlevel >= 0: + if have == _leading_space_count(lines[jline]): + want = jlevel * indent_size + break + if want < 0: # Maybe it's a hanging + # comment like this one, + # in which case we should shift it like its base + # line got shifted. + for j in range(i - 1, -1, -1): + jline, jlevel = stats[j] + if jlevel >= 0: + want = (have + _leading_space_count( + after[jline - 1]) - + _leading_space_count(lines[jline])) + break + if want < 0: + # Still no luck -- leave it alone. + want = have + else: + want = 0 + assert want >= 0 + have2want[have] = want + diff = want - have + if diff == 0 or have == 0: + after.extend(lines[thisstmt:nextstmt]) + else: + line_number = thisstmt - 1 + for line in lines[thisstmt:nextstmt]: + line_number += 1 + if line_number in self.string_content_line_numbers: + after.append(line) + elif diff > 0: + if line == '\n': + after.append(line) + else: + after.append(' ' * diff + line) + else: + remove = min(_leading_space_count(line), -diff) + after.append(line[remove:]) + + return ''.join(after) + + def getline(self): + """Line-getter for tokenize.""" + if self.index >= len(self.lines): + line = '' + else: + line = self.lines[self.index] + self.index += 1 + return line + + +def _reindent_stats(tokens): + """Return list of (lineno, indentlevel) pairs. + + One for each stmt and comment line. indentlevel is -1 for comment lines, as + a signal that tokenize doesn't know what to do about them; indeed, they're + our headache! + + """ + find_stmt = 1 # Next token begins a fresh stmt? + level = 0 # Current indent level. + stats = [] + + for t in tokens: + token_type = t[0] + sline = t[2][0] + line = t[4] + + if token_type == tokenize.NEWLINE: + # A program statement, or ENDMARKER, will eventually follow, + # after some (possibly empty) run of tokens of the form + # (NL | COMMENT)* (INDENT | DEDENT+)? + find_stmt = 1 + + elif token_type == tokenize.INDENT: + find_stmt = 1 + level += 1 + + elif token_type == tokenize.DEDENT: + find_stmt = 1 + level -= 1 + + elif token_type == tokenize.COMMENT: + if find_stmt: + stats.append((sline, -1)) + # But we're still looking for a new stmt, so leave + # find_stmt alone. + + elif token_type == tokenize.NL: + pass + + elif find_stmt: + # This is the first "real token" following a NEWLINE, so it + # must be the first token of the next program statement, or an + # ENDMARKER. + find_stmt = 0 + if line: # Not endmarker. + stats.append((sline, level)) + + return stats + + +def _leading_space_count(line): + """Return number of leading spaces in line.""" + i = 0 + while i < len(line) and line[i] == ' ': + i += 1 + return i + + +def refactor_with_2to3(source_text, fixer_names): + """Use lib2to3 to refactor the source. + + Return the refactored source code. + + """ + check_lib2to3() + from lib2to3.refactor import RefactoringTool + fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names] + tool = RefactoringTool(fixer_names=fixers, explicit=fixers) + + from lib2to3.pgen2 import tokenize as lib2to3_tokenize + try: + return unicode(tool.refactor_string(source_text, name='')) + except lib2to3_tokenize.TokenError: + return source_text + + +def check_syntax(code): + """Return True if syntax is okay.""" + try: + return compile(code, '<string>', 'exec') + except (SyntaxError, TypeError, UnicodeDecodeError): + return False + + +def filter_results(source, results, aggressive): + """Filter out spurious reports from pep8. + + If aggressive is True, we allow possibly unsafe fixes (E711, E712). + + """ + non_docstring_string_line_numbers = multiline_string_lines( + source, include_docstrings=False) + all_string_line_numbers = multiline_string_lines( + source, include_docstrings=True) + + commented_out_code_line_numbers = commented_out_code_lines(source) + + for r in results: + issue_id = r['id'].lower() + + if r['line'] in non_docstring_string_line_numbers: + if issue_id.startswith(('e1', 'e501', 'w191')): + continue + + if r['line'] in all_string_line_numbers: + if issue_id in ['e501']: + continue + + # We must offset by 1 for lines that contain the trailing contents of + # multiline strings. + if not aggressive and (r['line'] + 1) in all_string_line_numbers: + # Do not modify multiline strings in non-aggressive mode. Remove + # trailing whitespace could break doctests. + if issue_id.startswith(('w29', 'w39')): + continue + + if aggressive <= 0: + if issue_id.startswith(('e711', 'w6')): + continue + + if aggressive <= 1: + if issue_id.startswith(('e712', 'e713')): + continue + + if r['line'] in commented_out_code_line_numbers: + if issue_id.startswith(('e26', 'e501')): + continue + + yield r + + +def multiline_string_lines(source, include_docstrings=False): + """Return line numbers that are within multiline strings. + + The line numbers are indexed at 1. + + Docstrings are ignored. + + """ + line_numbers = set() + previous_token_type = '' + try: + for t in generate_tokens(source): + token_type = t[0] + start_row = t[2][0] + end_row = t[3][0] + + if token_type == tokenize.STRING and start_row != end_row: + if ( + include_docstrings or + previous_token_type != tokenize.INDENT + ): + # We increment by one since we want the contents of the + # string. + line_numbers |= set(range(1 + start_row, 1 + end_row)) + + previous_token_type = token_type + except (SyntaxError, tokenize.TokenError): + pass + + return line_numbers + + +def commented_out_code_lines(source): + """Return line numbers of comments that are likely code. + + Commented-out code is bad practice, but modifying it just adds even more + clutter. + + """ + line_numbers = [] + try: + for t in generate_tokens(source): + token_type = t[0] + token_string = t[1] + start_row = t[2][0] + line = t[4] + + # Ignore inline comments. + if not line.lstrip().startswith('#'): + continue + + if token_type == tokenize.COMMENT: + stripped_line = token_string.lstrip('#').strip() + if ( + ' ' in stripped_line and + '#' not in stripped_line and + check_syntax(stripped_line) + ): + line_numbers.append(start_row) + except (SyntaxError, tokenize.TokenError): + pass + + return line_numbers + + +def shorten_comment(line, max_line_length, last_comment=False): + """Return trimmed or split long comment line. + + If there are no comments immediately following it, do a text wrap. + Doing this wrapping on all comments in general would lead to jagged + comment text. + + """ + assert len(line) > max_line_length + line = line.rstrip() + + # PEP 8 recommends 72 characters for comment text. + indentation = _get_indentation(line) + '# ' + max_line_length = min(max_line_length, + len(indentation) + 72) + + MIN_CHARACTER_REPEAT = 5 + if ( + len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and + not line[-1].isalnum() + ): + # Trim comments that end with things like --------- + return line[:max_line_length] + '\n' + elif last_comment and re.match(r'\s*#+\s*\w+', line): + import textwrap + split_lines = textwrap.wrap(line.lstrip(' \t#'), + initial_indent=indentation, + subsequent_indent=indentation, + width=max_line_length, + break_long_words=False, + break_on_hyphens=False) + return '\n'.join(split_lines) + '\n' + else: + return line + '\n' + + +def normalize_line_endings(lines, newline): + """Return fixed line endings. + + All lines will be modified to use the most common line ending. + + """ + return [line.rstrip('\n\r') + newline for line in lines] + + +def mutual_startswith(a, b): + return b.startswith(a) or a.startswith(b) + + +def code_match(code, select, ignore): + if ignore: + assert not isinstance(ignore, unicode) + for ignored_code in [c.strip() for c in ignore]: + if mutual_startswith(code.lower(), ignored_code.lower()): + return False + + if select: + assert not isinstance(select, unicode) + for selected_code in [c.strip() for c in select]: + if mutual_startswith(code.lower(), selected_code.lower()): + return True + return False + + return True + + +def fix_code(source, options=None, encoding=None): + """Return fixed source code.""" + if not options: + options = parse_args(['']) + + if not isinstance(source, unicode): + source = source.decode(encoding or locale.getpreferredencoding()) + + sio = io.StringIO(source) + return fix_lines(sio.readlines(), options=options) + + +def fix_lines(source_lines, options, filename=''): + """Return fixed source code.""" + # Transform everything to line feed. Then change them back to original + # before returning fixed source code. + original_newline = find_newline(source_lines) + tmp_source = ''.join(normalize_line_endings(source_lines, '\n')) + + # Keep a history to break out of cycles. + previous_hashes = set() + + if options.line_range: + fixed_source = apply_local_fixes(tmp_source, options) + else: + # Apply global fixes only once (for efficiency). + fixed_source = apply_global_fixes(tmp_source, options) + + passes = 0 + long_line_ignore_cache = set() + while hash(fixed_source) not in previous_hashes: + if options.pep8_passes >= 0 and passes > options.pep8_passes: + break + passes += 1 + + previous_hashes.add(hash(fixed_source)) + + tmp_source = copy.copy(fixed_source) + + fix = FixPEP8( + filename, + options, + contents=tmp_source, + long_line_ignore_cache=long_line_ignore_cache) + + fixed_source = fix.fix() + + sio = io.StringIO(fixed_source) + return ''.join(normalize_line_endings(sio.readlines(), original_newline)) + + +def fix_file(filename, options=None, output=None): + if not options: + options = parse_args([filename]) + + original_source = readlines_from_file(filename) + + fixed_source = original_source + + if options.in_place or output: + encoding = detect_encoding(filename) + + if output: + output = codecs.getwriter(encoding)(output.buffer + if hasattr(output, 'buffer') + else output) + + output = LineEndingWrapper(output) + + fixed_source = fix_lines(fixed_source, options, filename=filename) + + if options.diff: + new = io.StringIO(fixed_source) + new = new.readlines() + diff = get_diff_text(original_source, new, filename) + if output: + output.write(diff) + output.flush() + else: + return diff + elif options.in_place: + fp = open_with_encoding(filename, encoding=encoding, + mode='w') + fp.write(fixed_source) + fp.close() + else: + if output: + output.write(fixed_source) + output.flush() + else: + return fixed_source + + +def global_fixes(): + """Yield multiple (code, function) tuples.""" + for function in globals().values(): + if inspect.isfunction(function): + arguments = inspect.getargspec(function)[0] + if arguments[:1] != ['source']: + continue + + code = extract_code_from_function(function) + if code: + yield (code, function) + + +def apply_global_fixes(source, options, where='global'): + """Run global fixes on source code. + + These are fixes that only need be done once (unlike those in + FixPEP8, which are dependent on pep8). + + """ + if code_match('E101', select=options.select, ignore=options.ignore): + source = reindent(source, + indent_size=options.indent_size) + + for (code, function) in global_fixes(): + if code_match(code, select=options.select, ignore=options.ignore): + if options.verbose: + print('---> Applying {0} fix for {1}'.format(where, + code.upper()), + file=sys.stderr) + source = function(source, + aggressive=options.aggressive) + + source = fix_2to3(source, + aggressive=options.aggressive, + select=options.select, + ignore=options.ignore) + + return source + + +def apply_local_fixes(source, options): + """Ananologus to apply_global_fixes, but runs only those which makes sense + for the given line_range. + + Do as much as we can without breaking code. + + """ + def find_ge(a, x): + """Find leftmost item greater than or equal to x.""" + i = bisect.bisect_left(a, x) + if i != len(a): + return i, a[i] + return len(a) - 1, a[-1] + + def find_le(a, x): + """Find rightmost value less than or equal to x.""" + i = bisect.bisect_right(a, x) + if i: + return i - 1, a[i - 1] + return 0, a[0] + + def local_fix(source, start_log, end_log, + start_lines, end_lines, indents, last_line): + """apply_global_fixes to the source between start_log and end_log. + + The subsource must be the correct syntax of a complete python program + (but all lines may share an indentation). The subsource's shared indent + is removed, fixes are applied and the indent prepended back. Taking + care to not reindent strings. + + last_line is the strict cut off (options.line_range[1]), so that + lines after last_line are not modified. + + """ + if end_log < start_log: + return source + + ind = indents[start_log] + indent = _get_indentation(source[start_lines[start_log]]) + + sl = slice(start_lines[start_log], end_lines[end_log] + 1) + + subsource = source[sl] + # Remove indent from subsource. + if ind: + for line_no in start_lines[start_log:end_log + 1]: + pos = line_no - start_lines[start_log] + subsource[pos] = subsource[pos][ind:] + + # Fix indentation of subsource. + fixed_subsource = apply_global_fixes(''.join(subsource), + options, + where='local') + fixed_subsource = fixed_subsource.splitlines(True) + + # Add back indent for non multi-line strings lines. + msl = multiline_string_lines(''.join(fixed_subsource), + include_docstrings=False) + for i, line in enumerate(fixed_subsource): + if not i + 1 in msl: + fixed_subsource[i] = indent + line if line != '\n' else line + + # We make a special case to look at the final line, if it's a multiline + # *and* the cut off is somewhere inside it, we take the fixed + # subset up until last_line, this assumes that the number of lines + # does not change in this multiline line. + changed_lines = len(fixed_subsource) + if (start_lines[end_log] != end_lines[end_log] + and end_lines[end_log] > last_line): + after_end = end_lines[end_log] - last_line + fixed_subsource = (fixed_subsource[:-after_end] + + source[sl][-after_end:]) + changed_lines -= after_end + + options.line_range[1] = (options.line_range[0] + + changed_lines - 1) + + return (source[:start_lines[start_log]] + + fixed_subsource + + source[end_lines[end_log] + 1:]) + + def is_continued_stmt(line, + continued_stmts=frozenset(['else', 'elif', + 'finally', 'except'])): + return re.split('[ :]', line.strip(), 1)[0] in continued_stmts + + assert options.line_range + start, end = options.line_range + start -= 1 + end -= 1 + last_line = end # We shouldn't modify lines after this cut-off. + + try: + logical = _find_logical(source) + except (SyntaxError, tokenize.TokenError): + return ''.join(source) + + if not logical[0]: + # Just blank lines, this should imply that it will become '\n' ? + return apply_global_fixes(source, options) + + start_lines, indents = zip(*logical[0]) + end_lines, _ = zip(*logical[1]) + + source = source.splitlines(True) + + start_log, start = find_ge(start_lines, start) + end_log, end = find_le(start_lines, end) + + # Look behind one line, if it's indented less than current indent + # then we can move to this previous line knowing that its + # indentation level will not be changed. + if (start_log > 0 + and indents[start_log - 1] < indents[start_log] + and not is_continued_stmt(source[start_log - 1])): + start_log -= 1 + start = start_lines[start_log] + + while start < end: + + if is_continued_stmt(source[start]): + start_log += 1 + start = start_lines[start_log] + continue + + ind = indents[start_log] + for t in itertools.takewhile(lambda t: t[1][1] >= ind, + enumerate(logical[0][start_log:])): + n_log, n = start_log + t[0], t[1][0] + # start shares indent up to n. + + if n <= end: + source = local_fix(source, start_log, n_log, + start_lines, end_lines, + indents, last_line) + start_log = n_log if n == end else n_log + 1 + start = start_lines[start_log] + continue + + else: + # Look at the line after end and see if allows us to reindent. + after_end_log, after_end = find_ge(start_lines, end + 1) + + if indents[after_end_log] > indents[start_log]: + start_log, start = find_ge(start_lines, start + 1) + continue + + if (indents[after_end_log] == indents[start_log] + and is_continued_stmt(source[after_end])): + # find n, the beginning of the last continued statement + # Apply fix to previous block if there is one. + only_block = True + for n, n_ind in logical[0][start_log:end_log + 1][::-1]: + if n_ind == ind and not is_continued_stmt(source[n]): + n_log = start_lines.index(n) + source = local_fix(source, start_log, n_log - 1, + start_lines, end_lines, + indents, last_line) + start_log = n_log + 1 + start = start_lines[start_log] + only_block = False + break + if only_block: + end_log, end = find_le(start_lines, end - 1) + continue + + source = local_fix(source, start_log, end_log, + start_lines, end_lines, + indents, last_line) + break + + return ''.join(source) + + +def extract_code_from_function(function): + """Return code handled by function.""" + if not function.__name__.startswith('fix_'): + return None + + code = re.sub('^fix_', '', function.__name__) + if not code: + return None + + try: + int(code[1:]) + except ValueError: + return None + + return code + + +def create_parser(): + """Return command-line parser.""" + # Do import locally to be friendly to those who use autopep8 as a library + # and are supporting Python 2.6. + import argparse + + parser = argparse.ArgumentParser(description=docstring_summary(__doc__), + prog='autopep8') + parser.add_argument('--version', action='version', + version='%(prog)s ' + __version__) + parser.add_argument('-v', '--verbose', action='count', dest='verbose', + default=0, + help='print verbose messages; ' + 'multiple -v result in more verbose messages') + parser.add_argument('-d', '--diff', action='store_true', dest='diff', + help='print the diff for the fixed source') + parser.add_argument('-i', '--in-place', action='store_true', + help='make changes to files in place') + parser.add_argument('-r', '--recursive', action='store_true', + help='run recursively over directories; ' + 'must be used with --in-place or --diff') + parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1, + help='number of parallel jobs; ' + 'match CPU count if value is less than 1') + parser.add_argument('-p', '--pep8-passes', metavar='n', + default=-1, type=int, + help='maximum number of additional pep8 passes ' + '(default: infinite)') + parser.add_argument('-a', '--aggressive', action='count', default=0, + help='enable non-whitespace changes; ' + 'multiple -a result in more aggressive changes') + parser.add_argument('--experimental', action='store_true', + help='enable experimental fixes') + parser.add_argument('--exclude', metavar='globs', + help='exclude file/directory names that match these ' + 'comma-separated globs') + parser.add_argument('--list-fixes', action='store_true', + help='list codes for fixes; ' + 'used by --ignore and --select') + parser.add_argument('--ignore', metavar='errors', default='', + help='do not fix these errors/warnings ' + '(default: {0})'.format(DEFAULT_IGNORE)) + parser.add_argument('--select', metavar='errors', default='', + help='fix only these errors/warnings (e.g. E4,W)') + parser.add_argument('--max-line-length', metavar='n', default=79, type=int, + help='set maximum allowed line length ' + '(default: %(default)s)') + parser.add_argument('--range', metavar='line', dest='line_range', + default=None, type=int, nargs=2, + help='only fix errors found within this inclusive ' + 'range of line numbers (e.g. 1 99); ' + 'line numbers are indexed at 1') + parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE, + type=int, metavar='n', + help='number of spaces per indent level ' + '(default %(default)s)') + parser.add_argument('files', nargs='*', + help="files to format or '-' for standard in") + + return parser + + +def parse_args(arguments): + """Parse command-line options.""" + parser = create_parser() + args = parser.parse_args(arguments) + + if not args.files and not args.list_fixes: + parser.error('incorrect number of arguments') + + args.files = [decode_filename(name) for name in args.files] + + if '-' in args.files: + if len(args.files) > 1: + parser.error('cannot mix stdin and regular files') + + if args.diff: + parser.error('--diff cannot be used with standard input') + + if args.in_place: + parser.error('--in-place cannot be used with standard input') + + if args.recursive: + parser.error('--recursive cannot be used with standard input') + + if len(args.files) > 1 and not (args.in_place or args.diff): + parser.error('autopep8 only takes one filename as argument ' + 'unless the "--in-place" or "--diff" args are ' + 'used') + + if args.recursive and not (args.in_place or args.diff): + parser.error('--recursive must be used with --in-place or --diff') + + if args.exclude and not args.recursive: + parser.error('--exclude is only relevant when used with --recursive') + + if args.in_place and args.diff: + parser.error('--in-place and --diff are mutually exclusive') + + if args.max_line_length <= 0: + parser.error('--max-line-length must be greater than 0') + + if args.select: + args.select = args.select.split(',') + + if args.ignore: + args.ignore = args.ignore.split(',') + elif not args.select: + if args.aggressive: + # Enable everything by default if aggressive. + args.select = ['E', 'W'] + else: + args.ignore = DEFAULT_IGNORE.split(',') + + if args.exclude: + args.exclude = args.exclude.split(',') + else: + args.exclude = [] + + if args.jobs < 1: + # Do not import multiprocessing globally in case it is not supported + # on the platform. + import multiprocessing + args.jobs = multiprocessing.cpu_count() + + if args.jobs > 1 and not args.in_place: + parser.error('parallel jobs requires --in-place') + + if args.line_range: + if args.line_range[0] <= 0: + parser.error('--range must be positive numbers') + if args.line_range[0] > args.line_range[1]: + parser.error('First value of --range should be less than or equal ' + 'to the second') + + return args + + +def decode_filename(filename): + """Return Unicode filename.""" + if isinstance(filename, unicode): + return filename + else: + return filename.decode(sys.getfilesystemencoding()) + + +def supported_fixes(): + """Yield pep8 error codes that autopep8 fixes. + + Each item we yield is a tuple of the code followed by its + description. + + """ + yield ('E101', docstring_summary(reindent.__doc__)) + + instance = FixPEP8(filename=None, options=None, contents='') + for attribute in dir(instance): + code = re.match('fix_([ew][0-9][0-9][0-9])', attribute) + if code: + yield ( + code.group(1).upper(), + re.sub(r'\s+', ' ', + docstring_summary(getattr(instance, attribute).__doc__)) + ) + + for (code, function) in sorted(global_fixes()): + yield (code.upper() + (4 - len(code)) * ' ', + re.sub(r'\s+', ' ', docstring_summary(function.__doc__))) + + for code in sorted(CODE_TO_2TO3): + yield (code.upper() + (4 - len(code)) * ' ', + re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__))) + + +def docstring_summary(docstring): + """Return summary of docstring.""" + return docstring.split('\n')[0] + + +def line_shortening_rank(candidate, indent_word, max_line_length, + experimental=False): + """Return rank of candidate. + + This is for sorting candidates. + + """ + if not candidate.strip(): + return 0 + + rank = 0 + lines = candidate.split('\n') + + offset = 0 + if ( + not lines[0].lstrip().startswith('#') and + lines[0].rstrip()[-1] not in '([{' + ): + for (opening, closing) in ('()', '[]', '{}'): + # Don't penalize empty containers that aren't split up. Things like + # this "foo(\n )" aren't particularly good. + opening_loc = lines[0].find(opening) + closing_loc = lines[0].find(closing) + if opening_loc >= 0: + if closing_loc < 0 or closing_loc != opening_loc + 1: + offset = max(offset, 1 + opening_loc) + + current_longest = max(offset + len(x.strip()) for x in lines) + + rank += 4 * max(0, current_longest - max_line_length) + + rank += len(lines) + + # Too much variation in line length is ugly. + rank += 2 * standard_deviation(len(line) for line in lines) + + bad_staring_symbol = { + '(': ')', + '[': ']', + '{': '}'}.get(lines[0][-1]) + + if len(lines) > 1: + if ( + bad_staring_symbol and + lines[1].lstrip().startswith(bad_staring_symbol) + ): + rank += 20 + + for lineno, current_line in enumerate(lines): + current_line = current_line.strip() + + if current_line.startswith('#'): + continue + + for bad_start in ['.', '%', '+', '-', '/']: + if current_line.startswith(bad_start): + rank += 100 + + # Do not tolerate operators on their own line. + if current_line == bad_start: + rank += 1000 + + if current_line.endswith(('(', '[', '{', '.')): + # Avoid lonely opening. They result in longer lines. + if len(current_line) <= len(indent_word): + rank += 100 + + # Avoid the ugliness of ", (\n". + if ( + current_line.endswith('(') and + current_line[:-1].rstrip().endswith(',') + ): + rank += 100 + + # Also avoid the ugliness of "foo.\nbar" + if current_line.endswith('.'): + rank += 100 + + if has_arithmetic_operator(current_line): + rank += 100 + + if current_line.endswith(('%', '(', '[', '{')): + rank -= 20 + + # Try to break list comprehensions at the "for". + if current_line.startswith('for '): + rank -= 50 + + if current_line.endswith('\\'): + # If a line ends in \-newline, it may be part of a + # multiline string. In that case, we would like to know + # how long that line is without the \-newline. If it's + # longer than the maximum, or has comments, then we assume + # that the \-newline is an okay candidate and only + # penalize it a bit. + total_len = len(current_line) + lineno += 1 + while lineno < len(lines): + total_len += len(lines[lineno]) + + if lines[lineno].lstrip().startswith('#'): + total_len = max_line_length + break + + if not lines[lineno].endswith('\\'): + break + + lineno += 1 + + if total_len < max_line_length: + rank += 10 + else: + rank += 100 if experimental else 1 + + # Prefer breaking at commas rather than colon. + if ',' in current_line and current_line.endswith(':'): + rank += 10 + + rank += 10 * count_unbalanced_brackets(current_line) + + return max(0, rank) + + +def standard_deviation(numbers): + """Return standard devation.""" + numbers = list(numbers) + if not numbers: + return 0 + mean = sum(numbers) / len(numbers) + return (sum((n - mean) ** 2 for n in numbers) / + len(numbers)) ** .5 + + +def has_arithmetic_operator(line): + """Return True if line contains any arithmetic operators.""" + for operator in pep8.ARITHMETIC_OP: + if operator in line: + return True + + return False + + +def count_unbalanced_brackets(line): + """Return number of unmatched open/close brackets.""" + count = 0 + for opening, closing in ['()', '[]', '{}']: + count += abs(line.count(opening) - line.count(closing)) + + return count + + +def split_at_offsets(line, offsets): + """Split line at offsets. + + Return list of strings. + + """ + result = [] + + previous_offset = 0 + current_offset = 0 + for current_offset in sorted(offsets): + if current_offset < len(line) and previous_offset != current_offset: + result.append(line[previous_offset:current_offset].strip()) + previous_offset = current_offset + + result.append(line[current_offset:]) + + return result + + +class LineEndingWrapper(object): + + r"""Replace line endings to work with sys.stdout. + + It seems that sys.stdout expects only '\n' as the line ending, no matter + the platform. Otherwise, we get repeated line endings. + + """ + + def __init__(self, output): + self.__output = output + + def write(self, s): + self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n')) + + def flush(self): + self.__output.flush() + + +def match_file(filename, exclude): + """Return True if file is okay for modifying/recursing.""" + base_name = os.path.basename(filename) + + if base_name.startswith('.'): + return False + + for pattern in exclude: + if fnmatch.fnmatch(base_name, pattern): + return False + + if not os.path.isdir(filename) and not is_python_file(filename): + return False + + return True + + +def find_files(filenames, recursive, exclude): + """Yield filenames.""" + while filenames: + name = filenames.pop(0) + if recursive and os.path.isdir(name): + for root, directories, children in os.walk(name): + filenames += [os.path.join(root, f) for f in children + if match_file(os.path.join(root, f), + exclude)] + directories[:] = [d for d in directories + if match_file(os.path.join(root, d), + exclude)] + else: + yield name + + +def _fix_file(parameters): + """Helper function for optionally running fix_file() in parallel.""" + if parameters[1].verbose: + print('[file:{0}]'.format(parameters[0]), file=sys.stderr) + try: + fix_file(*parameters) + except IOError as error: + print(unicode(error), file=sys.stderr) + + +def fix_multiple_files(filenames, options, output=None): + """Fix list of files. + + Optionally fix files recursively. + + """ + filenames = find_files(filenames, options.recursive, options.exclude) + if options.jobs > 1: + import multiprocessing + pool = multiprocessing.Pool(options.jobs) + pool.map(_fix_file, + [(name, options) for name in filenames]) + else: + for name in filenames: + _fix_file((name, options, output)) + + +def is_python_file(filename): + """Return True if filename is Python file.""" + if filename.endswith('.py'): + return True + + try: + with open_with_encoding(filename) as f: + first_line = f.readlines(1)[0] + except (IOError, IndexError): + return False + + if not PYTHON_SHEBANG_REGEX.match(first_line): + return False + + return True + + +def is_probably_part_of_multiline(line): + """Return True if line is likely part of a multiline string. + + When multiline strings are involved, pep8 reports the error as being + at the start of the multiline string, which doesn't work for us. + + """ + return ( + '"""' in line or + "'''" in line or + line.rstrip().endswith('\\') + ) + + +def main(): + """Tool main.""" + try: + # Exit on broken pipe. + signal.signal(signal.SIGPIPE, signal.SIG_DFL) + except AttributeError: # pragma: no cover + # SIGPIPE is not available on Windows. + pass + + try: + args = parse_args(sys.argv[1:]) + + if args.list_fixes: + for code, description in sorted(supported_fixes()): + print('{code} - {description}'.format( + code=code, description=description)) + return 0 + + if args.files == ['-']: + assert not args.in_place + + # LineEndingWrapper is unnecessary here due to the symmetry between + # standard in and standard out. + + sys.stdout.write( + fix_code( + sys.stdin.read(), + args, + encoding=sys.stdin.encoding)) + else: + if args.in_place or args.diff: + args.files = list(set(args.files)) + else: + assert len(args.files) == 1 + assert not args.recursive + + fix_multiple_files(args.files, args, sys.stdout) + except KeyboardInterrupt: + return 1 # pragma: no cover + + +class CachedTokenizer(object): + + """A one-element cache around tokenize.generate_tokens(). + + Original code written by Ned Batchelder, in coverage.py. + + """ + + def __init__(self): + self.last_text = None + self.last_tokens = None + + def generate_tokens(self, text): + """A stand-in for tokenize.generate_tokens().""" + if text != self.last_text: + string_io = io.StringIO(text) + self.last_tokens = list( + tokenize.generate_tokens(string_io.readline) + ) + self.last_text = text + return self.last_tokens + +_cached_tokenizer = CachedTokenizer() +generate_tokens = _cached_tokenizer.generate_tokens + + +if __name__ == '__main__': + sys.exit(main()) |