aboutsummaryrefslogtreecommitdiff
path: root/tools/clang/scripts/apply_edits.py
blob: 7d373a95511c29bce85925d49e19b3c7fa357cfd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Applies edits generated by a clang tool that was run on Chromium code.

Synopsis:

  cat run_tool.out | extract_edits.py | apply_edits.py <build dir> <filters...>

For example - to apply edits only to WTF sources:

  ... | apply_edits.py out/gn third_party/WebKit/Source/wtf

In addition to filters specified on the command line, the tool also skips edits
that apply to files that are not covered by git.
"""

import argparse
import collections
import functools
import multiprocessing
import os
import os.path
import subprocess
import sys

# Directory containing this script, with symlinks resolved.
script_dir = os.path.dirname(os.path.realpath(__file__))
# Put the sibling 'pylib' directory at the front of the module search path so
# that the `clang` import below can be resolved (presumably the package lives
# there -- confirm against the repository layout).
tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
sys.path.insert(0, tool_dir)

from clang import compile_db

# One edit parsed from the tool output: the edit kind, the offset and length
# of the range to replace in the target file, and the replacement text.
_EDIT_FIELDS = ['edit_type', 'offset', 'length', 'replacement']
Edit = collections.namedtuple('Edit', _EDIT_FIELDS)


def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Runs `git ls-files` (`git.bat` on Windows) from the current working
  directory and collects the paths it prints.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
      When empty or None, no filter is passed to git.

  Returns:
    A list with one entry per line printed by git, each passed through
    os.path.realpath(). The entries are native strings on both Python 2 and
    Python 3.
  """
  git_binary = 'git.bat' if sys.platform == 'win32' else 'git'
  args = [git_binary, 'ls-files']
  if paths:
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  output, _ = command.communicate()
  # On Python 3 the pipe yields bytes, which would make the returned paths
  # bytes and therefore never equal to text paths they are compared against.
  # On Python 2 the output already is a native str and is left untouched.
  if not isinstance(output, str):
    output = output.decode('utf-8')
  return [os.path.realpath(p) for p in output.splitlines()]


def _ParseEditsFromStdin(build_directory):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file.

  Args:
    build_directory: Directory that contains the compile database. Used to
      normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits.
  """
  path_to_resolved_path = {}
  def _ResolvePath(path):
    if path in path_to_resolved_path:
      return path_to_resolved_path[path]

    if not os.path.isfile(path):
      resolved_path = os.path.realpath(os.path.join(build_directory, path))
    else:
      resolved_path = path

    if not os.path.isfile(resolved_path):
      sys.stderr.write('Edit applies to a non-existent file: %s\n' % path)
      resolved_path = None

    path_to_resolved_path[path] = resolved_path
    return resolved_path

  edits = collections.defaultdict(list)
  for line in sys.stdin:
    line = line.rstrip("\n\r")
    try:
      edit_type, path, offset, length, replacement = line.split(':::', 4)
      replacement = replacement.replace('\0', '\n')
      path = _ResolvePath(path)
      if not path: continue
      edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
    except ValueError:
      sys.stderr.write('Unable to parse edit: %s\n' % line)
  return edits


def _ApplyEditsToSingleFile(filename, edits):
  # Sort the edits and iterate through them in reverse order. Sorting allows
  # duplicate edits to be quickly skipped, while reversing means that
  # subsequent edits don't need to have their offsets updated with each edit
  # applied.
  edit_count = 0
  error_count = 0
  edits.sort()
  last_edit = None
  with open(filename, 'rb+') as f:
    contents = bytearray(f.read())
    for edit in reversed(edits):
      if edit == last_edit:
        continue
      if (last_edit is not None and edit.edit_type == last_edit.edit_type and
          edit.offset == last_edit.offset and edit.length == last_edit.length):
        sys.stderr.write(
            'Conflicting edit: %s at offset %d, length %d: "%s" != "%s"\n' %
            (filename, edit.offset, edit.length, edit.replacement,
             last_edit.replacement))
        error_count += 1
        continue

      last_edit = edit
      contents[edit.offset:edit.offset + edit.length] = edit.replacement
      if not edit.replacement:
        _ExtendDeletionIfElementIsInList(contents, edit.offset)
      edit_count += 1
    f.seek(0)
    f.truncate()
    f.write(contents)
  return (edit_count, error_count)


def _ApplyEdits(edits):
  """Apply the generated edits.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
  """
  edit_count = 0
  error_count = 0
  done_files = 0
  for k, v in edits.iteritems():
    tmp_edit_count, tmp_error_count = _ApplyEditsToSingleFile(k, v)
    edit_count += tmp_edit_count
    error_count += tmp_error_count
    done_files += 1
    percentage = (float(done_files) / len(edits)) * 100
    sys.stderr.write('Applied %d edits (%d errors) to %d files [%.2f%%]\r' %
                     (edit_count, error_count, done_files, percentage))

  sys.stderr.write('\n')
  return -error_count


_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  Args:
    contents: A bytearray with the deletion already applied.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  char_before = char_after = None
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte in (ord(','), ord(':'), ord('('), ord('{')):
      char_before = chr(byte)
    break

  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == ord(','):
      char_after = chr(byte)
    break

  if char_before:
    if char_after:
      del contents[offset:offset + right_trim_count]
    elif char_before in (',', ':'):
      del contents[offset - left_trim_count:offset]


def main():
  """Parses the command line, reads edits from stdin and applies them.

  Only edits whose target file is reported by _GetFilesFromGit() for the given
  path filters are applied; all other edits are dropped.

  Returns:
    The value returned by _ApplyEdits(), which is used as the exit status.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      'build_directory',
      help='path to the build dir (dir that edit paths are relative to)')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  args = parser.parse_args()

  filenames = set(_GetFilesFromGit(args.path_filter))
  edits = _ParseEditsFromStdin(args.build_directory)
  # items() rather than iteritems(): the latter does not exist on Python 3.
  return _ApplyEdits(
      {k: v for k, v in edits.items()
       if os.path.realpath(k) in filenames})


if __name__ == '__main__':
  sys.exit(main())