diff options
Diffstat (limited to 'tools/clang/scripts/run_tool.py')
-rwxr-xr-x | tools/clang/scripts/run_tool.py | 256 |
1 files changed, 78 insertions, 178 deletions
diff --git a/tools/clang/scripts/run_tool.py b/tools/clang/scripts/run_tool.py index 68f12e98..53c7d0fc 100755 --- a/tools/clang/scripts/run_tool.py +++ b/tools/clang/scripts/run_tool.py @@ -4,45 +4,55 @@ # found in the LICENSE file. """Wrapper script to help run clang tools across Chromium code. -How to use this tool: -If you want to run the tool across all Chromium code: +How to use run_tool.py: +If you want to run a clang tool across all Chromium code: run_tool.py <tool> <path/to/compiledb> -If you want to include all files mentioned in the compilation database: +If you want to include all files mentioned in the compilation database +(this will also include generated files, unlike the previous command): run_tool.py <tool> <path/to/compiledb> --all -If you only want to run the tool across just chrome/browser and content/browser: +If you want to run the clang tool across only chrome/browser and +content/browser: run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser -Please see https://chromium.googlesource.com/chromium/src/+/master/docs/clang_tool_refactoring.md for more -information, which documents the entire automated refactoring flow in Chromium. +Please see docs/clang_tool_refactoring.md for more information, which documents +the entire automated refactoring flow in Chromium. -Why use this tool: +Why use run_tool.py (instead of running a clang tool directly): The clang tool implementation doesn't take advantage of multiple cores, and if it fails mysteriously in the middle, all the generated replacements will be -lost. - -Unfortunately, if the work is simply sharded across multiple cores by running -multiple RefactoringTools, problems arise when they attempt to rewrite a file at -the same time. To work around that, clang tools that are run using this tool -should output edits to stdout in the following format: - -==== BEGIN EDITS ==== -r:<file path>:<offset>:<length>:<replacement text> -r:<file path>:<offset>:<length>:<replacement text> -...etc... -==== END EDITS ==== - -Any generated edits are applied once the clang tool has finished running -across Chromium, regardless of whether some instances failed or not. +lost. Additionally, if the work is simply sharded across multiple cores by +running multiple RefactoringTools, problems arise when they attempt to rewrite a +file at the same time. + +run_tool.py will +1) run multiple instances of clang tool in parallel +2) gather stdout from clang tool invocations +3) "atomically" forward #2 to stdout + +Output of run_tool.py can be piped into extract_edits.py and then into +apply_edits.py. These tools will extract individual edits and apply them to the +source files. These tools assume the clang tool emits the edits in the +following format: + ... + ==== BEGIN EDITS ==== + r:::<file path>:::<offset>:::<length>:::<replacement text> + r:::<file path>:::<offset>:::<length>:::<replacement text> + ...etc... + ==== END EDITS ==== + ... + +extract_edits.py extracts only lines between BEGIN/END EDITS markers +apply_edits.py reads edit lines from stdin and applies the edits """ import argparse -import collections import functools import multiprocessing import os import os.path +import re import subprocess import sys @@ -52,9 +62,6 @@ sys.path.insert(0, tool_dir) from clang import compile_db -Edit = collections.namedtuple('Edit', - ('edit_type', 'offset', 'length', 'replacement')) - def _GetFilesFromGit(paths=None): """Gets the list of files in the git repository. @@ -85,90 +92,62 @@ def _GetFilesFromCompileDB(build_directory): for entry in compile_db.Read(build_directory)] -def _ExtractEditsFromStdout(build_directory, stdout): - """Extracts generated list of edits from the tool's stdout. - - The expected format is documented at the top of this file. - - Args: - build_directory: Directory that contains the compile database. Used to - normalize the filenames. - stdout: The stdout from running the clang tool. - - Returns: - A dictionary mapping filenames to the associated edits. - """ - lines = stdout.splitlines() - start_index = lines.index('==== BEGIN EDITS ====') - end_index = lines.index('==== END EDITS ====') - edits = collections.defaultdict(list) - for line in lines[start_index + 1:end_index]: - try: - edit_type, path, offset, length, replacement = line.split(':::', 4) - replacement = replacement.replace('\0', '\n') - # Normalize the file path emitted by the clang tool. - path = os.path.realpath(os.path.join(build_directory, path)) - edits[path].append(Edit(edit_type, int(offset), int(length), replacement)) - except ValueError: - print 'Unable to parse edit: %s' % line - return edits - - -def _ExecuteTool(toolname, build_directory, filename): - """Executes the tool. +def _ExecuteTool(toolname, tool_args, build_directory, filename): + """Executes the clang tool. This is defined outside the class so it can be pickled for the multiprocessing module. Args: - toolname: Path to the tool to execute. + toolname: Name of the clang tool to execute. + tool_args: Arguments to be passed to the clang tool. Can be None. build_directory: Directory that contains the compile database. - filename: The file to run the tool over. + filename: The file to run the clang tool over. Returns: A dictionary that must contain the key "status" and a boolean value associated with it. - If status is True, then the generated edits are stored with the key "edits" - in the dictionary. + If status is True, then the generated output is stored with the key + "stdout_text" in the dictionary. Otherwise, the filename and the output from stderr are associated with the - keys "filename" and "stderr" respectively. + keys "filename" and "stderr_text" respectively. """ + args = [toolname, '-p', build_directory, filename] + if (tool_args): + args.extend(tool_args) command = subprocess.Popen( - (toolname, '-p', build_directory, filename), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = command.communicate() + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout_text, stderr_text = command.communicate() + stderr_text = re.sub( + r"^warning: .*'linker' input unused \[-Wunused-command-line-argument\]\n", + "", stderr_text, flags=re.MULTILINE) if command.returncode != 0: - return {'status': False, 'filename': filename, 'stderr': stderr} + return {'status': False, 'filename': filename, 'stderr_text': stderr_text} else: - return {'status': True, - 'edits': _ExtractEditsFromStdout(build_directory, stdout)} + return {'status': True, 'filename': filename, 'stdout_text': stdout_text, + 'stderr_text': stderr_text} class _CompilerDispatcher(object): """Multiprocessing controller for running clang tools in parallel.""" - def __init__(self, toolname, build_directory, filenames): + def __init__(self, toolname, tool_args, build_directory, filenames): """Initializer method. Args: toolname: Path to the tool to execute. + tool_args: Arguments to be passed to the tool. Can be None. build_directory: Directory that contains the compile database. filenames: The files to run the tool over. """ self.__toolname = toolname + self.__tool_args = tool_args self.__build_directory = build_directory self.__filenames = filenames self.__success_count = 0 self.__failed_count = 0 - self.__edit_count = 0 - self.__edits = collections.defaultdict(list) - - @property - def edits(self): - return self.__edits @property def failed_count(self): @@ -178,12 +157,12 @@ class _CompilerDispatcher(object): """Does the grunt work.""" pool = multiprocessing.Pool() result_iterator = pool.imap_unordered( - functools.partial(_ExecuteTool, self.__toolname, - self.__build_directory), self.__filenames) + functools.partial(_ExecuteTool, self.__toolname, self.__tool_args, + self.__build_directory), + self.__filenames) for result in result_iterator: self.__ProcessResult(result) - sys.stdout.write('\n') - sys.stdout.flush() + sys.stderr.write('\n') def __ProcessResult(self, result): """Handles result processing. @@ -193,95 +172,18 @@ class _CompilerDispatcher(object): """ if result['status']: self.__success_count += 1 - for k, v in result['edits'].iteritems(): - self.__edits[k].extend(v) - self.__edit_count += len(v) + sys.stdout.write(result['stdout_text']) + sys.stderr.write(result['stderr_text']) else: self.__failed_count += 1 - sys.stdout.write('\nFailed to process %s\n' % result['filename']) - sys.stdout.write(result['stderr']) - sys.stdout.write('\n') - percentage = (float(self.__success_count + self.__failed_count) / - len(self.__filenames)) * 100 - sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % - (self.__success_count, self.__failed_count, - self.__edit_count, percentage)) - sys.stdout.flush() - - -def _ApplyEdits(edits): - """Apply the generated edits. - - Args: - edits: A dict mapping filenames to Edit instances that apply to that file. - """ - edit_count = 0 - for k, v in edits.iteritems(): - # Sort the edits and iterate through them in reverse order. Sorting allows - # duplicate edits to be quickly skipped, while reversing means that - # subsequent edits don't need to have their offsets updated with each edit - # applied. - v.sort() - last_edit = None - with open(k, 'rb+') as f: - contents = bytearray(f.read()) - for edit in reversed(v): - if edit == last_edit: - continue - last_edit = edit - contents[edit.offset:edit.offset + edit.length] = edit.replacement - if not edit.replacement: - _ExtendDeletionIfElementIsInList(contents, edit.offset) - edit_count += 1 - f.seek(0) - f.truncate() - f.write(contents) - print 'Applied %d edits to %d files' % (edit_count, len(edits)) - - -_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' '))) - - -def _ExtendDeletionIfElementIsInList(contents, offset): - """Extends the range of a deletion if the deleted element was part of a list. - - This rewriter helper makes it easy for refactoring tools to remove elements - from a list. Even if a matcher callback knows that it is removing an element - from a list, it may not have enough information to accurately remove the list - element; for example, another matcher callback may end up removing an adjacent - list element, or all the list elements may end up being removed. - - With this helper, refactoring tools can simply remove the list element and not - worry about having to include the comma in the replacement. - - Args: - contents: A bytearray with the deletion already applied. - offset: The offset in the bytearray where the deleted range used to be. - """ - char_before = char_after = None - left_trim_count = 0 - for byte in reversed(contents[:offset]): - left_trim_count += 1 - if byte in _WHITESPACE_BYTES: - continue - if byte in (ord(','), ord(':'), ord('('), ord('{')): - char_before = chr(byte) - break - - right_trim_count = 0 - for byte in contents[offset:]: - right_trim_count += 1 - if byte in _WHITESPACE_BYTES: - continue - if byte == ord(','): - char_after = chr(byte) - break - - if char_before: - if char_after: - del contents[offset:offset + right_trim_count] - elif char_before in (',', ':'): - del contents[offset - left_trim_count:offset] + sys.stderr.write('\nFailed to process %s\n' % result['filename']) + sys.stderr.write(result['stderr_text']) + sys.stderr.write('\n') + done_count = self.__success_count + self.__failed_count + percentage = (float(done_count) / len(self.__filenames)) * 100 + sys.stderr.write( + 'Processed %d files with %s tool (%d failures) [%.2f%%]\r' % + (done_count, self.__toolname, self.__failed_count, percentage)) def main(): @@ -299,6 +201,9 @@ def main(): 'path_filter', nargs='*', help='optional paths to filter what files the tool is run on') + parser.add_argument( + '--tool-args', nargs='*', + help='optional arguments passed to the tool') args = parser.parse_args() os.environ['PATH'] = '%s%s%s' % ( @@ -312,24 +217,19 @@ def main(): compile_db.GenerateWithNinja(args.compile_database) if args.all: - filenames = set(_GetFilesFromCompileDB(args.compile_database)) - source_filenames = filenames + source_filenames = set(_GetFilesFromCompileDB(args.compile_database)) else: - filenames = set(_GetFilesFromGit(args.path_filter)) + git_filenames = set(_GetFilesFromGit(args.path_filter)) # Filter out files that aren't C/C++/Obj-C/Obj-C++. extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm')) source_filenames = [f - for f in filenames + for f in git_filenames if os.path.splitext(f)[1] in extensions] - dispatcher = _CompilerDispatcher(args.tool, args.compile_database, + + dispatcher = _CompilerDispatcher(args.tool, args.tool_args, + args.compile_database, source_filenames) dispatcher.Run() - # Filter out edits to files that aren't in the git repository, since it's not - # useful to modify files that aren't under source control--typically, these - # are generated files or files in a git submodule that's not part of Chromium. - _ApplyEdits({k: v - for k, v in dispatcher.edits.iteritems() - if os.path.realpath(k) in filenames}) return -dispatcher.failed_count |