aboutsummaryrefslogtreecommitdiff
path: root/llvm_tools
diff options
context:
space:
mode:
authorGeorge Burgess IV <gbiv@google.com>2020-04-25 15:07:24 -0700
committerGeorge Burgess <gbiv@chromium.org>2020-04-28 19:02:01 +0000
commit1324de51897326e383c620027ffd525c7b6c747f (patch)
tree94d0c52a88b765c1c00cd178a3a3be2cfa918142 /llvm_tools
parentb999ceb903e3f163818e045d8710d6fe487157e0 (diff)
downloadtoolchain-utils-1324de51897326e383c620027ffd525c7b6c747f.tar.gz
llvm_tools: add a revert checker
This CL adds a revert checker script. The intent is to build on it for later automation, but it's also usable on its own. Please see the new docs for details on usage. BUG=chromium:1046988 TEST=unittests Change-Id: Ibc3ef1d2e3b5a3301366e971cacf53396a6ca2aa Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/2165513 Reviewed-by: Tiancong Wang <tcwang@google.com> Tested-by: George Burgess <gbiv@chromium.org>
Diffstat (limited to 'llvm_tools')
-rw-r--r--llvm_tools/README.md23
-rwxr-xr-xllvm_tools/revert_checker.py241
-rwxr-xr-xllvm_tools/revert_checker_test.py110
3 files changed, 374 insertions, 0 deletions
diff --git a/llvm_tools/README.md b/llvm_tools/README.md
index b0fbea5a..58a7c3d6 100644
--- a/llvm_tools/README.md
+++ b/llvm_tools/README.md
@@ -511,3 +511,26 @@ Usage:
It tries to autodetect a lot of things (e.g., sys-devel/llvm's path, the
"start"/"end" revisions to set, etc.) For more information, please see the
`--help`
+
+### `revert_checker.py`
+
+This script reports reverts which happen 'across' a certain LLVM commit.
+
+To clarify the meaning of 'across' with an example, if we had the following
+commit history (where `a -> b` notes that `b` is a direct child of `a`):
+
+123abc -> 223abc -> 323abc -> 423abc -> 523abc
+
+And where 423abc is a revert of 223abc, this revert is considered to be 'across'
+323abc. More generally, a revert A of a parent commit B is considered to be
+'across' a commit C if C is a parent of A and B is a parent of C.
+
+Usage example:
+
+```
+./revert_checker.py -C llvm-project-copy 123abc 223abc 323abc
+```
+
+In the above example, the tool will scan all commits between 123abc and 223abc,
+and all commits between 123abc and 323abc for reverts of commits which are
+parents of 123abc.
diff --git a/llvm_tools/revert_checker.py b/llvm_tools/revert_checker.py
new file mode 100755
index 00000000..bb9182b0
--- /dev/null
+++ b/llvm_tools/revert_checker.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Checks for reverts of commits across a given git commit.
+
+To clarify the meaning of 'across' with an example, if we had the following
+commit history (where `a -> b` notes that `b` is a direct child of `a`):
+
+123abc -> 223abc -> 323abc -> 423abc -> 523abc
+
+And where 423abc is a revert of 223abc, this revert is considered to be 'across'
+323abc. More generally, a revert A of a parent commit B is considered to be
+'across' a commit C if C is a parent of A and B is a parent of C.
+
+Please note that revert detection in general is really difficult, since merge
+conflicts/etc always introduce _some_ amount of fuzziness. This script just
+uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
+reverts. The hope is that it'll easily catch the vast majority (>90%) of them,
+though.
+"""
+
+# pylint: disable=cros-logging-import
+
+from __future__ import print_function
+
+import argparse
+import collections
+import logging
+import re
+import subprocess
+import sys
+import typing as t
+
+# People are creative with their reverts, and heuristics are a bit difficult.
+# Like 90% of of reverts have "This reverts commit ${full_sha}".
+# Some lack that entirely, while others have many of them specified in ad-hoc
+# ways, while others use short SHAs and whatever.
+#
+# The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
+# starts involving human intervention, which is probably not worth it for now.
+
+
+def _try_parse_reverts_from_commit_message(commit_message: str) -> t.List[str]:
+ if not commit_message:
+ return []
+
+ results = re.findall(r'This reverts commit ([a-f0-9]{40})\b', commit_message)
+
+ first_line = commit_message.splitlines()[0]
+ initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
+ if initial_revert:
+ results.append(initial_revert.group(1))
+ return results
+
+
+def _stream_stdout(command: t.List[str]) -> t.Generator[str, None, None]:
+ with subprocess.Popen(
+ command, stdout=subprocess.PIPE, encoding='utf-8', errors='replace') as p:
+ yield from p.stdout
+
+
+def _resolve_sha(git_dir: str, sha: str) -> str:
+ if len(sha) == 40:
+ return sha
+
+ return subprocess.check_output(
+ ['git', '-C', git_dir, 'rev-parse', sha],
+ encoding='utf-8',
+ stderr=subprocess.DEVNULL,
+ ).strip()
+
+
+_LogEntry = t.NamedTuple('_LogEntry', [
+ ('sha', str),
+ ('commit_message', t.List[str]),
+])
+
+
+def _log_stream(git_dir: str, root_sha: str,
+ end_at_sha: str) -> t.Iterable[_LogEntry]:
+ sep = 50 * '<>'
+ log_command = [
+ 'git',
+ '-C',
+ git_dir,
+ 'log',
+ '^' + end_at_sha,
+ root_sha,
+ '--format=' + sep + '%n%H%n%B%n',
+ ]
+
+ stdout_stream = iter(_stream_stdout(log_command))
+
+ # Find the next separator line. If there's nothing to log, it may not exist.
+ # It might not be the first line if git feels complainy.
+ found_commit_header = False
+ for line in stdout_stream:
+ if line.rstrip() == sep:
+ found_commit_header = True
+ break
+
+ while found_commit_header:
+ # crbug.com/1041148
+ # pylint: disable=stop-iteration-return
+ sha = next(stdout_stream, None)
+ assert sha is not None, 'git died?'
+ sha = sha.rstrip()
+
+ commit_message = []
+
+ found_commit_header = False
+ for line in stdout_stream:
+ line = line.rstrip()
+ if line.rstrip() == sep:
+ found_commit_header = True
+ break
+ commit_message.append(line)
+
+ yield _LogEntry(sha, '\n'.join(commit_message).rstrip())
+
+
+def _shas_between(git_dir: str, base_ref: str,
+ head_ref: str) -> t.Iterable[str]:
+ rev_list = [
+ 'git',
+ '-C',
+ git_dir,
+ 'rev-list',
+ '--first-parent',
+ '%s..%s' % (base_ref, head_ref),
+ ]
+ return (x.strip() for x in _stream_stdout(rev_list))
+
+
+def _rev_parse(git_dir: str, ref: str) -> str:
+ result = subprocess.check_output(
+ ['git', '-C', git_dir, 'rev-parse', ref],
+ encoding='utf-8',
+ ).strip()
+ return t.cast(str, result)
+
+
+Revert = t.NamedTuple('Revert', [
+ ('sha', str),
+ ('reverted_sha', str),
+])
+
+
+def find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
+ return subprocess.check_output(
+ ['git', '-C', git_dir, 'merge-base', ref_a, ref_b],
+ encoding='utf-8',
+ ).strip()
+
+
+def find_reverts(git_dir: str, across_ref: str, root: str) -> t.List[Revert]:
+ """Finds reverts across `across_ref` in `git_dir`, starting from `root`."""
+ across_sha = _rev_parse(git_dir, across_ref)
+ root_sha = _rev_parse(git_dir, root)
+
+ common_ancestor = find_common_parent_commit(git_dir, across_sha, root_sha)
+ if common_ancestor != across_sha:
+ raise ValueError("%s isn't an ancestor of %s (common ancestor: %s)" %
+ (across_sha, root_sha, common_ancestor))
+
+ intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
+ assert across_ref not in intermediate_commits
+
+ logging.debug('%d commits appear between %s and %s',
+ len(intermediate_commits), across_sha, root_sha)
+
+ all_reverts = []
+ for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
+ reverts = _try_parse_reverts_from_commit_message(commit_message)
+ if not reverts:
+ continue
+
+ resolved_reverts = sorted(set(_resolve_sha(git_dir, x) for x in reverts))
+ for reverted_sha in resolved_reverts:
+ if reverted_sha in intermediate_commits:
+ logging.debug('Commit %s reverts %s, which happened after %s', sha,
+ reverted_sha, across_sha)
+ continue
+
+ try:
+ object_type = subprocess.check_output(
+ ['git', '-C', git_dir, 'cat-file', '-t', reverted_sha],
+ encoding='utf-8',
+ stderr=subprocess.DEVNULL,
+ ).strip()
+ except subprocess.CalledProcessError:
+ logging.warning(
+ 'Failed to resolve reverted object %s (claimed to be reverted '
+ 'by sha %s)', reverted_sha, sha)
+ continue
+
+ if object_type == 'commit':
+ all_reverts.append(Revert(sha, reverted_sha))
+ continue
+
+ logging.error("%s claims to revert %s -- which isn't a commit -- %s", sha,
+ object_type, reverted_sha)
+
+ return all_reverts
+
+
+def main(args: t.List[str]) -> int:
+ parser = argparse.ArgumentParser(
+ description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument(
+ 'base_ref', help='Git ref or sha to check for reverts around.')
+ parser.add_argument(
+ '-C', '--git_dir', default='.', help='Git directory to use.')
+ parser.add_argument(
+ 'root', nargs='+', help='Root(s) to search for commits from.')
+ parser.add_argument('--debug', action='store_true')
+ opts = parser.parse_args(args)
+
+ logging.basicConfig(
+ format='%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s',
+ level=logging.DEBUG if opts.debug else logging.INFO,
+ )
+
+ # `root`s can have related history, so we want to filter duplicate commits
+ # out. The overwhelmingly common case is also to have one root, and it's way
+ # easier to reason about output that comes in an order that's meaningful to
+ # git.
+ all_reverts = collections.OrderedDict()
+ for root in opts.root:
+ for revert in find_reverts(opts.git_dir, opts.base_ref, root):
+ all_reverts[revert] = None
+
+ for revert in all_reverts.keys():
+ print('%s claims to revert %s' % (revert.sha, revert.reverted_sha))
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv[1:]))
diff --git a/llvm_tools/revert_checker_test.py b/llvm_tools/revert_checker_test.py
new file mode 100755
index 00000000..16b3c3f6
--- /dev/null
+++ b/llvm_tools/revert_checker_test.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for revert_checker."""
+
+from __future__ import print_function
+
+# pylint: disable=cros-logging-import
+import logging
+import unittest
+
+import llvm_project
+import revert_checker
+
+# pylint: disable=protected-access
+
+
+class _SilencingFilter(object):
+ """Silences all log messages.
+
+ Also collects info about log messages that would've been emitted.
+ """
+
+ def __init__(self):
+ self.messages = []
+
+ def filter(self, record):
+ self.messages.append(record.getMessage())
+ return 0
+
+
+class Test(unittest.TestCase):
+ """Tests for revert_checker."""
+
+ def silence_logging(self):
+ root = logging.getLogger()
+ filt = _SilencingFilter()
+ root.addFilter(filt)
+ self.addCleanup(root.removeFilter, filt)
+ return filt
+
+ def test_known_log_stream(self):
+ start_sha = 'e241573d5972d34a323fa5c64774c4207340beb3'
+ end_sha = 'a7a37517751ffb0f5529011b4ba96e67fcb27510'
+ commits = [
+ revert_checker._LogEntry(
+ 'e241573d5972d34a323fa5c64774c4207340beb3', '\n'.join((
+ '[mlir] NFC: remove IntegerValueSet / MutableIntegerSet',
+ '',
+ 'Summary:',
+ '- these are unused and really not needed now given flat '
+ 'affine',
+ ' constraints',
+ '',
+ 'Differential Revision: https://reviews.llvm.org/D75792',
+ ))),
+ revert_checker._LogEntry(
+ '97572fa6e9daecd648873496fd11f7d1e25a55f0',
+ '[NFC] use hasAnyOperatorName and hasAnyOverloadedOperatorName '
+ 'functions in clang-tidy matchers',
+ ),
+ ]
+
+ logs = list(
+ revert_checker._log_stream(
+ llvm_project.get_location(),
+ root_sha=start_sha,
+ end_at_sha=end_sha,
+ ))
+ self.assertEqual(commits, logs)
+
+ def test_reverted_noncommit_object_is_a_nop(self):
+ log_filter = self.silence_logging()
+ # c9944df916e41b1014dff5f6f75d52297b48ecdc mentions reverting a non-commit
+ # object. It sits between the given base_ref and root.
+ reverts = revert_checker.find_reverts(
+ git_dir=llvm_project.get_location(),
+ across_ref='c9944df916e41b1014dff5f6f75d52297b48ecdc~',
+ root='c9944df916e41b1014dff5f6f75d52297b48ecdc')
+ self.assertEqual(reverts, [])
+
+ complaint = ('Failed to resolve reverted object '
+ 'edd18355be574122aaa9abf58c15d8c50fb085a1')
+ self.assertTrue(
+ any(x.startswith(complaint) for x in log_filter.messages),
+ log_filter.messages)
+
+ def test_known_reverts_across_previous_llvm_next_rev(self):
+ # c9944df916e41b1014dff5f6f75d52297b48ecdc mentions reverting a non-commit
+ # object. It sits between the given base_ref and root.
+ reverts = revert_checker.find_reverts(
+ git_dir=llvm_project.get_location(),
+ across_ref='c47f971694be0159ffddfee8a75ae515eba91439',
+ root='9f981e9adf9c8d29bb80306daf08d2770263ade6')
+ self.assertEqual(reverts, [
+ revert_checker.Revert(
+ sha='9f981e9adf9c8d29bb80306daf08d2770263ade6',
+ reverted_sha='4060016fce3e6a0b926ee9fc59e440a612d3a2ec'),
+ revert_checker.Revert(
+ sha='4e0fe038f438ae1679eae9e156e1f248595b2373',
+ reverted_sha='65b21282c710afe9c275778820c6e3c1cf46734b'),
+ ])
+
+
+if __name__ == '__main__':
+ llvm_project.ensure_up_to_date()
+ unittest.main()