aboutsummaryrefslogtreecommitdiff
path: root/llvm_tools/revert_checker.py
diff options
context:
space:
mode:
Diffstat (limited to 'llvm_tools/revert_checker.py')
-rwxr-xr-xllvm_tools/revert_checker.py241
1 files changed, 0 insertions, 241 deletions
diff --git a/llvm_tools/revert_checker.py b/llvm_tools/revert_checker.py
deleted file mode 100755
index bb9182b0..00000000
--- a/llvm_tools/revert_checker.py
+++ /dev/null
@@ -1,241 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# Copyright 2020 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Checks for reverts of commits across a given git commit.
-
-To clarify the meaning of 'across' with an example, if we had the following
-commit history (where `a -> b` notes that `b` is a direct child of `a`):
-
-123abc -> 223abc -> 323abc -> 423abc -> 523abc
-
-And where 423abc is a revert of 223abc, this revert is considered to be 'across'
-323abc. More generally, a revert A of a parent commit B is considered to be
-'across' a commit C if C is a parent of A and B is a parent of C.
-
-Please note that revert detection in general is really difficult, since merge
-conflicts/etc always introduce _some_ amount of fuzziness. This script just
-uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
-reverts. The hope is that it'll easily catch the vast majority (>90%) of them,
-though.
-"""
-
-# pylint: disable=cros-logging-import
-
-from __future__ import print_function
-
-import argparse
-import collections
-import logging
-import re
-import subprocess
-import sys
-import typing as t
-
-# People are creative with their reverts, and heuristics are a bit difficult.
-# Like 90% of reverts have "This reverts commit ${full_sha}".
-# Some lack that entirely, while others have many of them specified in ad-hoc
-# ways, while others use short SHAs and whatever.
-#
-# The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
-# starts involving human intervention, which is probably not worth it for now.
-
-
def _try_parse_reverts_from_commit_message(commit_message: str) -> t.List[str]:
  """Extracts the SHAs that `commit_message` claims to revert.

  Two patterns are recognized:
    - `This reverts commit <40 hex chars>` anywhere in the message.
    - A subject line that starts with `Revert <6+ hex chars> "`.

  Args:
    commit_message: The full commit message; may be empty.

  Returns:
    SHAs from the first pattern in order of appearance, followed by the SHA
    from the subject line, if any. SHAs from the subject line may be
    abbreviated. Duplicates are not removed.
  """
  if not commit_message:
    return []

  shas = [
      found.group(1) for found in re.finditer(
          r'This reverts commit ([a-f0-9]{40})\b', commit_message)
  ]

  # Only the subject line is checked for the `Revert <sha> "..."` form.
  subject = commit_message.splitlines()[0]
  subject_match = re.match(r'Revert ([a-f0-9]{6,}) "', subject)
  if subject_match is not None:
    shas.append(subject_match.group(1))
  return shas
-
-
def _stream_stdout(command: t.List[str]) -> t.Generator[str, None, None]:
  """Runs `command` and yields its stdout one line at a time.

  Output is decoded as UTF-8, with undecodable bytes replaced. The process's
  exit status is not checked.
  """
  process = subprocess.Popen(
      command, stdout=subprocess.PIPE, encoding='utf-8', errors='replace')
  # Leaving the `with` block closes the pipe and waits for the process.
  with process:
    for output_line in process.stdout:
      yield output_line
-
-
def _resolve_sha(git_dir: str, sha: str) -> str:
  """Expands `sha` to whatever `git rev-parse` reports for it in `git_dir`.

  A 40-character `sha` is returned unchanged without invoking git.

  Raises:
    subprocess.CalledProcessError: if `git rev-parse` exits nonzero.
  """
  if len(sha) != 40:
    rev_parse = ['git', '-C', git_dir, 'rev-parse', sha]
    resolved = subprocess.check_output(
        rev_parse,
        encoding='utf-8',
        stderr=subprocess.DEVNULL,
    )
    return resolved.strip()

  return sha
-
-
# One commit as produced by `_log_stream`: the commit's SHA and its complete
# commit message. The message is a single newline-joined string, not a list of
# lines.
_LogEntry = t.NamedTuple('_LogEntry', [
    ('sha', str),
    ('commit_message', str),
])
-
-
def _log_stream(git_dir: str, root_sha: str,
                end_at_sha: str) -> t.Iterable[_LogEntry]:
  """Yields a `_LogEntry` for each commit printed by `git log`.

  Runs `git log ^end_at_sha root_sha` in `git_dir` with a format that prints a
  separator line, then the commit's full SHA, then its raw body.

  Each message line has trailing whitespace removed, and so does the joined
  message as a whole.
  """
  separator = '<>' * 50
  git_log = [
      'git',
      '-C',
      git_dir,
      'log',
      '^' + end_at_sha,
      root_sha,
      '--format=' + separator + '%n%H%n%B%n',
  ]

  lines = iter(_stream_stdout(git_log))

  # Skip ahead to the first separator. With nothing to log there won't be one,
  # and git may print other lines before it. `any` stops consuming `lines`
  # right after the first separator it sees.
  at_commit = any(line.rstrip() == separator for line in lines)

  while at_commit:
    # crbug.com/1041148
    # pylint: disable=stop-iteration-return
    sha_line = next(lines, None)
    assert sha_line is not None, 'git died?'

    # Gather message lines until the next separator or the end of the output.
    message_lines = []
    at_commit = False
    for raw_line in lines:
      stripped = raw_line.rstrip()
      if stripped == separator:
        at_commit = True
        break
      message_lines.append(stripped)

    yield _LogEntry(sha_line.rstrip(), '\n'.join(message_lines).rstrip())
-
-
def _shas_between(git_dir: str, base_ref: str,
                  head_ref: str) -> t.Iterable[str]:
  """Lazily yields each line of `git rev-list --first-parent base..head`.

  Lines are stripped of surrounding whitespace. The command runs in `git_dir`.
  """
  commit_range = '%s..%s' % (base_ref, head_ref)
  command = ['git', '-C', git_dir, 'rev-list', '--first-parent', commit_range]
  return map(str.strip, _stream_stdout(command))
-
-
def _rev_parse(git_dir: str, ref: str) -> str:
  """Returns the stripped output of `git rev-parse ref`, run in `git_dir`.

  Raises:
    subprocess.CalledProcessError: if `git rev-parse` exits nonzero.
  """
  command = ['git', '-C', git_dir, 'rev-parse', ref]
  stdout = subprocess.check_output(command, encoding='utf-8')
  return t.cast(str, stdout.strip())
-
-
class Revert(t.NamedTuple):
  # SHA of the commit that performs the revert.
  sha: str
  # SHA of the commit that `sha` claims to revert.
  reverted_sha: str
-
-
def find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
  """Returns the stripped output of `git merge-base ref_a ref_b` in `git_dir`.

  Raises:
    subprocess.CalledProcessError: if `git merge-base` exits nonzero.
  """
  merge_base = ['git', '-C', git_dir, 'merge-base', ref_a, ref_b]
  stdout = subprocess.check_output(merge_base, encoding='utf-8')
  return stdout.strip()
-
-
def find_reverts(git_dir: str, across_ref: str, root: str) -> t.List[Revert]:
  """Finds reverts across `across_ref` in `git_dir`, starting from `root`.

  Args:
    git_dir: Directory that git commands are run in (passed as `git -C`).
    across_ref: Ref or SHA that a revert must span to be reported.
    root: Ref or SHA to start searching for reverting commits from.

  Returns:
    One `Revert` for each (reverting commit, reverted commit) pair found.
    Reverting commits appear in `git log` order; within a single reverting
    commit, reverted SHAs are sorted. Reverted SHAs that git cannot resolve,
    or that are not commits, are logged and skipped.

  Raises:
    ValueError: if the merge base of `across_ref` and `root` is not
      `across_ref` itself.
    subprocess.CalledProcessError: if `across_ref` or `root` can't be
      resolved, or if computing their merge base fails.
  """
  across_sha = _rev_parse(git_dir, across_ref)
  root_sha = _rev_parse(git_dir, root)

  common_ancestor = find_common_parent_commit(git_dir, across_sha, root_sha)
  if common_ancestor != across_sha:
    raise ValueError("%s isn't an ancestor of %s (common ancestor: %s)" %
                     (across_sha, root_sha, common_ancestor))

  intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
  # Check the resolved SHA, not the user-supplied ref: `intermediate_commits`
  # holds SHAs, so a symbolic ref name would trivially never be a member.
  assert across_sha not in intermediate_commits

  logging.debug('%d commits appear between %s and %s',
                len(intermediate_commits), across_sha, root_sha)

  all_reverts = []
  for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
    reverts = _try_parse_reverts_from_commit_message(commit_message)
    if not reverts:
      continue

    # Abbreviated SHAs taken from commit messages may not resolve. Skip those
    # with a warning, as is done for unresolvable full SHAs further down,
    # instead of aborting the whole scan.
    resolved_reverts = set()
    for claimed_sha in reverts:
      try:
        resolved_reverts.add(_resolve_sha(git_dir, claimed_sha))
      except subprocess.CalledProcessError:
        logging.warning(
            'Failed to resolve reverted object %s (claimed to be reverted '
            'by sha %s)', claimed_sha, sha)

    for reverted_sha in sorted(resolved_reverts):
      # A revert of something that landed after `across_sha` doesn't cross it.
      if reverted_sha in intermediate_commits:
        logging.debug('Commit %s reverts %s, which happened after %s', sha,
                      reverted_sha, across_sha)
        continue

      try:
        object_type = subprocess.check_output(
            ['git', '-C', git_dir, 'cat-file', '-t', reverted_sha],
            encoding='utf-8',
            stderr=subprocess.DEVNULL,
        ).strip()
      except subprocess.CalledProcessError:
        logging.warning(
            'Failed to resolve reverted object %s (claimed to be reverted '
            'by sha %s)', reverted_sha, sha)
        continue

      if object_type == 'commit':
        all_reverts.append(Revert(sha, reverted_sha))
        continue

      logging.error("%s claims to revert %s -- which isn't a commit -- %s", sha,
                    object_type, reverted_sha)

  return all_reverts
-
-
def main(args: t.List[str]) -> int:
  """Parses `args`, then prints every revert found across the given base ref.

  Args:
    args: Command-line arguments, excluding the program name. They are:
      `base_ref`, one or more `root`s, and optionally `-C`/`--git_dir` and
      `--debug`.

  Returns:
    0 once all reverts have been printed.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      'base_ref', help='Git ref or sha to check for reverts around.')
  parser.add_argument(
      '-C', '--git_dir', default='.', help='Git directory to use.')
  parser.add_argument(
      'root', nargs='+', help='Root(s) to search for commits from.')
  parser.add_argument('--debug', action='store_true')
  opts = parser.parse_args(args)

  logging.basicConfig(
      format='%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s',
      level=logging.DEBUG if opts.debug else logging.INFO,
  )

  # `root`s can have related history, so we want to filter duplicate commits
  # out. The overwhelmingly common case is also to have one root, and it's way
  # easier to reason about output that comes in an order that's meaningful to
  # git.
  all_reverts = collections.OrderedDict()
  for root in opts.root:
    for revert in find_reverts(opts.git_dir, opts.base_ref, root):
      all_reverts[revert] = None

  for revert in all_reverts:
    print('%s claims to revert %s' % (revert.sha, revert.reverted_sha))

  # Return an explicit status so the `-> int` annotation holds; the process
  # exit code is 0 either way.
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))