diff options
Diffstat (limited to 'llvm_tools/revert_checker.py')
-rwxr-xr-x | llvm_tools/revert_checker.py | 409 |
1 files changed, 230 insertions, 179 deletions
diff --git a/llvm_tools/revert_checker.py b/llvm_tools/revert_checker.py index acc8b5fa..17914ba8 100755 --- a/llvm_tools/revert_checker.py +++ b/llvm_tools/revert_checker.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -#===----------------------------------------------------------------------===## +# ===----------------------------------------------------------------------===## # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -#===----------------------------------------------------------------------===## +# ===----------------------------------------------------------------------===## # # !!!!!!!!!!!! NOTE !!!!!!!!!!!! # This is copied directly from upstream LLVM. Please make any changes upstream, @@ -51,9 +51,10 @@ import logging import re import subprocess import sys -from typing import Generator, List, NamedTuple, Iterable +from typing import Generator, Iterable, List, NamedTuple -assert sys.version_info >= (3, 6), 'Only Python 3.6+ is supported.' + +assert sys.version_info >= (3, 6), "Only Python 3.6+ is supported." # People are creative with their reverts, and heuristics are a bit difficult. # Like 90% of of reverts have "This reverts commit ${full_sha}". @@ -65,206 +66,256 @@ assert sys.version_info >= (3, 6), 'Only Python 3.6+ is supported.' 
def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]:
    """Extracts the SHAs that `commit_message` claims to revert.

    Two spellings are recognized:
      - `This reverts commit <40 lowercase hex digits>`, anywhere in the
        message.
      - A first line that starts with `Revert <6+ lowercase hex digits> "`.

    Args:
        commit_message: The full commit message; may be empty.

    Returns:
        Every full-length SHA in the order it appears, followed by the
        (possibly abbreviated) SHA from the first line, if any. Duplicates are
        not removed. Empty if nothing matched.
    """
    if not commit_message:
        return []

    results = re.findall(
        r"This reverts commit ([a-f0-9]{40})\b", commit_message
    )

    first_line = commit_message.splitlines()[0]
    initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
    if initial_revert:
        results.append(initial_revert.group(1))
    return results


def _stream_stdout(command: List[str]) -> Generator[str, None, None]:
    """Runs `command`, lazily yielding its stdout one line at a time.

    Lines are decoded as UTF-8 (undecodable bytes are replaced) and keep their
    trailing newline.

    Raises:
        subprocess.CalledProcessError: if `command` exits with a non-zero
            status. This is raised once stdout has been fully consumed, so a
            failing command is not mistaken for one that had nothing to print.
    """
    with subprocess.Popen(
        command, stdout=subprocess.PIPE, encoding="utf-8", errors="replace"
    ) as p:
        assert p.stdout is not None  # for mypy's happiness.
        yield from p.stdout

    # Leaving the `with` block waits for the process, so `returncode` is set.
    # This point is not reached if the consumer abandons the generator early.
    if p.returncode:
        raise subprocess.CalledProcessError(p.returncode, command)


def _resolve_sha(git_dir: str, sha: str) -> str:
    """Expands `sha` to a full SHA using the repository at `git_dir`.

    A 40-character `sha` is returned unchanged without consulting git.

    Raises:
        subprocess.CalledProcessError: if `git rev-parse` fails.
    """
    if len(sha) == 40:
        return sha

    return subprocess.check_output(
        ["git", "-C", git_dir, "rev-parse", sha],
        encoding="utf-8",
        stderr=subprocess.DEVNULL,
    ).strip()


# One commit as reported by `git log`: its SHA and its raw message body.
_LogEntry = NamedTuple(
    "_LogEntry",
    [
        ("sha", str),
        ("commit_message", str),
    ],
)


def _log_stream(
    git_dir: str, root_sha: str, end_at_sha: str
) -> Iterable[_LogEntry]:
    """Yields a `_LogEntry` for each commit `git log` reports.

    The log is requested for `root_sha`, excluding `end_at_sha` (passed to git
    as `^end_at_sha`), and entries are yielded in the order git prints them.

    Raises:
        subprocess.CalledProcessError: if `git log` exits with a non-zero
            status.
    """
    # A line that is very unlikely to show up in a real commit message; git
    # prints it before every commit so that entries can be told apart.
    sep = 50 * "<>"
    log_command = [
        "git",
        "-C",
        git_dir,
        "log",
        "^" + end_at_sha,
        root_sha,
        "--format=" + sep + "%n%H%n%B%n",
    ]

    stdout_stream = _stream_stdout(log_command)

    # Find the next separator line. If there's nothing to log, it may not exist.
    # It might not be the first line if git feels complainy.
    found_commit_header = False
    for line in stdout_stream:
        if line.rstrip() == sep:
            found_commit_header = True
            break

    while found_commit_header:
        # The line right after a separator is always the commit's SHA (%H).
        sha = next(stdout_stream, None)
        assert sha is not None, "git died?"
        sha = sha.rstrip()

        commit_message = []

        # Everything up to the next separator (or EOF) is the message body.
        found_commit_header = False
        for line in stdout_stream:
            line = line.rstrip()
            if line == sep:
                found_commit_header = True
                break
            commit_message.append(line)

        yield _LogEntry(sha, "\n".join(commit_message).rstrip())


def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]:
    """Lazily yields the SHAs of `git rev-list --first-parent base..head`."""
    rev_list = [
        "git",
        "-C",
        git_dir,
        "rev-list",
        "--first-parent",
        f"{base_ref}..{head_ref}",
    ]
    return (x.strip() for x in _stream_stdout(rev_list))


def _rev_parse(git_dir: str, ref: str) -> str:
    """Returns the output of `git rev-parse ref` run in `git_dir`.

    Raises:
        subprocess.CalledProcessError: if `git rev-parse` fails.
    """
    return subprocess.check_output(
        ["git", "-C", git_dir, "rev-parse", ref],
        encoding="utf-8",
    ).strip()


# A detected revert: `sha` is the reverting commit, `reverted_sha` is the commit
# it claims to revert.
Revert = NamedTuple(
    "Revert",
    [
        ("sha", str),
        ("reverted_sha", str),
    ],
)


def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
    """Finds the closest common parent commit between `ref_a` and `ref_b`."""
    return subprocess.check_output(
        ["git", "-C", git_dir, "merge-base", ref_a, ref_b],
        encoding="utf-8",
    ).strip()


def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
    """Finds reverts across `across_ref` in `git_dir`, starting from `root`.

    A commit logged between `across_ref` and `root` counts when its message
    claims to revert a commit that is not itself one of the first-parent
    commits between `across_ref` and `root`.

    These reverts are returned in order of oldest reverts first.

    Args:
        git_dir: Path handed to `git -C`.
        across_ref: Ref or SHA that reverts must reach across. Must be an
            ancestor of `root`.
        root: Ref or SHA to start searching from.

    Raises:
        ValueError: if `across_ref` is not an ancestor of `root`.
        subprocess.CalledProcessError: if `across_ref` or `root` cannot be
            resolved, or if git fails while listing commits.
    """
    across_sha = _rev_parse(git_dir, across_ref)
    root_sha = _rev_parse(git_dir, root)

    common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha)
    if common_ancestor != across_sha:
        # Both literals need the `f` prefix; adjacent string literals are
        # concatenated, but each one is formatted on its own.
        raise ValueError(
            f"{across_sha} isn't an ancestor of {root_sha} "
            f"(common ancestor: {common_ancestor})"
        )

    intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
    assert across_sha not in intermediate_commits

    logging.debug(
        "%d commits appear between %s and %s",
        len(intermediate_commits),
        across_sha,
        root_sha,
    )

    all_reverts = []
    for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
        reverts = _try_parse_reverts_from_commit_message(commit_message)
        if not reverts:
            continue

        # Commit messages are free-form text, so an abbreviated SHA in one may
        # not name anything in this repository. Skip those instead of letting
        # a single bad message abort the whole search.
        resolved_reverts = set()
        for claimed_sha in reverts:
            try:
                resolved_reverts.add(_resolve_sha(git_dir, claimed_sha))
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be "
                    "reverted by sha %s)",
                    claimed_sha,
                    sha,
                )

        for reverted_sha in sorted(resolved_reverts):
            if reverted_sha in intermediate_commits:
                logging.debug(
                    "Commit %s reverts %s, which happened after %s",
                    sha,
                    reverted_sha,
                    across_sha,
                )
                continue

            try:
                object_type = subprocess.check_output(
                    ["git", "-C", git_dir, "cat-file", "-t", reverted_sha],
                    encoding="utf-8",
                    stderr=subprocess.DEVNULL,
                ).strip()
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    reverted_sha,
                    sha,
                )
                continue

            if object_type == "commit":
                all_reverts.append(Revert(sha, reverted_sha))
                continue

            logging.error(
                "%s claims to revert a %s -- which isn't a commit -- %s",
                sha,
                object_type,
                reverted_sha,
            )

    # Since `all_reverts` contains reverts in log order (e.g., newer comes before
    # older), we need to reverse this to keep with our guarantee of older =
    # earlier in the result.
    all_reverts.reverse()
    return all_reverts


def _format_sha(sha: str, as_review_url: bool) -> str:
    """Returns `sha`, or its reviews.llvm.org URL if `as_review_url` is set."""
    return f"https://reviews.llvm.org/rG{sha}" if as_review_url else sha


def _main() -> None:
    """Parses the command line and prints every revert found, one per line."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "base_ref", help="Git ref or sha to check for reverts around."
    )
    parser.add_argument(
        "-C", "--git_dir", default=".", help="Git directory to use."
    )
    parser.add_argument(
        "root", nargs="+", help="Root(s) to search for commits from."
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument(
        "-u",
        "--review_url",
        action="store_true",
        help="Format SHAs as llvm review URLs",
    )
    opts = parser.parse_args()

    logging.basicConfig(
        format="%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # `root`s can have related history, so we want to filter duplicate commits
    # out. The overwhelmingly common case is also to have one root, and it's way
    # easier to reason about output that comes in an order that's meaningful to
    # git.
    seen_reverts = set()
    all_reverts = []
    for root in opts.root:
        for revert in find_reverts(opts.git_dir, opts.base_ref, root):
            if revert not in seen_reverts:
                seen_reverts.add(revert)
                all_reverts.append(revert)

    for revert in all_reverts:
        sha_fmt = _format_sha(revert.sha, opts.review_url)
        reverted_sha_fmt = _format_sha(revert.reverted_sha, opts.review_url)
        print(f"{sha_fmt} claims to revert {reverted_sha_fmt}")


if __name__ == "__main__":
    _main()