diff options
Diffstat (limited to 'llvm_tools')
-rwxr-xr-x | llvm_tools/git_llvm_rev.py | 352 | ||||
-rwxr-xr-x | llvm_tools/git_llvm_rev_test.py | 122 |
2 files changed, 474 insertions, 0 deletions
# diff --git a/llvm_tools/git_llvm_rev.py b/llvm_tools/git_llvm_rev.py
# new file mode 100755
# index 00000000..4c9fed12
# --- /dev/null
# +++ b/llvm_tools/git_llvm_rev.py
# @@ -0,0 +1,352 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Maps LLVM git SHAs to synthetic revision numbers and back.

Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand,
r1234 is parsed as '(master, r1234)'.
"""

import argparse
import re
import subprocess
import sys
import typing as t

# Note that after base_llvm_sha, we reach The Wild West(TM) of commits.
# So reasonable input that could break us includes:
#
#   Revert foo
#
#   This reverts foo, which had the commit message:
#
#   bar
#   llvm-svn: 375505
#
# While saddening, this is something we should probably try to handle
# reasonably.
base_llvm_revision = 375505
base_llvm_sha = '186155b89c2d2a2f62337081e3ca15f676c9434b'

# Represents an LLVM git checkout:
#  - |dir| is the directory of the LLVM checkout
#  - |remote| is the name of the LLVM remote. Generally it's "origin".
LLVMConfig = t.NamedTuple('LLVMConfig', (('remote', str), ('dir', str)))

# Accepted spellings of a revision. Both are applied with fullmatch() so that
# trailing garbage is rejected, and both use [0-9] rather than \d or int()
# leniency so that signs, underscores and non-ASCII digits are rejected.
_SHORTHAND_REV_RE = re.compile(r'r([0-9]+)')
_FULL_REV_RE = re.compile(r'\((.+), r([0-9]+)\)')

# Anything from an abbreviated (7 character) to a full (40 character) hex SHA.
_GIT_SHA_RE = re.compile(r'[0-9a-fA-F]{7,40}')


class Rev(t.NamedTuple('Rev', (('branch', str), ('number', int)))):
  """Represents an LLVM 'revision': a shorthand that identifies a commit."""

  @staticmethod
  def parse(rev: str) -> 'Rev':
    """Parses a Rev from the given string.

    Revs are written as (${branch_name}, r${commits_since_base_commit}).
    r${commits_since_base_commit} on its own is accepted as shorthand for
    (master, r${commits_since_base_commit}).

    Raises a ValueError on a failed parse.
    """
    shorthand = _SHORTHAND_REV_RE.fullmatch(rev)
    if shorthand:
      return Rev(branch='master', number=int(shorthand.group(1)))

    match = _FULL_REV_RE.fullmatch(rev)
    if not match:
      raise ValueError("%r isn't a valid revision" % rev)

    branch_name, rev_string = match.groups()
    return Rev(branch=branch_name, number=int(rev_string))

  def __str__(self) -> str:
    """Formats this Rev in a form that |Rev.parse| accepts."""
    branch_name, number = self
    if branch_name == 'master':
      return 'r%d' % number
    return '(%s, r%d)' % (branch_name, number)


def is_git_sha(xs: str) -> bool:
  """Returns whether the given string looks like a valid git commit SHA.

  Both abbreviated (7 or more characters) and full (40 character) SHAs are
  accepted, in either case. Only ASCII hex digits count.
  """
  return _GIT_SHA_RE.fullmatch(xs) is not None


def check_output(command: t.List[str], cwd: str) -> str:
  """Shorthand for subprocess.check_output. Auto-decodes any stdout.

  Raises a subprocess.CalledProcessError if |command| exits unsuccessfully.
  """
  result = subprocess.run(
      command,
      cwd=cwd,
      check=True,
      stdin=subprocess.DEVNULL,
      stdout=subprocess.PIPE,
      encoding='utf-8',
  )
  return result.stdout


def translate_prebase_sha_to_rev_number(llvm_config: LLVMConfig,
                                        sha: str) -> int:
  """Translates a sha to a revision number (e.g., "llvm-svn: 1234").

  This function assumes that the given SHA is an ancestor of |base_llvm_sha|.

  Raises a ValueError if the last line of the commit message isn't an
  llvm-svn line.
  """
  commit_message = check_output(
      ['git', 'log', '-n1', '--format=%B', sha],
      cwd=llvm_config.dir,
  )
  message_lines = commit_message.strip().splitlines()
  # Only the last line is trusted: reverts may quote the llvm-svn line of the
  # commit they revert earlier in the message.
  svn_match = None
  if message_lines:
    svn_match = re.match(r'^llvm-svn: (\d+)$', message_lines[-1])

  if not svn_match:
    raise ValueError(
        f"No llvm-svn line found for {sha}, which... shouldn't happen?")

  return int(svn_match.group(1))


def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
  """Translates a sha or git ref to a Rev.

  Raises a ValueError if the commit isn't on exactly one branch of
  |llvm_config.remote|, and a subprocess.CalledProcessError if git fails.
  """
  if len(sha_or_ref) == 40 and is_git_sha(sha_or_ref):
    sha = sha_or_ref.lower()
  else:
    # Abbreviated SHAs go through rev-parse as well: the comparisons against
    # |merge_base| below only work on full, lower-case SHAs.
    sha = check_output(
        ['git', 'rev-parse', sha_or_ref],
        cwd=llvm_config.dir,
    ).strip()

  merge_base = check_output(
      ['git', 'merge-base', base_llvm_sha, sha],
      cwd=llvm_config.dir,
  ).strip()

  if merge_base == base_llvm_sha:
    result = check_output(
        ['git', 'rev-list', '--count', f'{base_llvm_sha}..{sha}'],
        cwd=llvm_config.dir,
    )
    count = int(result.strip())
    return Rev(branch='master', number=count + base_llvm_revision)

  # Otherwise, either:
  #  - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|)
  #  - |merge_base| is neither (we have a guaranteed llvm-svn number on
  #    |merge_base|, but not |sha|)
  merge_base_number = translate_prebase_sha_to_rev_number(
      llvm_config, merge_base)
  if merge_base == sha:
    return Rev(branch='master', number=merge_base_number)

  distance_from_base = check_output(
      ['git', 'rev-list', '--count', f'{merge_base}..{sha}'],
      cwd=llvm_config.dir,
  )
  revision_number = merge_base_number + int(distance_from_base.strip())

  branches_containing = check_output(
      ['git', 'branch', '-r', '--contains', sha],
      cwd=llvm_config.dir,
  )

  # Only branches of the configured remote are candidates.
  prefix = llvm_config.remote + '/'
  candidates = []
  for branch in branches_containing.splitlines():
    branch = branch.strip()
    if branch.startswith(prefix):
      candidates.append(branch[len(prefix):])

  if not candidates:
    raise ValueError(
        f'No viable branches found from {llvm_config.remote} with {sha}')

  if len(candidates) != 1:
    raise ValueError(
        f'Ambiguity: multiple branches from {llvm_config.remote} have {sha}: '
        f'{sorted(candidates)}')

  branch, = candidates
  return Rev(branch=branch, number=revision_number)


def parse_git_commit_messages(stream: t.Iterable[str],
                              separator: str) -> t.Iterable[t.Tuple[str, str]]:
  """Parses a stream of git log messages.

  These are expected to be in the format:

  40 character sha
  commit
  message
  body
  separator
  40 character sha
  commit
  message
  body
  separator

  Yields (sha, message) pairs, where message keeps its line endings.

  Raises a ValueError if a line that should hold a SHA doesn't look like one.
  """
  lines = iter(stream)
  while True:
    sha = next(lines, None)
    if sha is None:
      return

    sha = sha.strip()
    # A real exception rather than an assert: this validates external input
    # and must not vanish under `python -O`.
    if not is_git_sha(sha):
      raise ValueError(f'Invalid git SHA: {sha}')

    message = []
    for line in lines:
      if line.strip() == separator:
        break
      message.append(line)

    yield sha, ''.join(message)


def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
  """Translates a Rev to a SHA.

  This function assumes that the given rev refers to a commit that's an
  ancestor of |base_llvm_sha|.

  Raises a ValueError if no such commit exists, and a
  subprocess.CalledProcessError if git itself fails.
  """
  # Because reverts may include reverted commit messages, we can't just |-n1|
  # and pick that.
  separator = '>!' * 80
  looking_for = f'llvm-svn: {rev.number}'

  git_command = [
      'git', 'log', '--grep', f'^{looking_for}$', f'--format=%H%n%B{separator}',
      base_llvm_sha
  ]

  subp = subprocess.Popen(
      git_command,
      cwd=llvm_config.dir,
      stdin=subprocess.DEVNULL,
      stdout=subprocess.PIPE,
      encoding='utf-8',
  )

  with subp:
    for sha, message in parse_git_commit_messages(subp.stdout, separator):
      # Strip before splitting, as translate_prebase_sha_to_rev_number does, so
      # trailing blank lines can't hide the llvm-svn line.
      message_lines = message.strip().splitlines()
      if message_lines and message_lines[-1].strip() == looking_for:
        # We have our answer; no need to let git walk the rest of history.
        subp.terminate()
        return sha

  # Leaving the |with| block waits for git, so |returncode| is set by now.
  if subp.returncode:
    raise subprocess.CalledProcessError(subp.returncode, git_command)
  raise ValueError(f'No commit with revision {rev} found')


def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
  """Translates a Rev to a SHA.

  Raises a ValueError if the given Rev doesn't exist in the given config.
  """
  branch, number = rev

  if branch == 'master':
    if number < base_llvm_revision:
      return translate_prebase_rev_to_sha(llvm_config, rev)
    base_sha = base_llvm_sha
    base_revision_number = base_llvm_revision
  else:
    base_sha = check_output(
        ['git', 'merge-base', base_llvm_sha, f'{llvm_config.remote}/{branch}'],
        cwd=llvm_config.dir,
    )
    base_sha = base_sha.strip()
    if base_sha == base_llvm_sha:
      base_revision_number = base_llvm_revision
    else:
      base_revision_number = translate_prebase_sha_to_rev_number(
          llvm_config, base_sha)

  # Alternatively, we could |git log --format=%H|, but git is *super* fast
  # about rev walking/counting locally compared to long |log|s, so we walk back
  # twice.
  head = check_output(
      ['git', 'rev-parse', f'{llvm_config.remote}/{branch}'],
      cwd=llvm_config.dir,
  )
  branch_head_sha = head.strip()

  commit_number = number - base_revision_number
  revs_between_str = check_output(
      ['git', 'rev-list', '--count', f'{base_sha}..{branch_head_sha}'],
      cwd=llvm_config.dir,
  )
  revs_between = int(revs_between_str.strip())

  commits_behind_head = revs_between - commit_number
  if commits_behind_head < 0:
    raise ValueError(
        f'Revision {rev} is past {llvm_config.remote}/{branch}. Try updating '
        'your tree?')

  result = check_output(
      ['git', 'rev-parse', f'{branch_head_sha}~{commits_behind_head}'],
      cwd=llvm_config.dir,
  )

  return result.strip()


def find_root_llvm_dir(root_dir: str = '.') -> str:
  """Finds the root of an LLVM directory starting at |root_dir|.

  Raises a subprocess.CalledProcessError if no git directory is found.
  """
  result = check_output(
      ['git', 'rev-parse', '--show-toplevel'],
      cwd=root_dir,
  )
  return result.strip()


def main(argv: t.List[str]) -> None:
  """Command-line entry point: converts a SHA to a rev, or a rev to a SHA."""
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument(
      '--llvm_dir',
      help='LLVM directory to consult for git history, etc. Autodetected '
      'if cwd is inside of an LLVM tree')
  parser.add_argument(
      '--upstream',
      default='origin',
      help="LLVM upstream's remote name. Defaults to %(default)s.")
  sha_or_rev = parser.add_mutually_exclusive_group(required=True)
  sha_or_rev.add_argument(
      '--sha', help='A git SHA (or ref) to convert to a rev')
  sha_or_rev.add_argument('--rev', help='A rev to convert into a sha')
  opts = parser.parse_args(argv)

  llvm_dir = opts.llvm_dir
  if llvm_dir is None:
    try:
      llvm_dir = find_root_llvm_dir()
    except subprocess.CalledProcessError:
      parser.error("Couldn't autodetect an LLVM tree; please use --llvm_dir")

  # Reuse the directory resolved above rather than asking git a second time.
  config = LLVMConfig(
      remote=opts.upstream,
      dir=llvm_dir,
  )

  if opts.sha:
    print(translate_sha_to_rev(config, opts.sha))
  else:
    print(translate_rev_to_sha(config, Rev.parse(opts.rev)))


if __name__ == '__main__':
  main(sys.argv[1:])

# diff --git a/llvm_tools/git_llvm_rev_test.py b/llvm_tools/git_llvm_rev_test.py
# new file mode 100755
# index 00000000..ebb654ff
# --- /dev/null
# +++ b/llvm_tools/git_llvm_rev_test.py
# @@ -0,0 +1,122 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import sys
import unittest

import git_llvm_rev


def get_llvm_checkout() -> str:
  """Returns the path of the llvm-project-copy checkout beside this file."""
  here = os.path.abspath(__file__)
  return os.path.join(os.path.dirname(here), 'llvm-project-copy')


def get_llvm_config() -> git_llvm_rev.LLVMConfig:
  """Returns an LLVMConfig for that checkout, using the 'origin' remote."""
  return git_llvm_rev.LLVMConfig(remote='origin', dir=get_llvm_checkout())


class Test(unittest.TestCase):
  """Round-trip tests for git_llvm_rev, run against a real LLVM checkout."""

  def rev_to_sha_with_round_trip(self, rev: git_llvm_rev.Rev) -> str:
    """Translates |rev| to a SHA, checking the SHA translates back to |rev|."""
    llvm_config = get_llvm_config()
    resolved_sha = git_llvm_rev.translate_rev_to_sha(llvm_config, rev)
    self.assertEqual(
        git_llvm_rev.translate_sha_to_rev(llvm_config, resolved_sha), rev)
    return resolved_sha

  def test_sha_to_rev_on_base_sha_works(self) -> None:
    base_rev = git_llvm_rev.Rev(
        branch='master', number=git_llvm_rev.base_llvm_revision)
    sha = self.rev_to_sha_with_round_trip(base_rev)
    self.assertEqual(sha, git_llvm_rev.base_llvm_sha)

  def test_sha_to_rev_prior_to_base_rev_works(self) -> None:
    prebase_rev = git_llvm_rev.Rev(branch='master', number=375000)
    sha = self.rev_to_sha_with_round_trip(prebase_rev)
    self.assertEqual(sha, '2f6da767f13b8fd81f840c211d405fea32ac9db7')

  def test_sha_to_rev_after_base_rev_works(self) -> None:
    postbase_rev = git_llvm_rev.Rev(branch='master', number=375506)
    sha = self.rev_to_sha_with_round_trip(postbase_rev)
    self.assertEqual(sha, '3bf7fddeb05655d9baed4cc69e13535c677ed1dd')

  def test_llvm_svn_parsing_runs_ignore_reverts(self) -> None:
    # The revert's commit message quotes the llvm-svn line of the commit it
    # reverted, so both revisions must still resolve to the right commit.

    # Commit which performed the revert
    reverting_rev = git_llvm_rev.Rev(branch='master', number=374895)
    sha = self.rev_to_sha_with_round_trip(reverting_rev)
    self.assertEqual(sha, '1731fc88d1fa1fa55edd056db73a339b415dd5d6')

    # Commit that was reverted
    reverted_rev = git_llvm_rev.Rev(branch='master', number=374841)
    sha = self.rev_to_sha_with_round_trip(reverted_rev)
    self.assertEqual(sha, '2a1386c81de504b5bda44fbecf3f7b4cdfd748fc')

  def test_imaginary_revs_raise(self) -> None:
    imaginary_rev = git_llvm_rev.Rev(branch='master', number=9999999)
    with self.assertRaises(ValueError) as r:
      git_llvm_rev.translate_rev_to_sha(get_llvm_config(), imaginary_rev)

    self.assertIn('Try updating your tree?', str(r.exception))

  # NOTE: The below tests have _zz_ in their name as an optimization. Iterating
  # on a quick test is painful when these larger tests come before it and take
  # 7secs to run. Python's unittest module guarantees tests are run in
  # alphabetical order by their method name, so...
  #
  # If you're wondering, the slow part is `git branch -r --contains`. I imagine
  # it's going to be very cold code, so I'm not inclined to optimize it much.

  def test_zz_branch_revs_work_after_merge_points_and_svn_cutoff(self) -> None:
    # Arbitrary 9.x commit without an attached llvm-svn: value.
    branch_rev = git_llvm_rev.Rev(branch='release/9.x', number=366670)
    sha = self.rev_to_sha_with_round_trip(branch_rev)
    self.assertEqual(sha, '4e858e4ac00b59f064da4e1f7e276916e7d296aa')

  def test_zz_branch_revs_work_at_merge_points(self) -> None:
    rev_number = 366426
    backing_sha = 'c89a3d78f43d81b9cff7b9248772ddf14d21b749'

    master_rev = git_llvm_rev.Rev(branch='master', number=rev_number)
    sha = self.rev_to_sha_with_round_trip(master_rev)
    self.assertEqual(sha, backing_sha)

    # Note that this won't round-trip: since this commit is on the master
    # branch, we'll pick master for this. That's fine
    release_rev = git_llvm_rev.Rev(branch='release/9.x', number=rev_number)
    sha = git_llvm_rev.translate_rev_to_sha(get_llvm_config(), release_rev)
    self.assertEqual(sha, backing_sha)

  def test_zz_branch_revs_work_after_merge_points(self) -> None:
    # Picking the commit on the 9.x branch after the merge-base for that +
    # master. Note that this is where llvm-svn numbers should diverge from
    # ours, and are therefore untrustworthy. The commit for this *does* have a
    # different `llvm-svn:` string than we should have.
    branch_rev = git_llvm_rev.Rev(branch='release/9.x', number=366427)
    sha = self.rev_to_sha_with_round_trip(branch_rev)
    self.assertEqual(sha, '2cf681a11aea459b50d712abc7136f7129e4d57f')


# FIXME: When release/10.x happens, it may be nice to have a test-case
# generally covering that, since it's the first branch that we have to travel
# back to the base commit for.

if __name__ == '__main__':
  # We have exactly one concrete target that we often have checked out anyway.
  # Rather than building tests that hopefully match that target, use it
  # directly.
  checkout = get_llvm_checkout()
  if not os.path.isdir(checkout):
    print(
        'Please checkout llvm-project-copy to run these tests. A simple way '
        'to do that is running `./get_llvm_hash.py --llvm_version 370000`',
        file=sys.stderr)
    sys.exit(1)
  unittest.main()