aboutsummaryrefslogtreecommitdiff
path: root/llvm_tools
diff options
context:
space:
mode:
Diffstat (limited to 'llvm_tools')
-rwxr-xr-x llvm_tools/git_llvm_rev.py 352
-rwxr-xr-x llvm_tools/git_llvm_rev_test.py 122
2 files changed, 474 insertions, 0 deletions
diff --git a/llvm_tools/git_llvm_rev.py b/llvm_tools/git_llvm_rev.py
new file mode 100755
index 00000000..4c9fed12
--- /dev/null
+++ b/llvm_tools/git_llvm_rev.py
@@ -0,0 +1,352 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Maps LLVM git SHAs to synthetic revision numbers and back.
+
+Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand,
+r1234 is parsed as '(master, 1234)'.
+"""
+
+import argparse
+import re
+import subprocess
+import sys
+import typing as t
+
+# Note that after base_llvm_sha, we reach The Wild West(TM) of commits.
+# So reasonable input that could break us includes:
+#
+#   Revert foo
+#
+#   This reverts foo, which had the commit message:
+#
+#   bar
+#   llvm-svn: 375505
+#
+# While saddening, this is something we should probably try to handle
+# reasonably.
+#
+# |base_llvm_revision| is the revision number assigned to |base_llvm_sha|;
+# numbers for commits that descend from that SHA are derived by adding a
+# commit count to it.
+base_llvm_revision = 375505
+base_llvm_sha = '186155b89c2d2a2f62337081e3ca15f676c9434b'
+
+# Represents an LLVM git checkout:
+# - |dir| is the directory of the LLVM checkout
+# - |remote| is the name of the LLVM remote. Generally it's "origin".
+LLVMConfig = t.NamedTuple('LLVMConfig', (('remote', str), ('dir', str)))
+
+
+class Rev(t.NamedTuple('Rev', (('branch', str), ('number', int)))):
+  """Represents an LLVM 'revision': a shorthand that identifies an LLVM commit.
+
+  |branch| is the name of the branch the commit lives on, and |number| is the
+  revision number of the commit on that branch.
+  """
+
+  @staticmethod
+  def parse(rev: str) -> 'Rev':
+    """Parses a Rev from the given string.
+
+    Accepts either '(${branch_name}, r${number})' or the shorthand
+    'r${number}', which is parsed as '(master, r${number})'.
+
+    Raises a ValueError on a failed parse.
+    """
+    # Both forms must match the *entire* string, so that trailing garbage
+    # (e.g. '(master, r12)junk' or 'r12abc') and non-digit numbers (e.g.
+    # 'r-5') are rejected rather than silently accepted.
+    shorthand = re.fullmatch(r'r(\d+)', rev)
+    if shorthand:
+      return Rev(branch='master', number=int(shorthand.group(1)))
+
+    match = re.fullmatch(r'\((.+), r(\d+)\)', rev)
+    if not match:
+      raise ValueError("%r isn't a valid revision" % rev)
+
+    branch_name, rev_string = match.groups()
+    return Rev(branch=branch_name, number=int(rev_string))
+
+  def __str__(self) -> str:
+    """Formats this Rev in the syntax that parse() accepts."""
+    branch_name, number = self
+    if branch_name == 'master':
+      return 'r%d' % number
+    return '(%s, r%d)' % (branch_name, number)
+
+
+def is_git_sha(xs: str) -> bool:
+  """Returns whether the given string looks like a valid git commit SHA.
+
+  A string qualifies if it is between 7 and 40 characters long (inclusive) and
+  consists solely of ASCII hexadecimal digits, in either case.
+  """
+  # str.isdigit() is also true for non-ASCII digits (e.g. Arabic-Indic ones),
+  # which can never appear in a SHA, so test membership in the hex alphabet.
+  hex_digits = '0123456789abcdefABCDEF'
+  return 7 <= len(xs) <= 40 and all(x in hex_digits for x in xs)
+
+
+def check_output(command: t.List[str], cwd: str) -> str:
+  """Runs |command| in |cwd| and returns its stdout decoded as UTF-8.
+
+  stdin is connected to /dev/null. Raises a subprocess.CalledProcessError if
+  the command exits with a nonzero status.
+  """
+  run_kwargs = {
+      'cwd': cwd,
+      'check': True,
+      'stdin': subprocess.DEVNULL,
+      'stdout': subprocess.PIPE,
+      'encoding': 'utf-8',
+  }
+  return subprocess.run(command, **run_kwargs).stdout
+
+
+def translate_prebase_sha_to_rev_number(llvm_config: LLVMConfig,
+                                        sha: str) -> int:
+  """Translates a sha to a revision number (e.g., "llvm-svn: 1234").
+
+  This function assumes that the given SHA is an ancestor of |base_llvm_sha|.
+
+  Raises a ValueError if the last line of the commit message isn't an
+  llvm-svn line.
+  """
+  commit_message = check_output(
+      ['git', 'log', '-n1', '--format=%B', sha],
+      cwd=llvm_config.dir,
+  )
+  # Only the final line is consulted: reverts may quote the llvm-svn line of
+  # the commit they revert earlier in their message. An empty message has no
+  # lines at all; treat it like any other message lacking an llvm-svn line
+  # instead of failing with an IndexError.
+  message_lines = commit_message.strip().splitlines()
+  last_line = message_lines[-1] if message_lines else ''
+  svn_match = re.match(r'^llvm-svn: (\d+)$', last_line)
+
+  if not svn_match:
+    raise ValueError(
+        f"No llvm-svn line found for {sha}, which... shouldn't happen?")
+
+  return int(svn_match.group(1))
+
+
+def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
+  """Translates a sha or git ref to a Rev.
+
+  Raises a ValueError if the commit lies off of master's history and isn't
+  contained in exactly one branch of |llvm_config.remote|. Raises a
+  subprocess.CalledProcessError if a git command fails (e.g., because
+  |sha_or_ref| can't be resolved).
+  """
+  # Always have git resolve the input, even when it already looks like a SHA.
+  # The |merge_base| checks below are exact string comparisons against the
+  # full SHAs that git prints, so an abbreviated SHA would never compare equal.
+  sha = check_output(
+      ['git', 'rev-parse', sha_or_ref],
+      cwd=llvm_config.dir,
+  ).strip()
+
+  merge_base = check_output(
+      ['git', 'merge-base', base_llvm_sha, sha],
+      cwd=llvm_config.dir,
+  ).strip()
+
+  if merge_base == base_llvm_sha:
+    # |sha| descends from (or is) the base commit, so its number is the
+    # base's number plus the count of commits added since.
+    result = check_output(
+        ['git', 'rev-list', '--count', f'{base_llvm_sha}..{sha}'],
+        cwd=llvm_config.dir,
+    )
+    count = int(result.strip())
+    return Rev(branch='master', number=count + base_llvm_revision)
+
+  # Otherwise, either:
+  # - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|)
+  # - |merge_base| is neither (we have a guaranteed llvm-svn number on
+  #   |merge_base|, but not |sha|)
+  merge_base_number = translate_prebase_sha_to_rev_number(
+      llvm_config, merge_base)
+  if merge_base == sha:
+    return Rev(branch='master', number=merge_base_number)
+
+  distance_from_base = check_output(
+      ['git', 'rev-list', '--count', f'{merge_base}..{sha}'],
+      cwd=llvm_config.dir,
+  )
+
+  revision_number = merge_base_number + int(distance_from_base.strip())
+  branches_containing = check_output(
+      ['git', 'branch', '-r', '--contains', sha],
+      cwd=llvm_config.dir,
+  )
+
+  # Only branches of the configured remote are eligible names for the Rev.
+  prefix = llvm_config.remote + '/'
+  candidates = [
+      branch[len(prefix):]
+      for branch in map(str.strip, branches_containing.splitlines())
+      if branch.startswith(prefix)
+  ]
+
+  if not candidates:
+    raise ValueError(
+        f'No viable branches found from {llvm_config.remote} with {sha}')
+
+  if len(candidates) != 1:
+    raise ValueError(
+        f'Ambiguity: multiple branches from {llvm_config.remote} have {sha}: '
+        f'{sorted(candidates)}')
+
+  branch, = candidates
+  return Rev(branch=branch, number=revision_number)
+
+
+def parse_git_commit_messages(stream: t.Iterable[str],
+                              separator: str) -> t.Iterable[t.Tuple[str, str]]:
+  """Splits a stream of git log lines into (sha, message) pairs.
+
+  Each entry in the stream is expected to be in the format:
+
+    40 character sha
+    commit
+    message
+    body
+    separator
+
+  with entries following each other directly. A line counts as the separator
+  if it equals |separator| once surrounding whitespace is stripped.
+  """
+  remaining = iter(stream)
+  # Both loops pull from the same iterator: the outer one consumes the SHA
+  # line of each entry, the inner one consumes that entry's message up to and
+  # including its separator line.
+  for sha_line in remaining:
+    commit_sha = sha_line.strip()
+    assert is_git_sha(commit_sha), f'Invalid git SHA: {commit_sha}'
+
+    body_lines = []
+    for body_line in remaining:
+      if body_line.strip() == separator:
+        break
+      body_lines.append(body_line)
+
+    yield commit_sha, ''.join(body_lines)
+
+
+def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
+  """Translates a Rev to a SHA.
+
+  This function assumes that the given rev refers to a commit that's an
+  ancestor of |base_llvm_sha|.
+
+  Raises a ValueError if no commit carries the requested llvm-svn number, and
+  a subprocess.CalledProcessError if `git log` exits with a nonzero status.
+  """
+  # Because reverts may include reverted commit messages, we can't just |-n1|
+  # and pick that.
+  separator = '>!' * 80
+  looking_for = f'llvm-svn: {rev.number}'
+
+  git_command = [
+      'git', 'log', '--grep', f'^{looking_for}$', f'--format=%H%n%B{separator}',
+      base_llvm_sha
+  ]
+
+  subp = subprocess.Popen(
+      git_command,
+      cwd=llvm_config.dir,
+      stdin=subprocess.DEVNULL,
+      stdout=subprocess.PIPE,
+      encoding='utf-8',
+  )
+
+  with subp:
+    for sha, message in parse_git_commit_messages(subp.stdout, separator):
+      # Strip before splitting, as translate_prebase_sha_to_rev_number does, so
+      # trailing blank lines can't hide the llvm-svn line (or, for an empty
+      # message, make the [-1] lookup raise an IndexError).
+      message_lines = message.strip().splitlines()
+      if message_lines and message_lines[-1].strip() == looking_for:
+        # We have our answer; no need to let git keep producing output.
+        subp.terminate()
+        return sha
+
+  if subp.returncode:
+    raise subprocess.CalledProcessError(subp.returncode, git_command)
+  raise ValueError(f'No commit with revision {rev} found')
+
+
+def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
+  """Translates a Rev to a SHA.
+
+  Raises a ValueError if the given Rev doesn't exist in the given config.
+  """
+  branch, number = rev
+
+  # Find the commit that |branch| is numbered from, and that commit's number.
+  if branch == 'master':
+    base_sha = base_llvm_sha
+    base_revision_number = base_llvm_revision
+  else:
+    base_sha = check_output(
+        ['git', 'merge-base', base_llvm_sha, f'{llvm_config.remote}/{branch}'],
+        cwd=llvm_config.dir,
+    )
+    base_sha = base_sha.strip()
+    if base_sha == base_llvm_sha:
+      base_revision_number = base_llvm_revision
+    else:
+      base_revision_number = translate_prebase_sha_to_rev_number(
+          llvm_config, base_sha)
+
+  # Revisions numbered below |base_sha| live in the history shared with
+  # master, where commits are identified by their llvm-svn line; commit
+  # counts aren't guaranteed to line up with llvm-svn numbers there. Look
+  # those up by llvm-svn line for every branch, rather than letting the
+  # counting logic below walk back past |base_sha|.
+  if number < base_revision_number:
+    return translate_prebase_rev_to_sha(llvm_config, rev)
+
+  # Alternatively, we could |git log --format=%H|, but git is *super* fast
+  # about rev walking/counting locally compared to long |log|s, so we walk back
+  # twice.
+  head = check_output(
+      ['git', 'rev-parse', f'{llvm_config.remote}/{branch}'],
+      cwd=llvm_config.dir,
+  )
+  branch_head_sha = head.strip()
+
+  commit_number = number - base_revision_number
+  revs_between_str = check_output(
+      ['git', 'rev-list', '--count', f'{base_sha}..{branch_head_sha}'],
+      cwd=llvm_config.dir,
+  )
+  revs_between = int(revs_between_str.strip())
+
+  commits_behind_head = revs_between - commit_number
+  if commits_behind_head < 0:
+    raise ValueError(
+        f'Revision {rev} is past {llvm_config.remote}/{branch}. Try updating '
+        'your tree?')
+
+  result = check_output(
+      ['git', 'rev-parse', f'{branch_head_sha}~{commits_behind_head}'],
+      cwd=llvm_config.dir,
+  )
+
+  return result.strip()
+
+
+def find_root_llvm_dir(root_dir: str = '.') -> str:
+  """Returns the top-level directory of the git checkout holding |root_dir|.
+
+  Raises a subprocess.CalledProcessError if no git directory is found.
+  """
+  toplevel_command = ['git', 'rev-parse', '--show-toplevel']
+  return check_output(toplevel_command, cwd=root_dir).strip()
+
+
+def main(argv: t.List[str]) -> None:
+  """Parses |argv| and prints the requested sha -> rev or rev -> sha mapping."""
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument(
+      '--llvm_dir',
+      help='LLVM directory to consult for git history, etc. Autodetected '
+      'if cwd is inside of an LLVM tree')
+  parser.add_argument(
+      '--upstream',
+      default='origin',
+      help="LLVM upstream's remote name. Defaults to %(default)s.")
+  sha_or_rev = parser.add_mutually_exclusive_group(required=True)
+  sha_or_rev.add_argument(
+      '--sha', help='A git SHA (or ref) to convert to a rev')
+  sha_or_rev.add_argument('--rev', help='A rev to convert into a sha')
+  opts = parser.parse_args(argv)
+
+  # An absent (or empty) --llvm_dir means "autodetect from cwd". Do that
+  # exactly once, inside the try, so a failed detection produces a usage error
+  # rather than an uncaught CalledProcessError.
+  llvm_dir = opts.llvm_dir
+  if not llvm_dir:
+    try:
+      llvm_dir = find_root_llvm_dir()
+    except subprocess.CalledProcessError:
+      parser.error("Couldn't autodetect an LLVM tree; please use --llvm_dir")
+
+  config = LLVMConfig(
+      remote=opts.upstream,
+      dir=llvm_dir,
+  )
+
+  if opts.sha:
+    print(translate_sha_to_rev(config, opts.sha))
+  else:
+    print(translate_rev_to_sha(config, Rev.parse(opts.rev)))
+
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == '__main__':
+  main(sys.argv[1:])
diff --git a/llvm_tools/git_llvm_rev_test.py b/llvm_tools/git_llvm_rev_test.py
new file mode 100755
index 00000000..ebb654ff
--- /dev/null
+++ b/llvm_tools/git_llvm_rev_test.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+import unittest
+
+import git_llvm_rev
+
+
+def get_llvm_checkout() -> str:
+  """Returns the path of the 'llvm-project-copy' dir next to this file."""
+  this_file = os.path.abspath(__file__)
+  containing_dir, _ = os.path.split(this_file)
+  return os.path.join(containing_dir, 'llvm-project-copy')
+
+
+def get_llvm_config() -> git_llvm_rev.LLVMConfig:
+  """Returns an LLVMConfig for the test checkout, using the 'origin' remote."""
+  checkout_dir = get_llvm_checkout()
+  return git_llvm_rev.LLVMConfig(remote='origin', dir=checkout_dir)
+
+
+class Test(unittest.TestCase):
+  """Checks rev <-> sha translation against the local LLVM checkout."""
+
+  def rev_to_sha_with_round_trip(self, rev: git_llvm_rev.Rev) -> str:
+    """Translates |rev| to a sha, asserting that the sha maps back to |rev|."""
+    llvm_config = get_llvm_config()
+    resulting_sha = git_llvm_rev.translate_rev_to_sha(llvm_config, rev)
+    self.assertEqual(
+        git_llvm_rev.translate_sha_to_rev(llvm_config, resulting_sha), rev)
+    return resulting_sha
+
+  def test_sha_to_rev_on_base_sha_works(self) -> None:
+    base_rev = git_llvm_rev.Rev(
+        branch='master', number=git_llvm_rev.base_llvm_revision)
+    self.assertEqual(
+        self.rev_to_sha_with_round_trip(base_rev), git_llvm_rev.base_llvm_sha)
+
+  def test_sha_to_rev_prior_to_base_rev_works(self) -> None:
+    prebase_rev = git_llvm_rev.Rev(branch='master', number=375000)
+    self.assertEqual(
+        self.rev_to_sha_with_round_trip(prebase_rev),
+        '2f6da767f13b8fd81f840c211d405fea32ac9db7')
+
+  def test_sha_to_rev_after_base_rev_works(self) -> None:
+    postbase_rev = git_llvm_rev.Rev(branch='master', number=375506)
+    self.assertEqual(
+        self.rev_to_sha_with_round_trip(postbase_rev),
+        '3bf7fddeb05655d9baed4cc69e13535c677ed1dd')
+
+  def test_llvm_svn_parsing_runs_ignore_reverts(self) -> None:
+    # The revert commit mentions the llvm-svn of the commit it reverts in its
+    # own commit message.
+    expectations = (
+        # Commit which performed the revert
+        (374895, '1731fc88d1fa1fa55edd056db73a339b415dd5d6'),
+        # Commit that was reverted
+        (374841, '2a1386c81de504b5bda44fbecf3f7b4cdfd748fc'),
+    )
+    for number, expected_sha in expectations:
+      sha = self.rev_to_sha_with_round_trip(
+          git_llvm_rev.Rev(branch='master', number=number))
+      self.assertEqual(sha, expected_sha)
+
+  def test_imaginary_revs_raise(self) -> None:
+    imaginary_rev = git_llvm_rev.Rev(branch='master', number=9999999)
+    with self.assertRaises(ValueError) as raised:
+      git_llvm_rev.translate_rev_to_sha(get_llvm_config(), imaginary_rev)
+
+    self.assertIn('Try updating your tree?', str(raised.exception))
+
+  # NOTE: The below tests have _zz_ in their name as an optimization. Iterating
+  # on a quick test is painful when these larger tests come before it and take
+  # 7secs to run. Python's unittest module guarantees tests are run in
+  # alphabetical order by their method name, so...
+  #
+  # If you're wondering, the slow part is `git branch -r --contains`. I imagine
+  # it's going to be very cold code, so I'm not inclined to optimize it much.
+
+  def test_zz_branch_revs_work_after_merge_points_and_svn_cutoff(self) -> None:
+    # Arbitrary 9.x commit without an attached llvm-svn: value.
+    branch_rev = git_llvm_rev.Rev(branch='release/9.x', number=366670)
+    self.assertEqual(
+        self.rev_to_sha_with_round_trip(branch_rev),
+        '4e858e4ac00b59f064da4e1f7e276916e7d296aa')
+
+  def test_zz_branch_revs_work_at_merge_points(self) -> None:
+    rev_number = 366426
+    backing_sha = 'c89a3d78f43d81b9cff7b9248772ddf14d21b749'
+
+    master_rev = git_llvm_rev.Rev(branch='master', number=rev_number)
+    self.assertEqual(self.rev_to_sha_with_round_trip(master_rev), backing_sha)
+
+    # Note that this won't round-trip: since this commit is on the master
+    # branch, we'll pick master for this. That's fine
+    branch_rev = git_llvm_rev.Rev(branch='release/9.x', number=rev_number)
+    self.assertEqual(
+        git_llvm_rev.translate_rev_to_sha(get_llvm_config(), branch_rev),
+        backing_sha)
+
+  def test_zz_branch_revs_work_after_merge_points(self) -> None:
+    # Picking the commit on the 9.x branch after the merge-base for that +
+    # master. Note that this is where llvm-svn numbers should diverge from
+    # ours, and are therefore untrustworthy. The commit for this *does* have a
+    # different `llvm-svn:` string than we should have.
+    branch_rev = git_llvm_rev.Rev(branch='release/9.x', number=366427)
+    self.assertEqual(
+        self.rev_to_sha_with_round_trip(branch_rev),
+        '2cf681a11aea459b50d712abc7136f7129e4d57f')
+
+
+# FIXME: When release/10.x happens, it may be nice to have a test-case
+# generally covering that, since it's the first branch that we have to travel
+# back to the base commit for.
+
+if __name__ == '__main__':
+  # These tests run against the one concrete checkout we often have on hand
+  # anyway, rather than against fixtures that merely hope to match it. Bail
+  # out with instructions if that checkout is missing.
+  checkout_dir = get_llvm_checkout()
+  if os.path.isdir(checkout_dir):
+    unittest.main()
+  else:
+    print(
+        'Please checkout llvm-project-copy to run these tests. A simple way '
+        'to do that is running `./get_llvm_hash.py --llvm_version 370000`',
+        file=sys.stderr)
+    sys.exit(1)