#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Copyright 2019 The ChromiumOS Authors # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Maps LLVM git SHAs to synthetic revision numbers and back. Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand, r1234 is parsed as '(main, 1234)'. """ import argparse import re import subprocess import sys import typing as t MAIN_BRANCH = "main" # Note that after base_llvm_sha, we reach The Wild West(TM) of commits. # So reasonable input that could break us includes: # # Revert foo # # This reverts foo, which had the commit message: # # bar # llvm-svn: 375505 # # While saddening, this is something we should probably try to handle # reasonably. base_llvm_revision = 375505 base_llvm_sha = "186155b89c2d2a2f62337081e3ca15f676c9434b" # Represents an LLVM git checkout: # - |dir| is the directory of the LLVM checkout # - |remote| is the name of the LLVM remote. Generally it's "origin". LLVMConfig = t.NamedTuple("LLVMConfig", (("remote", str), ("dir", str))) class Rev(t.NamedTuple("Rev", (("branch", str), ("number", int)))): """Represents a LLVM 'revision', a shorthand identifies a LLVM commit.""" @staticmethod def parse(rev: str) -> "Rev": """Parses a Rev from the given string. Raises a ValueError on a failed parse. """ # Revs are parsed into (${branch_name}, r${commits_since_base_commit}) # pairs. # # We support r${commits_since_base_commit} as shorthand for # (main, r${commits_since_base_commit}). if rev.startswith("r"): branch_name = MAIN_BRANCH rev_string = rev[1:] else: match = re.match(r"\((.+), r(\d+)\)", rev) if not match: raise ValueError("%r isn't a valid revision" % rev) branch_name, rev_string = match.groups() return Rev(branch=branch_name, number=int(rev_string)) def __str__(self) -> str: branch_name, number = self if branch_name == MAIN_BRANCH: return "r%d" % number return "(%s, r%d)" % (branch_name, number) def is_git_sha(xs: str) -> bool: """Returns whether the given string looks like a valid git commit SHA.""" return ( len(xs) > 6 and len(xs) <= 40 and all(x.isdigit() or "a" <= x.lower() <= "f" for x in xs) ) def check_output(command: t.List[str], cwd: str) -> str: """Shorthand for subprocess.check_output. Auto-decodes any stdout.""" result = subprocess.run( command, cwd=cwd, check=True, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, encoding="utf-8", ) return result.stdout def translate_prebase_sha_to_rev_number( llvm_config: LLVMConfig, sha: str ) -> int: """Translates a sha to a revision number (e.g., "llvm-svn: 1234"). This function assumes that the given SHA is an ancestor of |base_llvm_sha|. """ commit_message = check_output( ["git", "log", "-n1", "--format=%B", sha], cwd=llvm_config.dir, ) last_line = commit_message.strip().splitlines()[-1] svn_match = re.match(r"^llvm-svn: (\d+)$", last_line) if not svn_match: raise ValueError( f"No llvm-svn line found for {sha}, which... shouldn't happen?" ) return int(svn_match.group(1)) def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev: """Translates a sha or git ref to a Rev.""" if is_git_sha(sha_or_ref): sha = sha_or_ref else: sha = check_output( ["git", "rev-parse", sha_or_ref], cwd=llvm_config.dir, ) sha = sha.strip() merge_base = check_output( ["git", "merge-base", base_llvm_sha, sha], cwd=llvm_config.dir, ) merge_base = merge_base.strip() if merge_base == base_llvm_sha: result = check_output( [ "git", "rev-list", "--count", "--first-parent", f"{base_llvm_sha}..{sha}", ], cwd=llvm_config.dir, ) count = int(result.strip()) return Rev(branch=MAIN_BRANCH, number=count + base_llvm_revision) # Otherwise, either: # - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|) # - |merge_base| is neither (we have a guaranteed llvm-svn number on # |merge_base|, but not |sha|) merge_base_number = translate_prebase_sha_to_rev_number( llvm_config, merge_base ) if merge_base == sha: return Rev(branch=MAIN_BRANCH, number=merge_base_number) distance_from_base = check_output( [ "git", "rev-list", "--count", "--first-parent", f"{merge_base}..{sha}", ], cwd=llvm_config.dir, ) revision_number = merge_base_number + int(distance_from_base.strip()) branches_containing = check_output( ["git", "branch", "-r", "--contains", sha], cwd=llvm_config.dir, ) candidates = [] prefix = llvm_config.remote + "/" for branch in branches_containing.splitlines(): branch = branch.strip() if branch.startswith(prefix): candidates.append(branch[len(prefix) :]) if not candidates: raise ValueError( f"No viable branches found from {llvm_config.remote} with {sha}" ) # It seems that some `origin/release/.*` branches have # `origin/upstream/release/.*` equivalents, which is... awkward to deal with. # Prefer the latter, since that seems to have newer commits than the former. # Technically n^2, but len(elements) should be like, tens in the worst case. candidates = [x for x in candidates if f"upstream/{x}" not in candidates] if len(candidates) != 1: raise ValueError( f"Ambiguity: multiple branches from {llvm_config.remote} have {sha}: " f"{sorted(candidates)}" ) return Rev(branch=candidates[0], number=revision_number) def parse_git_commit_messages( stream: t.Iterable[str], separator: str ) -> t.Iterable[t.Tuple[str, str]]: """Parses a stream of git log messages. These are expected to be in the format: 40 character sha commit message body separator 40 character sha commit message body separator """ lines = iter(stream) while True: # Looks like a potential bug in pylint? crbug.com/1041148 # pylint: disable=stop-iteration-return sha = next(lines, None) if sha is None: return sha = sha.strip() assert is_git_sha(sha), f"Invalid git SHA: {sha}" message = [] for line in lines: if line.strip() == separator: break message.append(line) yield sha, "".join(message) def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: """Translates a Rev to a SHA. This function assumes that the given rev refers to a commit that's an ancestor of |base_llvm_sha|. """ # Because reverts may include reverted commit messages, we can't just |-n1| # and pick that. separator = ">!" * 80 looking_for = f"llvm-svn: {rev.number}" git_command = [ "git", "log", "--grep", f"^{looking_for}$", f"--format=%H%n%B{separator}", base_llvm_sha, ] subp = subprocess.Popen( git_command, cwd=llvm_config.dir, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, encoding="utf-8", ) with subp: for sha, message in parse_git_commit_messages(subp.stdout, separator): last_line = message.splitlines()[-1] if last_line.strip() == looking_for: subp.terminate() return sha if subp.returncode: raise subprocess.CalledProcessError(subp.returncode, git_command) raise ValueError(f"No commit with revision {rev} found") def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: """Translates a Rev to a SHA. Raises a ValueError if the given Rev doesn't exist in the given config. """ branch, number = rev if branch == MAIN_BRANCH: if number < base_llvm_revision: return translate_prebase_rev_to_sha(llvm_config, rev) base_sha = base_llvm_sha base_revision_number = base_llvm_revision else: base_sha = check_output( [ "git", "merge-base", base_llvm_sha, f"{llvm_config.remote}/{branch}", ], cwd=llvm_config.dir, ) base_sha = base_sha.strip() if base_sha == base_llvm_sha: base_revision_number = base_llvm_revision else: base_revision_number = translate_prebase_sha_to_rev_number( llvm_config, base_sha ) # Alternatively, we could |git log --format=%H|, but git is *super* fast # about rev walking/counting locally compared to long |log|s, so we walk back # twice. head = check_output( ["git", "rev-parse", f"{llvm_config.remote}/{branch}"], cwd=llvm_config.dir, ) branch_head_sha = head.strip() commit_number = number - base_revision_number revs_between_str = check_output( [ "git", "rev-list", "--count", "--first-parent", f"{base_sha}..{branch_head_sha}", ], cwd=llvm_config.dir, ) revs_between = int(revs_between_str.strip()) commits_behind_head = revs_between - commit_number if commits_behind_head < 0: raise ValueError( f"Revision {rev} is past {llvm_config.remote}/{branch}. Try updating " "your tree?" ) result = check_output( ["git", "rev-parse", f"{branch_head_sha}~{commits_behind_head}"], cwd=llvm_config.dir, ) return result.strip() def find_root_llvm_dir(root_dir: str = ".") -> str: """Finds the root of an LLVM directory starting at |root_dir|. Raises a subprocess.CalledProcessError if no git directory is found. """ result = check_output( ["git", "rev-parse", "--show-toplevel"], cwd=root_dir, ) return result.strip() def main(argv: t.List[str]) -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--llvm_dir", help="LLVM directory to consult for git history, etc. Autodetected " "if cwd is inside of an LLVM tree", ) parser.add_argument( "--upstream", default="origin", help="LLVM upstream's remote name. Defaults to %(default)s.", ) sha_or_rev = parser.add_mutually_exclusive_group(required=True) sha_or_rev.add_argument( "--sha", help="A git SHA (or ref) to convert to a rev" ) sha_or_rev.add_argument("--rev", help="A rev to convert into a sha") opts = parser.parse_args(argv) llvm_dir = opts.llvm_dir if llvm_dir is None: try: llvm_dir = find_root_llvm_dir() except subprocess.CalledProcessError: parser.error( "Couldn't autodetect an LLVM tree; please use --llvm_dir" ) config = LLVMConfig( remote=opts.upstream, dir=opts.llvm_dir or find_root_llvm_dir(), ) if opts.sha: rev = translate_sha_to_rev(config, opts.sha) print(rev) else: sha = translate_rev_to_sha(config, Rev.parse(opts.rev)) print(sha) if __name__ == "__main__": main(sys.argv[1:])