diff options
Diffstat (limited to 'llvm_tools/git_llvm_rev.py')
-rwxr-xr-x | llvm_tools/git_llvm_rev.py | 647 |
1 files changed, 336 insertions, 311 deletions
diff --git a/llvm_tools/git_llvm_rev.py b/llvm_tools/git_llvm_rev.py index b62b26e2..3dc34fce 100755 --- a/llvm_tools/git_llvm_rev.py +++ b/llvm_tools/git_llvm_rev.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2019 The Chromium OS Authors. All rights reserved. +# Copyright 2019 The ChromiumOS Authors # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -10,7 +10,6 @@ Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand, r1234 is parsed as '(main, 1234)'. """ -from __future__ import print_function import argparse import re @@ -18,7 +17,8 @@ import subprocess import sys import typing as t -MAIN_BRANCH = 'main' + +MAIN_BRANCH = "main" # Note that after base_llvm_sha, we reach The Wild West(TM) of commits. # So reasonable input that could break us includes: @@ -33,350 +33,375 @@ MAIN_BRANCH = 'main' # While saddening, this is something we should probably try to handle # reasonably. base_llvm_revision = 375505 -base_llvm_sha = '186155b89c2d2a2f62337081e3ca15f676c9434b' +base_llvm_sha = "186155b89c2d2a2f62337081e3ca15f676c9434b" # Represents an LLVM git checkout: # - |dir| is the directory of the LLVM checkout # - |remote| is the name of the LLVM remote. Generally it's "origin". -LLVMConfig = t.NamedTuple('LLVMConfig', (('remote', str), ('dir', str))) +LLVMConfig = t.NamedTuple("LLVMConfig", (("remote", str), ("dir", str))) -class Rev(t.NamedTuple('Rev', (('branch', str), ('number', int)))): - """Represents a LLVM 'revision', a shorthand identifies a LLVM commit.""" +class Rev(t.NamedTuple("Rev", (("branch", str), ("number", int)))): + """Represents a LLVM 'revision', a shorthand identifies a LLVM commit.""" - @staticmethod - def parse(rev: str) -> 'Rev': - """Parses a Rev from the given string. + @staticmethod + def parse(rev: str) -> "Rev": + """Parses a Rev from the given string. - Raises a ValueError on a failed parse. - """ - # Revs are parsed into (${branch_name}, r${commits_since_base_commit}) - # pairs. - # - # We support r${commits_since_base_commit} as shorthand for - # (main, r${commits_since_base_commit}). - if rev.startswith('r'): - branch_name = MAIN_BRANCH - rev_string = rev[1:] - else: - match = re.match(r'\((.+), r(\d+)\)', rev) - if not match: - raise ValueError("%r isn't a valid revision" % rev) + Raises a ValueError on a failed parse. + """ + # Revs are parsed into (${branch_name}, r${commits_since_base_commit}) + # pairs. + # + # We support r${commits_since_base_commit} as shorthand for + # (main, r${commits_since_base_commit}). + if rev.startswith("r"): + branch_name = MAIN_BRANCH + rev_string = rev[1:] + else: + match = re.match(r"\((.+), r(\d+)\)", rev) + if not match: + raise ValueError("%r isn't a valid revision" % rev) - branch_name, rev_string = match.groups() + branch_name, rev_string = match.groups() - return Rev(branch=branch_name, number=int(rev_string)) + return Rev(branch=branch_name, number=int(rev_string)) - def __str__(self) -> str: - branch_name, number = self - if branch_name == MAIN_BRANCH: - return 'r%d' % number - return '(%s, r%d)' % (branch_name, number) + def __str__(self) -> str: + branch_name, number = self + if branch_name == MAIN_BRANCH: + return "r%d" % number + return "(%s, r%d)" % (branch_name, number) def is_git_sha(xs: str) -> bool: - """Returns whether the given string looks like a valid git commit SHA.""" - return len(xs) > 6 and len(xs) <= 40 and all( - x.isdigit() or 'a' <= x.lower() <= 'f' for x in xs) + """Returns whether the given string looks like a valid git commit SHA.""" + return ( + len(xs) > 6 + and len(xs) <= 40 + and all(x.isdigit() or "a" <= x.lower() <= "f" for x in xs) + ) def check_output(command: t.List[str], cwd: str) -> str: - """Shorthand for subprocess.check_output. Auto-decodes any stdout.""" - result = subprocess.run( - command, - cwd=cwd, - check=True, - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - encoding='utf-8', - ) - return result.stdout - - -def translate_prebase_sha_to_rev_number(llvm_config: LLVMConfig, - sha: str) -> int: - """Translates a sha to a revision number (e.g., "llvm-svn: 1234"). - - This function assumes that the given SHA is an ancestor of |base_llvm_sha|. - """ - commit_message = check_output( - ['git', 'log', '-n1', '--format=%B', sha], - cwd=llvm_config.dir, - ) - last_line = commit_message.strip().splitlines()[-1] - svn_match = re.match(r'^llvm-svn: (\d+)$', last_line) - - if not svn_match: - raise ValueError( - f"No llvm-svn line found for {sha}, which... shouldn't happen?") - - return int(svn_match.group(1)) + """Shorthand for subprocess.check_output. Auto-decodes any stdout.""" + result = subprocess.run( + command, + cwd=cwd, + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + encoding="utf-8", + ) + return result.stdout -def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev: - """Translates a sha or git ref to a Rev.""" +def translate_prebase_sha_to_rev_number( + llvm_config: LLVMConfig, sha: str +) -> int: + """Translates a sha to a revision number (e.g., "llvm-svn: 1234"). - if is_git_sha(sha_or_ref): - sha = sha_or_ref - else: - sha = check_output( - ['git', 'rev-parse', sha_or_ref], + This function assumes that the given SHA is an ancestor of |base_llvm_sha|. + """ + commit_message = check_output( + ["git", "log", "-n1", "--format=%B", sha], cwd=llvm_config.dir, ) - sha = sha.strip() + last_line = commit_message.strip().splitlines()[-1] + svn_match = re.match(r"^llvm-svn: (\d+)$", last_line) - merge_base = check_output( - ['git', 'merge-base', base_llvm_sha, sha], - cwd=llvm_config.dir, - ) - merge_base = merge_base.strip() + if not svn_match: + raise ValueError( + f"No llvm-svn line found for {sha}, which... shouldn't happen?" + ) - if merge_base == base_llvm_sha: - result = check_output( + return int(svn_match.group(1)) + + +def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev: + """Translates a sha or git ref to a Rev.""" + + if is_git_sha(sha_or_ref): + sha = sha_or_ref + else: + sha = check_output( + ["git", "rev-parse", sha_or_ref], + cwd=llvm_config.dir, + ) + sha = sha.strip() + + merge_base = check_output( + ["git", "merge-base", base_llvm_sha, sha], + cwd=llvm_config.dir, + ) + merge_base = merge_base.strip() + + if merge_base == base_llvm_sha: + result = check_output( + [ + "git", + "rev-list", + "--count", + "--first-parent", + f"{base_llvm_sha}..{sha}", + ], + cwd=llvm_config.dir, + ) + count = int(result.strip()) + return Rev(branch=MAIN_BRANCH, number=count + base_llvm_revision) + + # Otherwise, either: + # - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|) + # - |merge_base| is neither (we have a guaranteed llvm-svn number on + # |merge_base|, but not |sha|) + merge_base_number = translate_prebase_sha_to_rev_number( + llvm_config, merge_base + ) + if merge_base == sha: + return Rev(branch=MAIN_BRANCH, number=merge_base_number) + + distance_from_base = check_output( [ - 'git', - 'rev-list', - '--count', - '--first-parent', - f'{base_llvm_sha}..{sha}', + "git", + "rev-list", + "--count", + "--first-parent", + f"{merge_base}..{sha}", ], cwd=llvm_config.dir, ) - count = int(result.strip()) - return Rev(branch=MAIN_BRANCH, number=count + base_llvm_revision) - - # Otherwise, either: - # - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|) - # - |merge_base| is neither (we have a guaranteed llvm-svn number on - # |merge_base|, but not |sha|) - merge_base_number = translate_prebase_sha_to_rev_number( - llvm_config, merge_base) - if merge_base == sha: - return Rev(branch=MAIN_BRANCH, number=merge_base_number) - - distance_from_base = check_output( - [ - 'git', - 'rev-list', - '--count', - '--first-parent', - f'{merge_base}..{sha}', - ], - cwd=llvm_config.dir, - ) - - revision_number = merge_base_number + int(distance_from_base.strip()) - branches_containing = check_output( - ['git', 'branch', '-r', '--contains', sha], - cwd=llvm_config.dir, - ) - - candidates = [] - - prefix = llvm_config.remote + '/' - for branch in branches_containing.splitlines(): - branch = branch.strip() - if branch.startswith(prefix): - candidates.append(branch[len(prefix):]) - - if not candidates: - raise ValueError( - f'No viable branches found from {llvm_config.remote} with {sha}') - - # It seems that some `origin/release/.*` branches have - # `origin/upstream/release/.*` equivalents, which is... awkward to deal with. - # Prefer the latter, since that seems to have newer commits than the former. - # Technically n^2, but len(elements) should be like, tens in the worst case. - candidates = [x for x in candidates if f'upstream/{x}' not in candidates] - if len(candidates) != 1: - raise ValueError( - f'Ambiguity: multiple branches from {llvm_config.remote} have {sha}: ' - f'{sorted(candidates)}') - - return Rev(branch=candidates[0], number=revision_number) - - -def parse_git_commit_messages(stream: t.Iterable[str], - separator: str) -> t.Iterable[t.Tuple[str, str]]: - """Parses a stream of git log messages. - - These are expected to be in the format: - - 40 character sha - commit - message - body - separator - 40 character sha - commit - message - body - separator - """ - - lines = iter(stream) - while True: - # Looks like a potential bug in pylint? crbug.com/1041148 - # pylint: disable=stop-iteration-return - sha = next(lines, None) - if sha is None: - return - - sha = sha.strip() - assert is_git_sha(sha), f'Invalid git SHA: {sha}' - - message = [] - for line in lines: - if line.strip() == separator: - break - message.append(line) - - yield sha, ''.join(message) + + revision_number = merge_base_number + int(distance_from_base.strip()) + branches_containing = check_output( + ["git", "branch", "-r", "--contains", sha], + cwd=llvm_config.dir, + ) + + candidates = [] + + prefix = llvm_config.remote + "/" + for branch in branches_containing.splitlines(): + branch = branch.strip() + if branch.startswith(prefix): + candidates.append(branch[len(prefix) :]) + + if not candidates: + raise ValueError( + f"No viable branches found from {llvm_config.remote} with {sha}" + ) + + # It seems that some `origin/release/.*` branches have + # `origin/upstream/release/.*` equivalents, which is... awkward to deal with. + # Prefer the latter, since that seems to have newer commits than the former. + # Technically n^2, but len(elements) should be like, tens in the worst case. + candidates = [x for x in candidates if f"upstream/{x}" not in candidates] + if len(candidates) != 1: + raise ValueError( + f"Ambiguity: multiple branches from {llvm_config.remote} have {sha}: " + f"{sorted(candidates)}" + ) + + return Rev(branch=candidates[0], number=revision_number) + + +def parse_git_commit_messages( + stream: t.Iterable[str], separator: str +) -> t.Iterable[t.Tuple[str, str]]: + """Parses a stream of git log messages. + + These are expected to be in the format: + + 40 character sha + commit + message + body + separator + 40 character sha + commit + message + body + separator + """ + + lines = iter(stream) + while True: + # Looks like a potential bug in pylint? crbug.com/1041148 + # pylint: disable=stop-iteration-return + sha = next(lines, None) + if sha is None: + return + + sha = sha.strip() + assert is_git_sha(sha), f"Invalid git SHA: {sha}" + + message = [] + for line in lines: + if line.strip() == separator: + break + message.append(line) + + yield sha, "".join(message) def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: - """Translates a Rev to a SHA. - - This function assumes that the given rev refers to a commit that's an - ancestor of |base_llvm_sha|. - """ - # Because reverts may include reverted commit messages, we can't just |-n1| - # and pick that. - separator = '>!' * 80 - looking_for = f'llvm-svn: {rev.number}' - - git_command = [ - 'git', 'log', '--grep', f'^{looking_for}$', f'--format=%H%n%B{separator}', - base_llvm_sha - ] - - subp = subprocess.Popen( - git_command, - cwd=llvm_config.dir, - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - encoding='utf-8', - ) - - with subp: - for sha, message in parse_git_commit_messages(subp.stdout, separator): - last_line = message.splitlines()[-1] - if last_line.strip() == looking_for: - subp.terminate() - return sha - - if subp.returncode: - raise subprocess.CalledProcessError(subp.returncode, git_command) - raise ValueError(f'No commit with revision {rev} found') + """Translates a Rev to a SHA. + + This function assumes that the given rev refers to a commit that's an + ancestor of |base_llvm_sha|. + """ + # Because reverts may include reverted commit messages, we can't just |-n1| + # and pick that. + separator = ">!" * 80 + looking_for = f"llvm-svn: {rev.number}" + + git_command = [ + "git", + "log", + "--grep", + f"^{looking_for}$", + f"--format=%H%n%B{separator}", + base_llvm_sha, + ] + + subp = subprocess.Popen( + git_command, + cwd=llvm_config.dir, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + encoding="utf-8", + ) + + with subp: + for sha, message in parse_git_commit_messages(subp.stdout, separator): + last_line = message.splitlines()[-1] + if last_line.strip() == looking_for: + subp.terminate() + return sha + + if subp.returncode: + raise subprocess.CalledProcessError(subp.returncode, git_command) + raise ValueError(f"No commit with revision {rev} found") def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: - """Translates a Rev to a SHA. - - Raises a ValueError if the given Rev doesn't exist in the given config. - """ - branch, number = rev - - if branch == MAIN_BRANCH: - if number < base_llvm_revision: - return translate_prebase_rev_to_sha(llvm_config, rev) - base_sha = base_llvm_sha - base_revision_number = base_llvm_revision - else: - base_sha = check_output( - ['git', 'merge-base', base_llvm_sha, f'{llvm_config.remote}/{branch}'], + """Translates a Rev to a SHA. + + Raises a ValueError if the given Rev doesn't exist in the given config. + """ + branch, number = rev + + if branch == MAIN_BRANCH: + if number < base_llvm_revision: + return translate_prebase_rev_to_sha(llvm_config, rev) + base_sha = base_llvm_sha + base_revision_number = base_llvm_revision + else: + base_sha = check_output( + [ + "git", + "merge-base", + base_llvm_sha, + f"{llvm_config.remote}/{branch}", + ], + cwd=llvm_config.dir, + ) + base_sha = base_sha.strip() + if base_sha == base_llvm_sha: + base_revision_number = base_llvm_revision + else: + base_revision_number = translate_prebase_sha_to_rev_number( + llvm_config, base_sha + ) + + # Alternatively, we could |git log --format=%H|, but git is *super* fast + # about rev walking/counting locally compared to long |log|s, so we walk back + # twice. + head = check_output( + ["git", "rev-parse", f"{llvm_config.remote}/{branch}"], + cwd=llvm_config.dir, + ) + branch_head_sha = head.strip() + + commit_number = number - base_revision_number + revs_between_str = check_output( + [ + "git", + "rev-list", + "--count", + "--first-parent", + f"{base_sha}..{branch_head_sha}", + ], + cwd=llvm_config.dir, + ) + revs_between = int(revs_between_str.strip()) + + commits_behind_head = revs_between - commit_number + if commits_behind_head < 0: + raise ValueError( + f"Revision {rev} is past {llvm_config.remote}/{branch}. Try updating " + "your tree?" + ) + + result = check_output( + ["git", "rev-parse", f"{branch_head_sha}~{commits_behind_head}"], cwd=llvm_config.dir, ) - base_sha = base_sha.strip() - if base_sha == base_llvm_sha: - base_revision_number = base_llvm_revision - else: - base_revision_number = translate_prebase_sha_to_rev_number( - llvm_config, base_sha) - - # Alternatively, we could |git log --format=%H|, but git is *super* fast - # about rev walking/counting locally compared to long |log|s, so we walk back - # twice. - head = check_output( - ['git', 'rev-parse', f'{llvm_config.remote}/{branch}'], - cwd=llvm_config.dir, - ) - branch_head_sha = head.strip() - - commit_number = number - base_revision_number - revs_between_str = check_output( - [ - 'git', - 'rev-list', - '--count', - '--first-parent', - f'{base_sha}..{branch_head_sha}', - ], - cwd=llvm_config.dir, - ) - revs_between = int(revs_between_str.strip()) - - commits_behind_head = revs_between - commit_number - if commits_behind_head < 0: - raise ValueError( - f'Revision {rev} is past {llvm_config.remote}/{branch}. Try updating ' - 'your tree?') - - result = check_output( - ['git', 'rev-parse', f'{branch_head_sha}~{commits_behind_head}'], - cwd=llvm_config.dir, - ) - - return result.strip() - - -def find_root_llvm_dir(root_dir: str = '.') -> str: - """Finds the root of an LLVM directory starting at |root_dir|. - - Raises a subprocess.CalledProcessError if no git directory is found. - """ - result = check_output( - ['git', 'rev-parse', '--show-toplevel'], - cwd=root_dir, - ) - return result.strip() + + return result.strip() + + +def find_root_llvm_dir(root_dir: str = ".") -> str: + """Finds the root of an LLVM directory starting at |root_dir|. + + Raises a subprocess.CalledProcessError if no git directory is found. + """ + result = check_output( + ["git", "rev-parse", "--show-toplevel"], + cwd=root_dir, + ) + return result.strip() def main(argv: t.List[str]) -> None: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - '--llvm_dir', - help='LLVM directory to consult for git history, etc. Autodetected ' - 'if cwd is inside of an LLVM tree') - parser.add_argument( - '--upstream', - default='origin', - help="LLVM upstream's remote name. Defaults to %(default)s.") - sha_or_rev = parser.add_mutually_exclusive_group(required=True) - sha_or_rev.add_argument( - '--sha', help='A git SHA (or ref) to convert to a rev') - sha_or_rev.add_argument('--rev', help='A rev to convert into a sha') - opts = parser.parse_args(argv) - - llvm_dir = opts.llvm_dir - if llvm_dir is None: - try: - llvm_dir = find_root_llvm_dir() - except subprocess.CalledProcessError: - parser.error("Couldn't autodetect an LLVM tree; please use --llvm_dir") - - config = LLVMConfig( - remote=opts.upstream, - dir=opts.llvm_dir or find_root_llvm_dir(), - ) - - if opts.sha: - rev = translate_sha_to_rev(config, opts.sha) - print(rev) - else: - sha = translate_rev_to_sha(config, Rev.parse(opts.rev)) - print(sha) - - -if __name__ == '__main__': - main(sys.argv[1:]) + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--llvm_dir", + help="LLVM directory to consult for git history, etc. Autodetected " + "if cwd is inside of an LLVM tree", + ) + parser.add_argument( + "--upstream", + default="origin", + help="LLVM upstream's remote name. Defaults to %(default)s.", + ) + sha_or_rev = parser.add_mutually_exclusive_group(required=True) + sha_or_rev.add_argument( + "--sha", help="A git SHA (or ref) to convert to a rev" + ) + sha_or_rev.add_argument("--rev", help="A rev to convert into a sha") + opts = parser.parse_args(argv) + + llvm_dir = opts.llvm_dir + if llvm_dir is None: + try: + llvm_dir = find_root_llvm_dir() + except subprocess.CalledProcessError: + parser.error( + "Couldn't autodetect an LLVM tree; please use --llvm_dir" + ) + + config = LLVMConfig( + remote=opts.upstream, + dir=opts.llvm_dir or find_root_llvm_dir(), + ) + + if opts.sha: + rev = translate_sha_to_rev(config, opts.sha) + print(rev) + else: + sha = translate_rev_to_sha(config, Rev.parse(opts.rev)) + print(sha) + + +if __name__ == "__main__": + main(sys.argv[1:]) |