1 files changed, 124 insertions, 199 deletions
diff --git a/llvm_tools/llvm_bisection.py b/llvm_tools/llvm_bisection.py
index 4ff921d2..b1898ea9 100755
--- a/llvm_tools/llvm_bisection.py
+++ b/llvm_tools/llvm_bisection.py
@@ -11,17 +11,16 @@ from __future__ import print_function
 import argparse
 import enum
 import errno
-import get_llvm_hash
 import json
 import os
+import subprocess
 import sys
 
-from assert_not_in_chroot import VerifyOutsideChroot
-from get_llvm_hash import CreateTempLLVMRepo
-from get_llvm_hash import LLVMHash
-from modify_a_tryjob import AddTryjob
-from update_tryjob_status import FindTryjobIndex
-from update_tryjob_status import TryjobStatus
+import chroot
+import get_llvm_hash
+import git_llvm_rev
+import modify_a_tryjob
+import update_tryjob_status
 
 
 class BisectionExitStatus(enum.Enum):
@@ -31,11 +30,6 @@ class BisectionExitStatus(enum.Enum):
   BISECTION_COMPLETE = 126
 
 
-def is_file_and_json(json_file):
-  """Validates that the file exists and is a JSON file."""
-  return os.path.isfile(json_file) and json_file.endswith('.json')
-
-
 def GetCommandLineArgs():
   """Parses the command line for the command line arguments."""
 
@@ -124,30 +118,27 @@ def GetCommandLineArgs():
       help='display contents of a command to the terminal '
       '(default: %(default)s)')
 
+  # Add argument for whether to display command contents to `stdout`.
+  parser.add_argument(
+      '--nocleanup',
+      action='store_false',
+      dest='cleanup',
+      help='Abandon CLs created for bisectoin')
+
   args_output = parser.parse_args()
 
   assert args_output.start_rev < args_output.end_rev, (
-      'Start revision %d is >= end revision %d' % (args_output.start_rev,
-                                                   args_output.end_rev))
+      'Start revision %d is >= end revision %d' %
+      (args_output.start_rev, args_output.end_rev))
 
   if args_output.last_tested and not args_output.last_tested.endswith('.json'):
-    raise ValueError(
-        'Filed provided %s does not end in ".json"' % args_output.last_tested)
+    raise ValueError('Filed provided %s does not end in ".json"' %
+                     args_output.last_tested)
 
   return args_output
 
 
-def _ValidateStartAndEndAgainstJSONStartAndEnd(start, end, json_start,
-                                               json_end):
-  """Valides that the command line arguments are the same as the JSON."""
-
-  if start != json_start or end != json_end:
-    raise ValueError('The start %d or the end %d version provided is '
-                     'different than "start" %d or "end" %d in the .JSON '
-                     'file' % (start, end, json_start, json_end))
-
-
-def GetStartAndEndRevision(start, end, tryjobs):
+def GetRemainingRange(start, end, tryjobs):
   """Gets the start and end intervals in 'json_file'.
 
   Args:
@@ -182,18 +173,20 @@ def GetStartAndEndRevision(start, end, tryjobs):
                        'go to %s and update it' % cur_tryjob_dict['link'])
 
   all_bad_revisions = [end]
-  all_bad_revisions.extend(cur_tryjob['rev']
-                           for cur_tryjob in tryjobs
-                           if cur_tryjob['status'] == TryjobStatus.BAD.value)
+  all_bad_revisions.extend(
+      cur_tryjob['rev']
+      for cur_tryjob in tryjobs
+      if cur_tryjob['status'] == update_tryjob_status.TryjobStatus.BAD.value)
 
   # The minimum value for the 'bad' field in the tryjobs is the new end
   # version.
   bad_rev = min(all_bad_revisions)
 
   all_good_revisions = [start]
-  all_good_revisions.extend(cur_tryjob['rev']
-                            for cur_tryjob in tryjobs
-                            if cur_tryjob['status'] == TryjobStatus.GOOD.value)
+  all_good_revisions.extend(
+      cur_tryjob['rev']
+      for cur_tryjob in tryjobs
+      if cur_tryjob['status'] == update_tryjob_status.TryjobStatus.GOOD.value)
 
   # The maximum value for the 'good' field in the tryjobs is the new start
   # version.
@@ -212,7 +205,7 @@ def GetStartAndEndRevision(start, end, tryjobs):
   pending_revisions = {
       tryjob['rev']
       for tryjob in tryjobs
-      if tryjob['status'] == TryjobStatus.PENDING.value and
+      if tryjob['status'] == update_tryjob_status.TryjobStatus.PENDING.value and
       good_rev < tryjob['rev'] < bad_rev
   }
 
@@ -224,152 +217,63 @@ def GetStartAndEndRevision(start, end, tryjobs):
   skip_revisions = {
       tryjob['rev']
       for tryjob in tryjobs
-      if tryjob['status'] == TryjobStatus.SKIP.value and
+      if tryjob['status'] == update_tryjob_status.TryjobStatus.SKIP.value and
       good_rev < tryjob['rev'] < bad_rev
   }
 
   return good_rev, bad_rev, pending_revisions, skip_revisions
 
 
-def GetRevisionsBetweenBisection(start, end, parallel, src_path,
-                                 pending_revisions, skip_revisions):
-  """Gets the revisions between 'start' and 'end'.
-
-  Sometimes, the LLVM source tree's revisions do not increment by 1 (there is
-  a jump), so need to construct a list of all revisions that are NOT missing
-  between 'start' and 'end'. Then, the step amount (i.e. length of the list
-  divided by ('parallel' + 1)) will be used for indexing into the list.
-
-  Args:
-    start: The start revision.
-    end: The end revision.
-    parallel: The number of tryjobs to create between 'start' and 'end'.
-    src_path: The absolute path to the LLVM source tree to use.
-    pending_revisions: A set containing 'pending' revisions that are between
-    'start' and 'end'.
-    skip_revisions: A set containing revisions between 'start' and 'end' that
-    are to be skipped.
-
-  Returns:
-    A list of revisions between 'start' and 'end'.
-  """
-
-  new_llvm = LLVMHash()
-
-  valid_revisions = []
-
-  # Start at ('start' + 1) because 'start' is the good revision.
-  #
-  # FIXME: Searching for each revision from ('start' + 1) up to 'end' in the
-  # LLVM source tree is a quadratic algorithm. It's a good idea to optimize
-  # this.
-  for cur_revision in range(start + 1, end):
-    try:
-      if cur_revision not in pending_revisions and \
-          cur_revision not in skip_revisions:
-        # Verify that the current revision exists by finding its corresponding
-        # git hash in the LLVM source tree.
-        get_llvm_hash.GetGitHashFrom(src_path, cur_revision)
-        valid_revisions.append(cur_revision)
-    except ValueError:
-      # Could not find the git hash for the current revision.
-      continue
-
-  # ('parallel' + 1) so that the last revision in the list is not close to
-  # 'end' (have a bit more coverage).
-  index_step = len(valid_revisions) // (parallel + 1)
-
-  if not index_step:
-    index_step = 1
-
-  result = [valid_revisions[index] \
-            for index in range(0, len(valid_revisions), index_step)]
-
-  return result
-
-
-def GetRevisionsListAndHashList(start, end, parallel, src_path,
-                                pending_revisions, skip_revisions):
+def GetCommitsBetween(start, end, parallel, src_path, pending_revisions,
+                      skip_revisions):
   """Determines the revisions between start and end."""
 
-  new_llvm = LLVMHash()
+  with get_llvm_hash.LLVMHash().CreateTempDirectory() as temp_dir:
+    # We have guaranteed contiguous revision numbers after this,
+    # and that guarnatee simplifies things considerably, so we don't
+    # support anything before it.
+    assert start >= git_llvm_rev.base_llvm_revision, f'{start} was too long ago'
 
-  with new_llvm.CreateTempDirectory() as temp_dir:
-    with CreateTempLLVMRepo(temp_dir) as new_repo:
+    with get_llvm_hash.CreateTempLLVMRepo(temp_dir) as new_repo:
       if not src_path:
         src_path = new_repo
-
-      # Get a list of revisions between start and end.
-      revisions = GetRevisionsBetweenBisection(
-          start, end, parallel, src_path, pending_revisions, skip_revisions)
-
-      git_hashes = [get_llvm_hash.GetGitHashFrom(src_path, rev) for rev in revisions]
-
-  return revisions, git_hashes
-
-
-def DieWithNoRevisionsError(start, end, skip_revisions, pending_revisions):
-  """Raises a ValueError exception with useful information."""
-
-  no_revisions_message = ('No revisions between start %d and end '
-                          '%d to create tryjobs' % (start, end))
-
-  if pending_revisions:
-    no_revisions_message += '\nThe following tryjobs are pending:\n' \
-        + '\n'.join(str(rev) for rev in pending_revisions)
-
-  if skip_revisions:
-    no_revisions_message += '\nThe following tryjobs were skipped:\n' \
-        + '\n'.join(str(rev) for rev in skip_revisions)
-
-  raise ValueError(no_revisions_message)
-
-
-def CheckForExistingTryjobsInRevisionsToLaunch(revisions, jobs):
-  """Checks if a revision in 'revisions' exists in 'jobs' list."""
-
-  for rev in revisions:
-    if FindTryjobIndex(rev, jobs) is not None:
-      raise ValueError('Revision %d exists already in "jobs"' % rev)
-
-
-def UpdateBisection(revisions, git_hashes, bisect_contents, last_tested,
-                    update_packages, chroot_path, patch_metadata_file,
-                    extra_change_lists, options, builder, verbose):
+      index_step = (end - (start + 1)) // (parallel + 1)
+      if not index_step:
+        index_step = 1
+      revisions = [
+          rev for rev in range(start + 1, end, index_step)
+          if rev not in pending_revisions and rev not in skip_revisions
+      ]
+      git_hashes = [
+          get_llvm_hash.GetGitHashFrom(src_path, rev) for rev in revisions
+      ]
+      return revisions, git_hashes
+
+
+def Bisect(revisions, git_hashes, bisect_state, last_tested, update_packages,
+           chroot_path, patch_metadata_file, extra_change_lists, options,
+           builder, verbose):
   """Adds tryjobs and updates the status file with the new tryjobs."""
 
   try:
     for svn_revision, git_hash in zip(revisions, git_hashes):
-      tryjob_dict = AddTryjob(update_packages, git_hash, svn_revision,
-                              chroot_path, patch_metadata_file,
-                              extra_change_lists, options, builder, verbose,
-                              svn_revision)
+      tryjob_dict = modify_a_tryjob.AddTryjob(update_packages, git_hash,
+                                              svn_revision, chroot_path,
+                                              patch_metadata_file,
+                                              extra_change_lists, options,
+                                              builder, verbose, svn_revision)
 
-      bisect_contents['jobs'].append(tryjob_dict)
+      bisect_state['jobs'].append(tryjob_dict)
   finally:
     # Do not want to lose progress if there is an exception.
     if last_tested:
       new_file = '%s.new' % last_tested
       with open(new_file, 'w') as json_file:
-        json.dump(bisect_contents, json_file, indent=4, separators=(',', ': '))
+        json.dump(bisect_state, json_file, indent=4, separators=(',', ': '))
 
       os.rename(new_file, last_tested)
 
 
-def _NoteCompletedBisection(last_tested, src_path, end):
-  """Prints that bisection is complete."""
-
-  print('Finished bisecting for %s' % last_tested)
-
-  if src_path:
-    bad_llvm_hash = get_llvm_hash.GetGitHashFrom(src_path, end)
-  else:
-    bad_llvm_hash = LLVMHash().GetLLVMHash(end)
-
-  print(
-      'The bad revision is %d and its commit hash is %s' % (end, bad_llvm_hash))
-
-
 def LoadStatusFile(last_tested, start, end):
   """Loads the status file for bisection."""
 
@@ -384,37 +288,36 @@ def LoadStatusFile(last_tested, start, end):
 
 
 def main(args_output):
-  """Bisects LLVM based off of a .JSON file.
+  """Bisects LLVM commits.
 
   Raises:
     AssertionError: The script was run inside the chroot.
   """
 
-  VerifyOutsideChroot()
-
+  chroot.VerifyOutsideChroot()
   update_packages = [
       'sys-devel/llvm', 'sys-libs/compiler-rt', 'sys-libs/libcxx',
       'sys-libs/libcxxabi', 'sys-libs/llvm-libunwind'
   ]
-
   patch_metadata_file = 'PATCHES.json'
-
   start = args_output.start_rev
   end = args_output.end_rev
 
-  bisect_contents = LoadStatusFile(args_output.last_tested, start, end)
-
-  _ValidateStartAndEndAgainstJSONStartAndEnd(
-      start, end, bisect_contents['start'], bisect_contents['end'])
+  bisect_state = LoadStatusFile(args_output.last_tested, start, end)
+  if start != bisect_state['start'] or end != bisect_state['end']:
+    raise ValueError(f'The start {start} or the end {end} version provided is '
+                     f'different than "start" {bisect_state["start"]} or "end" '
+                     f'{bisect_state["end"]} in the .JSON file')
 
   # Pending and skipped revisions are between 'start_revision' and
   # 'end_revision'.
   start_revision, end_revision, pending_revisions, skip_revisions = \
-      GetStartAndEndRevision(start, end, bisect_contents['jobs'])
+      GetRemainingRange(start, end, bisect_state['jobs'])
 
-  revisions, git_hashes = GetRevisionsListAndHashList(
-      start_revision, end_revision, args_output.parallel, args_output.src_path,
-      pending_revisions, skip_revisions)
+  revisions, git_hashes = GetCommitsBetween(start_revision, end_revision,
+                                            args_output.parallel,
+                                            args_output.src_path,
+                                            pending_revisions, skip_revisions)
 
   # No more revisions between 'start_revision' and 'end_revision', so
   # bisection is complete.
@@ -422,41 +325,63 @@ def main(args_output):
   # This is determined by finding all valid revisions between 'start_revision'
   # and 'end_revision' and that are NOT in the 'pending' and 'skipped' set.
   if not revisions:
-    # Successfully completed bisection where there are 2 cases:
-    # 1) 'start_revision' and 'end_revision' are back-to-back (example:
-    # 'start_revision' is 369410 and 'end_revision' is 369411).
-    #
-    # 2) 'start_revision' and 'end_revision' are NOT back-to-back, so there must
-    # be tryjobs in between which are labeled as 'skip' for their 'status'
-    # value.
-    #
-    # In either case, there are no 'pending' jobs.
-    if not pending_revisions:
-      _NoteCompletedBisection(args_output.last_tested, args_output.src_path,
-                              end_revision)
-
-      if skip_revisions:
-        skip_revisions_message = ('\nThe following revisions were skipped:\n' +
-                                  '\n'.join(str(rev) for rev in skip_revisions))
+    if pending_revisions:
+      # Some tryjobs are not finished which may change the actual bad
+      # commit/revision when those tryjobs are finished.
+      no_revisions_message = (f'No revisions between start {start_revision} '
+                              f'and end {end_revision} to create tryjobs\n')
 
-        print(skip_revisions_message)
+      if pending_revisions:
+        no_revisions_message += (
+            'The following tryjobs are pending:\n' +
+            '\n'.join(str(rev) for rev in pending_revisions) + '\n')
 
-      return BisectionExitStatus.BISECTION_COMPLETE.value
-
-    # Some tryjobs are not finished which may change the actual bad
-    # commit/revision when those tryjobs are finished.
-    DieWithNoRevisionsError(start_revision, end_revision, skip_revisions,
-                            pending_revisions)
+      if skip_revisions:
+        no_revisions_message += ('The following tryjobs were skipped:\n' +
+                                 '\n'.join(str(rev) for rev in skip_revisions) +
+                                 '\n')
+
+      raise ValueError(no_revisions_message)
+
+    print(f'Finished bisecting for {args_output.last_tested}')
+    if args_output.src_path:
+      bad_llvm_hash = get_llvm_hash.GetGitHashFrom(args_output.src_path,
+                                                   end_revision)
+    else:
+      bad_llvm_hash = get_llvm_hash.LLVMHash().GetLLVMHash(end_revision)
+    print(f'The bad revision is {end_revision} and its commit hash is '
+          f'{bad_llvm_hash}')
+    if skip_revisions:
+      skip_revisions_message = ('\nThe following revisions were skipped:\n' +
+                                '\n'.join(str(rev) for rev in skip_revisions))
+      print(skip_revisions_message)
+
+    if args_output.cleanup:
+      # Abandon all the CLs created for bisection
+      gerrit = os.path.join(args_output.chroot_path, 'chromite/bin/gerrit')
+      for build in bisect_state['jobs']:
+        try:
+          subprocess.check_output(
+              [gerrit, 'abandon', str(build['cl'])],
+              stderr=subprocess.STDOUT,
+              encoding='utf-8')
+        except subprocess.CalledProcessError as err:
+          # the CL may have been abandoned
+          if 'chromite.lib.gob_util.GOBError' not in err.output:
+            raise
+
+    return BisectionExitStatus.BISECTION_COMPLETE.value
 
-  CheckForExistingTryjobsInRevisionsToLaunch(revisions, bisect_contents['jobs'])
+  for rev in revisions:
+    if update_tryjob_status.FindTryjobIndex(rev,
+                                            bisect_state['jobs']) is not None:
+      raise ValueError(f'Revision {rev} exists already in "jobs"')
 
-  UpdateBisection(revisions, git_hashes, bisect_contents,
-                  args_output.last_tested, update_packages,
-                  args_output.chroot_path, patch_metadata_file,
-                  args_output.extra_change_lists, args_output.options,
-                  args_output.builder, args_output.verbose)
+  Bisect(revisions, git_hashes, bisect_state, args_output.last_tested,
+         update_packages, args_output.chroot_path, patch_metadata_file,
+         args_output.extra_change_lists, args_output.options,
+         args_output.builder, args_output.verbose)
 
 
 if __name__ == '__main__':
-  args_output = GetCommandLineArgs()
-  sys.exit(main(args_output))
+  sys.exit(main(GetCommandLineArgs()))