1 files changed, 92 insertions, 90 deletions
diff --git a/infra/bisector.py b/infra/bisector.py
index 877110577..d1fd669fa 100644
--- a/infra/bisector.py
+++ b/infra/bisector.py
@@ -18,7 +18,7 @@ where the bug was introduced. It also looks for where the bug was fixed.
 This is done with the following steps:
 
 
-  NOTE: NEEDS TO BE RUN FROM THE OSS-Fuzz HOME directory
+  NOTE: Needs to be run from root of the OSS-Fuzz source checkout.
 
   Typical usage example:
         python3 infra/bisector.py
@@ -31,122 +31,124 @@ This is done with the following steps:
 """
 
 import argparse
-from dataclasses import dataclass
-import os
+import logging
 import tempfile
 
 import build_specified_commit
 import helper
 import repo_manager
-
-
-@dataclass
-class BuildData():
-  """List of data requried for bisection of errors in OSS-Fuzz projects.
-
-  Attributes:
-    project_name: The name of the OSS-Fuzz project that is being checked
-    engine: The fuzzing engine to be used
-    sanitizer: The fuzzing sanitizer to be used
-    architecture: The system architecture being fuzzed
-  """
-  project_name: str
-  engine: str
-  sanitizer: str
-  architecture: str
+import utils
 
 
 def main():
   """Finds the commit SHA where an error was initally introduced."""
+  utils.chdir_to_root()
   parser = argparse.ArgumentParser(
       description='git bisection for finding introduction of bugs')
 
-  parser.add_argument(
-      '--project_name',
-      help='The name of the project where the bug occured',
-      required=True)
-  parser.add_argument(
-      '--commit_new',
-      help='The newest commit SHA to be bisected',
-      required=True)
-  parser.add_argument(
-      '--commit_old',
-      help='The oldest commit SHA to be bisected',
-      required=True)
-  parser.add_argument(
-      '--fuzz_target', help='the name of the fuzzer to be built', required=True)
-  parser.add_argument(
-      '--testcase', help='the testcase to be reproduced', required=True)
-  parser.add_argument('--engine', default='libfuzzer')
-  parser.add_argument(
-      '--sanitizer',
-      default='address',
-      help='the default is "address"; "dataflow" for "dataflow" engine')
+  parser.add_argument('--project_name',
+                      help='The name of the project where the bug occurred.',
+                      required=True)
+  parser.add_argument('--new_commit',
+                      help='The newest commit SHA to be bisected.',
+                      required=True)
+  parser.add_argument('--old_commit',
+                      help='The oldest commit SHA to be bisected.',
+                      required=True)
+  parser.add_argument('--fuzz_target',
+                      help='The name of the fuzzer to be built.',
+                      required=True)
+  parser.add_argument('--test_case_path',
+                      help='The path to test case.',
+                      required=True)
+  parser.add_argument('--engine',
+                      help='The default is "libfuzzer".',
+                      default='libfuzzer')
+  parser.add_argument('--sanitizer',
+                      default='address',
+                      help='The default is "address".')
   parser.add_argument('--architecture', default='x86_64')
   args = parser.parse_args()
-  build_data = BuildData(args.project_name, args.engine, args.sanitizer,
-                         args.architecture)
-  if os.getcwd() != os.path.dirname(
-      os.path.dirname(os.path.realpath(__file__))):
-    print("Error: bisector.py needs to be run from the OSS-Fuzz home directory")
-    return 1
-  error_sha = bisect(args.commit_old, args.commit_new, args.testcase,
+
+  build_data = build_specified_commit.BuildData(project_name=args.project_name,
+                                                engine=args.engine,
+                                                sanitizer=args.sanitizer,
+                                                architecture=args.architecture)
+
+  error_sha = bisect(args.old_commit, args.new_commit, args.test_case_path,
                      args.fuzz_target, build_data)
   if not error_sha:
-    print('No error was found in commit range %s:%s' %
-          (args.commit_old, args.commit_new))
+    logging.error('No error was found in commit range %s:%s', args.commit_old,
+                  args.commit_new)
+    return 1
+  if error_sha == args.commit_old:
+    logging.error(
+        'Bisection Error: Both the first and the last commits in'
+        'the given range have the same behavior, bisection is not possible. ')
     return 1
   print('Error was introduced at commit %s' % error_sha)
   return 0
 
 
-def bisect(commit_old, commit_new, testcase, fuzz_target, build_data):
+def bisect(old_commit, new_commit, test_case_path, fuzz_target, build_data):
   """From a commit range, this function caluclates which introduced a
-  specific error from a fuzz testcase.
+  specific error from a fuzz test_case_path.
 
   Args:
-    commit_old: The oldest commit in the error regression range
-    commit_new: The newest commit in the error regression range
-    testcase: The file path of the test case that triggers the error
-    fuzz_target: The name of the fuzzer to be tested
-    build_data: a class holding all of the input parameters for bisection
+    old_commit: The oldest commit in the error regression range.
+    new_commit: The newest commit in the error regression range.
+    test_case_path: The file path of the test case that triggers the error
+    fuzz_target: The name of the fuzzer to be tested.
+    build_data: a class holding all of the input parameters for bisection.
 
   Returns:
-    The commit SHA that introduced the error or None
+    The commit SHA that introduced the error or None.
+
+  Raises:
+    ValueError: when a repo url can't be determine from the project.
   """
-  local_store_path = tempfile.mkdtemp()
-  repo_url = build_specified_commit.infer_main_repo(build_data.project_name,
-                                                    local_store_path,
-                                                    commit_old)
-  bisect_repo_manager = repo_manager.RepoManager(repo_url, local_store_path)
-  commit_list = bisect_repo_manager.get_commit_list(commit_old, commit_new)
-  build_specified_commit.build_fuzzer_from_commit(
-      build_data.project_name, commit_list[0], bisect_repo_manager.repo_dir,
-      build_data.engine, build_data.sanitizer, build_data.architecture,
-      bisect_repo_manager)
-  error_code = helper.reproduce_impl(build_data.project_name, fuzz_target,
-                                     False, [], [], testcase)
-  old_idx = len(commit_list) - 1
-  new_idx = 0
-  if len(commit_list) == 1:
-    if not error_code:
-      return None
-    return commit_list[0]
-
-  while old_idx - new_idx != 1:
-    curr_idx = (old_idx + new_idx) // 2
-    build_specified_commit.build_fuzzer_from_commit(
-        build_data.project_name, commit_list[curr_idx],
-        bisect_repo_manager.repo_dir, build_data.engine, build_data.sanitizer,
-        build_data.architecture, bisect_repo_manager)
-    error_exists = (
-        helper.reproduce_impl(build_data.project_name, fuzz_target, False, [],
-                              [], testcase) == error_code)
-    if error_exists == error_code:
-      new_idx = curr_idx
-    else:
-      old_idx = curr_idx
-  return commit_list[new_idx]
+  with tempfile.TemporaryDirectory() as tmp_dir:
+    repo_url, repo_name = build_specified_commit.detect_main_repo(
+        build_data.project_name, commit=old_commit)
+    if not repo_url or not repo_name:
+      raise ValueError('Main git repo can not be determined.')
+    bisect_repo_manager = repo_manager.RepoManager(repo_url,
+                                                   tmp_dir,
+                                                   repo_name=repo_name)
+    commit_list = bisect_repo_manager.get_commit_list(old_commit, new_commit)
+    old_idx = len(commit_list) - 1
+    new_idx = 0
+    build_specified_commit.build_fuzzers_from_commit(commit_list[new_idx],
+                                                     bisect_repo_manager,
+                                                     build_data)
+    expected_error_code = helper.reproduce_impl(build_data.project_name,
+                                                fuzz_target, False, [], [],
+                                                test_case_path)
+
+    # Check if the error is persistent through the commit range
+    build_specified_commit.build_fuzzers_from_commit(
+        commit_list[old_idx],
+        bisect_repo_manager,
+        build_data,
+    )
+
+    if expected_error_code == helper.reproduce_impl(build_data.project_name,
+                                                    fuzz_target, False, [], [],
+                                                    test_case_path):
+      return commit_list[old_idx]
+
+    while old_idx - new_idx > 1:
+      curr_idx = (old_idx + new_idx) // 2
+      build_specified_commit.build_fuzzers_from_commit(commit_list[curr_idx],
+                                                       bisect_repo_manager,
+                                                       build_data)
+      error_code = helper.reproduce_impl(build_data.project_name, fuzz_target,
+                                         False, [], [], test_case_path)
+      if expected_error_code == error_code:
+        new_idx = curr_idx
+      else:
+        old_idx = curr_idx
+    return commit_list[new_idx]
 
 
 if __name__ == '__main__':