author     Emma Vukelj <emmavukelj@google.com>   2019-07-15 16:40:01 -0700
committer  Emma Vukelj <emmavukelj@google.com>   2019-07-17 23:01:45 +0000
commit     79122e798df36764298e636a410c4f909fcbb52f (patch)
tree       db2aa08989322479565df69c8dd78f206909634f /afdo_tools/bisection
parent     cf65aac3fc2d8e249711087a5239cea6ed68a999 (diff)
download   toolchain-utils-79122e798df36764298e636a410c4f909fcbb52f.tar.gz
AFDO-Bisect: Make range search bisect O(logn) instead of O(n)
This CL changes the formerly-titled 'non_bisecting_search' to run faster by
using bisection; the function has accordingly been renamed to 'range_search',
since it searches for a range of problematic functions. This CL also adds some
basic status logging for the user's benefit.

Change-Id: Ib37dd4817665f8378bfcc1335a2e82367d73db4f
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/1703125
Reviewed-by: George Burgess <gbiv@chromium.org>
Tested-by: Emma Vukelj <emmavukelj@google.com>
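For readers skimming the patch, the core of the speedup is the textbook swap of a linear scan for a binary search over a monotone predicate: instead of growing the candidate set by _FUNC_STEP functions per run of the external decider, each probe now halves the remaining range, so locating each border of the bad range costs O(log n) decider invocations rather than O(n). A minimal standalone sketch of that pattern (find_first_bad and its is_bad predicate are illustrative helpers, not code from this patch):

def find_first_bad(funcs, is_bad):
  """Binary search for the shortest bad prefix of 'funcs'.

  Assumes 'is_bad' is monotone: once a prefix is bad, every longer prefix
  is bad too. Returns the length of the shortest bad prefix, or None if
  even the full list is good.
  """
  if not is_bad(funcs):
    return None
  lo, hi = 0, len(funcs)  # invariant: length-lo prefix good, length-hi bad
  while hi - lo > 1:
    mid = (lo + hi) // 2
    if is_bad(funcs[:mid]):
      hi = mid  # a shorter bad prefix exists
    else:
      lo = mid  # need more functions to trigger badness
  return hi

# Each probe halves the range: 1000 candidates need ~10 probes, not ~1000.
print(find_first_bad(list(range(1000)), lambda p: len(p) >= 700))  # 700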
Diffstat (limited to 'afdo_tools/bisection')
-rwxr-xr-x  afdo_tools/bisection/afdo_prof_analysis.py           224
-rwxr-xr-x  afdo_tools/bisection/afdo_prof_analysis_e2e_test.py    4
-rwxr-xr-x  afdo_tools/bisection/afdo_prof_analysis_test.py       67
3 files changed, 170 insertions, 125 deletions
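One Python subtlety the new status_enum in the first file relies on: IntEnum members compare equal to plain integers, which is what lets the 'return_code in statuses' membership test accept a raw subprocess return code and then promote it to a named member for logging. A quick standalone demonstration (not code from this patch):

from enum import IntEnum

class Status(IntEnum):
  """Stand-in for the patch's status_enum."""
  GOOD = 0
  BAD = 1
  SKIP = 125
  PROBLEM = 127

statuses = Status.__members__.values()

print(1 in statuses)     # True: IntEnum members compare equal to ints
print(Status(125).name)  # 'SKIP': promote a raw return code to a name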
diff --git a/afdo_tools/bisection/afdo_prof_analysis.py b/afdo_tools/bisection/afdo_prof_analysis.py
index b713425b..35e109a0 100755
--- a/afdo_tools/bisection/afdo_prof_analysis.py
+++ b/afdo_tools/bisection/afdo_prof_analysis.py
@@ -10,19 +10,23 @@ This script takes a good AFDO profile, a bad AFDO profile, and an external
script which deems particular AFDO profiles as GOOD/BAD/SKIP, and an output
file as arguments. Given these pieces of information, it analyzes the profiles
to try and determine what exactly is bad about the bad profile. It does this
-with three main techniques: bisecting search, non-bisecting search, and rough
-diff-ing.
+with three main techniques: bisecting search, range search, and rough diff-ing.
"""
from __future__ import print_function
from absl import app
from absl import flags
+from enum import IntEnum
from tempfile import mkstemp
from afdo_parse import parse_afdo
import json
+# Pylint recommends we use "from chromite.lib import cros_logging as logging".
+# Chromite specific policy message, we want to keep using the standard logging
+# pylint: disable=cros-logging-import
+import logging
import os
import random
import subprocess
@@ -40,17 +44,18 @@ flags.DEFINE_string('analysis_output_file', None,
flags.DEFINE_integer('seed', None, 'Integer specifying seed for randomness')
FLAGS = flags.FLAGS
-GOOD_STATUS = 0
-BAD_STATUS = 1
-SKIP_STATUS = 125
-PROBLEM_STATUS = 127
-# Variables for non_bisecting search
-# for how many times non-bisecting search should run its algo
-_NUM_RUNS = 20
-# for how many new functions it should add at once
-# note that this will be unnecessary once non_bisecting_search TODO addressed
-_FUNC_STEP = 5
+class status_enum(IntEnum):
+ """Enum of valid statuses returned by profile decider."""
+ GOOD_STATUS = 0
+ BAD_STATUS = 1
+ SKIP_STATUS = 125
+ PROBLEM_STATUS = 127
+
+
+statuses = status_enum.__members__.values()
+
+_NUM_RUNS_RANGE_SEARCH = 20 # how many times range search should run its algo
def json_to_text(json_prof):
@@ -71,29 +76,49 @@ def prof_to_tmp(prof):
return temp_path
-def run_external(prof):
- """Run the external deciding script on the given profile."""
- filename = prof_to_tmp(prof)
+def generate_decider():
+ """create the decider function with counter for total number of calls
+
+ generate_decider is a function which returns an inner function (and the
+ inner one does the work of running the external decider). This has been
+ structured as a function which returns a function so we can keep track of the
+ global/total number of runs as a member of the outer function, as opposed to
+ using a global variable.
+ """
+
+ # Inner functions can only modify, and not rebind, nonlocal variables
+ # which is why this uses a list to represent a single integer
+ _num_runs = [0]
+
+ def run(prof, increment_counter=True):
+ """Run the external deciding script on the given profile."""
+ filename = prof_to_tmp(prof)
- try:
- child = subprocess.Popen([FLAGS.external_decider, filename])
- child.communicate()
- except:
- raise # the error of try block
- finally:
- os.remove(filename)
+ try:
+ return_code = subprocess.call([FLAGS.external_decider, filename])
+ finally:
+ os.remove(filename)
- return_code = child.returncode
- if return_code in (GOOD_STATUS, BAD_STATUS, SKIP_STATUS, PROBLEM_STATUS):
- return return_code
- raise ValueError(
- 'Provided external script had unexpected return code %d' % return_code)
+ if return_code in statuses:
+ if increment_counter:
+ _num_runs[0] += 1
+ status = status_enum(return_code)
+ logging.info('Run %d of external script %s returned %s', _num_runs[0],
+ FLAGS.external_decider, status.name)
+ return status
+ raise ValueError(
+ 'Provided external script had unexpected return code %d' % return_code)
-def bisect_profiles(good, bad, common_funcs, lo, hi):
+ return run
+
+
+def bisect_profiles(decider, good, bad, common_funcs, lo, hi):
"""Recursive function which bisects good and bad profiles.
Args:
+ decider: function which, given a JSON-based AFDO profile, returns an
+ element of 'statuses' based on the status of the profile
good: JSON-based good AFDO profile
bad: JSON-based bad AFDO profile
common_funcs: the list of functions which have top-level profiles in both
@@ -111,6 +136,7 @@ def bisect_profiles(good, bad, common_funcs, lo, hi):
results = {'individuals': [], 'ranges': []}
if hi - lo <= 1:
+ logging.info('Found %s as a problematic function profile', common_funcs[lo])
results['individuals'].append(common_funcs[lo])
return results
@@ -122,37 +148,39 @@ def bisect_profiles(good, bad, common_funcs, lo, hi):
for func in common_funcs[mid:hi]:
mid_hi_prof[func] = bad[func]
- lo_mid_verdict = run_external(lo_mid_prof)
- mid_hi_verdict = run_external(mid_hi_prof)
+ lo_mid_verdict = decider(lo_mid_prof)
+ mid_hi_verdict = decider(mid_hi_prof)
- if lo_mid_verdict == BAD_STATUS:
- result = bisect_profiles(good, bad, common_funcs, lo, mid)
+ if lo_mid_verdict == status_enum.BAD_STATUS:
+ result = bisect_profiles(decider, good, bad, common_funcs, lo, mid)
results['individuals'].extend(result['individuals'])
results['ranges'].extend(result['ranges'])
- if mid_hi_verdict == BAD_STATUS:
- result = bisect_profiles(good, bad, common_funcs, mid, hi)
+ if mid_hi_verdict == status_enum.BAD_STATUS:
+ result = bisect_profiles(decider, good, bad, common_funcs, mid, hi)
results['individuals'].extend(result['individuals'])
results['ranges'].extend(result['ranges'])
# neither half is bad -> the issue is caused by several things occurring
# in conjunction, and this combination crosses 'mid'
- if lo_mid_verdict == mid_hi_verdict == GOOD_STATUS:
- problem_range = non_bisecting_search(good, bad, common_funcs, lo, hi)
+ if lo_mid_verdict == mid_hi_verdict == status_enum.GOOD_STATUS:
+ problem_range = range_search(decider, good, bad, common_funcs, lo, hi)
if problem_range:
+ logging.info('Found %s as a problematic combination of profiles',
+ str(problem_range))
results['ranges'].append(problem_range)
return results
-def bisect_profiles_wrapper(good, bad, perform_check=True):
+def bisect_profiles_wrapper(decider, good, bad, perform_check=True):
"""Wrapper for recursive profile bisection."""
# Validate good and bad profiles are such, otherwise bisection reports noise
- # Note that while run_external is a random mock, these assertions may fail.
+ # Note that if decider is a random mock, these assertions may fail.
if perform_check:
- if run_external(good) != GOOD_STATUS:
+ if decider(good, increment_counter=False) != status_enum.GOOD_STATUS:
raise ValueError("Supplied good profile is not actually GOOD")
- if run_external(bad) != BAD_STATUS:
+ if decider(bad, increment_counter=False) != status_enum.BAD_STATUS:
raise ValueError("Supplied bad profile is not actually BAD")
common_funcs = sorted(func for func in good if func in bad)
@@ -164,37 +192,71 @@ def bisect_profiles_wrapper(good, bad, perform_check=True):
# of finding new, potentially interesting results are increased each time
# the program is run
random.shuffle(common_funcs)
- results = bisect_profiles(good, bad, common_funcs, 0, len(common_funcs))
+ results = bisect_profiles(decider, good, bad, common_funcs, 0,
+ len(common_funcs))
results['ranges'].sort()
results['individuals'].sort()
return results
-# TODO(emmavukelj): make this O(logn) instead of O(n) by halving the searched
-# range instead of linearly incrementing. And also search from middle of range
-def non_bisecting_search(good, bad, common_funcs, lo, hi):
- """Searches for problematic range without bisecting.
+def range_search(decider, good, bad, common_funcs, lo, hi):
+ """Searches for problematic range crossing mid border.
The main inner algorithm is the following, which looks for the smallest
possible ranges with problematic combinations. It starts the upper bound at
- the midpoint, and increments this until it gets a BAD profile.
- Then, it increments the lower bound until the resultant profile is GOOD, and
- then we have a range that causes 'BAD'ness.
+ the midpoint, and increments in halves until it gets a BAD profile.
+ Then, it increments the lower bound (in halves) until the resultant profile
+ is GOOD, and then we have a range that causes 'BAD'ness.
- It does this _NUM_NONBISECT_RUNS times, and shuffles the functions being
+ It does this _NUM_RUNS_RANGE_SEARCH times, and shuffles the functions being
looked at uniquely each time to try and get the smallest possible range
- of functions but in a reasonable timeframe.
+ of functions in a reasonable timeframe.
"""
+ average = lambda x, y: int(round((x + y) / 2.0))
+
+ def find_upper_border(good_copy, funcs, lo, hi, last_bad_val=None):
+ """Finds the upper border of problematic range."""
+ mid = average(lo, hi)
+ if mid == lo or mid == hi:
+ return last_bad_val or hi
+
+ for func in funcs[lo:mid]:
+ good_copy[func] = bad[func]
+ verdict = decider(good_copy)
+
+ # reset for next iteration
+ for func in funcs:
+ good_copy[func] = good[func]
+
+ if verdict == status_enum.BAD_STATUS:
+ return find_upper_border(good_copy, funcs, lo, mid, mid)
+ return find_upper_border(good_copy, funcs, mid, hi, last_bad_val)
+
+ def find_lower_border(good_copy, funcs, lo, hi, last_bad_val=None):
+ """Finds the lower border of problematic range."""
+ mid = average(lo, hi)
+ if mid == lo or mid == hi:
+ return last_bad_val or lo
+
+ for func in funcs[lo:mid]:
+ good_copy[func] = good[func]
+ verdict = decider(good_copy)
+
+ # reset for next iteration
+ for func in funcs:
+ good_copy[func] = bad[func]
+
+ if verdict == status_enum.BAD_STATUS:
+ return find_lower_border(good_copy, funcs, mid, hi, lo)
+ return find_lower_border(good_copy, funcs, lo, mid, last_bad_val)
+
lo_mid_funcs = []
mid_hi_funcs = []
min_range_funcs = []
- for _ in range(_NUM_RUNS):
+ for _ in range(_NUM_RUNS_RANGE_SEARCH):
if min_range_funcs: # only examine range we've already narrowed to
- lo_mid_funcs = lo_mid_funcs[lo_mid_funcs.index(min_range_funcs[0]):]
- mid_hi_funcs = mid_hi_funcs[:mid_hi_funcs.index(min_range_funcs[-1]) + 1]
- # shuffle to hopefully find smaller problem range
random.shuffle(lo_mid_funcs)
random.shuffle(mid_hi_funcs)
else: # consider lo-mid and mid-hi separately bc must cross border
@@ -208,52 +270,45 @@ def non_bisecting_search(good, bad, common_funcs, lo, hi):
lo = 0
# because we need the problematic pair to pop up before we can narrow it
- good_copy = good.copy()
+ prof = good.copy()
for func in lo_mid_funcs:
- good_copy[func] = bad[func]
+ prof[func] = bad[func]
+
+ upper_border = find_upper_border(prof, funcs, mid, hi)
+ for func in lo_mid_funcs + funcs[mid:upper_border]:
+ prof[func] = bad[func]
- for upper in range(mid, hi): # find upper border of bad combination
- upper_func = funcs[upper]
- good_copy[upper_func] = bad[upper_func]
- if ((upper % _FUNC_STEP == 0 or upper == hi - 1) and
- run_external(good_copy) == BAD_STATUS):
-
- # find lower border of bad combination
- for lower in range(mid):
- lower_func = funcs[lower]
- good_copy[lower_func] = good[lower_func]
- if ((lower % _FUNC_STEP == 0 or lower == mid - 1) and
- run_external(good_copy) == GOOD_STATUS):
-
- lower = max(0, lower - _FUNC_STEP + 1) # +1 b/c current included
- if not min_range_funcs or (upper - lower) < len(min_range_funcs):
- # b/c upper func included
- min_range_funcs = funcs[lower:upper + 1]
- if len(min_range_funcs) == 2:
- min_range_funcs.sort()
- return min_range_funcs # can't get any smaller
- break
+ lower_border = find_lower_border(prof, funcs, lo, mid)
+ curr_range_funcs = funcs[lower_border:upper_border]
+
+ if not min_range_funcs or len(curr_range_funcs) < len(min_range_funcs):
+ min_range_funcs = curr_range_funcs
+ lo_mid_funcs = lo_mid_funcs[lo_mid_funcs.index(min_range_funcs[0]):]
+ mid_hi_funcs = mid_hi_funcs[:mid_hi_funcs.index(min_range_funcs[-1]) + 1]
+ if len(min_range_funcs) == 2:
+ min_range_funcs.sort()
+ return min_range_funcs # can't get any smaller
min_range_funcs.sort()
return min_range_funcs
-def check_good_not_bad(good, bad):
+def check_good_not_bad(decider, good, bad):
"""Check if bad prof becomes GOOD by adding funcs it lacks from good prof"""
bad_copy = bad.copy()
for func in good:
if func not in bad:
bad_copy[func] = good[func]
- return run_external(bad_copy) == GOOD_STATUS
+ return decider(bad_copy) == status_enum.GOOD_STATUS
-def check_bad_not_good(good, bad):
+def check_bad_not_good(decider, good, bad):
"""Check if good prof BAD after adding funcs bad prof has that good doesnt"""
good_copy = good.copy()
for func in bad:
if func not in good:
good_copy[func] = bad[func]
- return run_external(good_copy) == BAD_STATUS
+ return decider(good_copy) == status_enum.BAD_STATUS
def main(_):
@@ -267,9 +322,10 @@ def main(_):
seed = time.time()
random.seed(seed)
- bisect_results = bisect_profiles_wrapper(good_items, bad_items)
- gnb_result = check_good_not_bad(good_items, bad_items)
- bng_result = check_bad_not_good(good_items, bad_items)
+ decider = generate_decider()
+ bisect_results = bisect_profiles_wrapper(decider, good_items, bad_items)
+ gnb_result = check_good_not_bad(decider, good_items, bad_items)
+ bng_result = check_bad_not_good(decider, good_items, bad_items)
results = {
'seed': seed,
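A detail worth noting in the generate_decider change above: this is Python 2, where an inner function cannot rebind a name in its enclosing scope (there is no 'nonlocal'), so the run counter lives in a one-element list that the closure mutates in place. A stripped-down illustration of the same pattern (make_counter is a hypothetical example, not from this patch):

def make_counter():
  """Return a function that reports how many times it has been called."""
  # A one-element list stands in for Python 3's 'nonlocal': the closure
  # can mutate count[0] but could not rebind a bare integer variable.
  count = [0]

  def bump():
    count[0] += 1
    return count[0]

  return bump

bump = make_counter()
print(bump())  # 1
print(bump())  # 2
print(bump())  # 3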
diff --git a/afdo_tools/bisection/afdo_prof_analysis_e2e_test.py b/afdo_tools/bisection/afdo_prof_analysis_e2e_test.py
index 6f04724a..e757cb11 100755
--- a/afdo_tools/bisection/afdo_prof_analysis_e2e_test.py
+++ b/afdo_tools/bisection/afdo_prof_analysis_e2e_test.py
@@ -63,10 +63,6 @@ class AfdoProfAnalysisE2ETest(unittest.TestCase):
'ranges': [['func_b', 'func_c', 'func_d']]
}
- # Since the range is so small, we want to finetune func interval
- # to confirm algo runs as expected
- # pylint:disable=protected-access
- analysis._FUNC_STEP = 1
self.run_check(good_prof, bad_prof, expected)
def run_check(self, good_prof, bad_prof, expected):
diff --git a/afdo_tools/bisection/afdo_prof_analysis_test.py b/afdo_tools/bisection/afdo_prof_analysis_test.py
index bbad85b9..ba1b3ab0 100755
--- a/afdo_tools/bisection/afdo_prof_analysis_test.py
+++ b/afdo_tools/bisection/afdo_prof_analysis_test.py
@@ -10,7 +10,6 @@ from __future__ import print_function
import afdo_prof_analysis as analysis
-import mock
import random
import unittest
@@ -37,31 +36,31 @@ class AfdoProfAnalysisTest(unittest.TestCase):
expected_text = 'func_a:1\ndata\nfunc_b:2\nmore data\n'
self.assertEqual(analysis.json_to_text(example_prof), expected_text)
- @mock.patch.object(analysis, 'run_external')
- def test_bisect_profiles(self, mock_run_external):
+ def test_bisect_profiles(self):
# mock run of external script with arbitrarily-chosen bad profile vals
- def run_external(prof):
+ # increment_counter specified and unused b/c afdo_prof_analysis.py
+ # will call with argument explicitly specified
+ # pylint: disable=unused-argument
+ def decider(prof, increment_counter=True):
if '1' in prof['func_a'] or '3' in prof['func_b']:
- return analysis.BAD_STATUS
- return analysis.GOOD_STATUS
+ return analysis.status_enum.BAD_STATUS
+ return analysis.status_enum.GOOD_STATUS
- mock_run_external.side_effect = run_external
- results = analysis.bisect_profiles_wrapper(self.good_items, self.bad_items)
+ results = analysis.bisect_profiles_wrapper(decider, self.good_items,
+ self.bad_items)
self.assertEqual(results['individuals'], sorted(['func_a', 'func_b']))
self.assertEqual(results['ranges'], [])
- @mock.patch.object(analysis, 'run_external')
- def test_non_bisecting_search(self, mock_run_external):
+ def test_range_search(self):
# arbitrarily chosen functions whose values in the bad profile constitute
# a problematic pair
- def run_external(prof):
+ # pylint: disable=unused-argument
+ def decider(prof, increment_counter=True):
if '1' in prof['func_a'] and '3' in prof['func_b']:
- return analysis.BAD_STATUS
- return analysis.GOOD_STATUS
-
- mock_run_external.side_effect = run_external
+ return analysis.status_enum.BAD_STATUS
+ return analysis.status_enum.GOOD_STATUS
# put the problematic combination in separate halves of the common funcs
# so that non-bisecting search is invoked for its actual use case
@@ -71,41 +70,35 @@ class AfdoProfAnalysisTest(unittest.TestCase):
common_funcs.remove('func_b')
common_funcs.append('func_b')
- problem_range = analysis.non_bisecting_search(
- self.good_items, self.bad_items, common_funcs, 0, len(common_funcs))
+ problem_range = analysis.range_search(decider, self.good_items,
+ self.bad_items, common_funcs, 0,
+ len(common_funcs))
- # we cannot test for the range being exactly these two functions because
- # the output is slightly random and, if unlucky, the range could end up
- # being bigger. But it is guaranteed that the output will at least
- # *contain* the problematic pair created here
- self.assertIn('func_a', problem_range)
- self.assertIn('func_b', problem_range)
+ self.assertEquals(['func_a', 'func_b'], problem_range)
- @mock.patch.object(analysis, 'run_external')
- def test_check_good_not_bad(self, mock_run_external):
+ def test_check_good_not_bad(self):
func_in_good = 'func_c'
- def run_external(prof):
+ # pylint: disable=unused-argument
+ def decider(prof, increment_counter=True):
if func_in_good in prof:
- return analysis.GOOD_STATUS
- return analysis.BAD_STATUS
+ return analysis.status_enum.GOOD_STATUS
+ return analysis.status_enum.BAD_STATUS
- mock_run_external.side_effect = run_external
self.assertTrue(
- analysis.check_good_not_bad(self.good_items, self.bad_items))
+ analysis.check_good_not_bad(decider, self.good_items, self.bad_items))
- @mock.patch.object(analysis, 'run_external')
- def test_check_bad_not_good(self, mock_run_external):
+ def test_check_bad_not_good(self):
func_in_bad = 'func_d'
- def run_external(prof):
+ # pylint: disable=unused-argument
+ def decider(prof, increment_counter=True):
if func_in_bad in prof:
- return analysis.BAD_STATUS
- return analysis.GOOD_STATUS
+ return analysis.status_enum.BAD_STATUS
+ return analysis.status_enum.GOOD_STATUS
- mock_run_external.side_effect = run_external
self.assertTrue(
- analysis.check_bad_not_good(self.good_items, self.bad_items))
+ analysis.check_bad_not_good(decider, self.good_items, self.bad_items))
if __name__ == '__main__':
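A pleasant side effect of threading the decider through as a parameter shows up in the test diffs above: every mock.patch decorator disappears, because tests can now inject a deterministic fake directly. A small sketch of why plain dependency injection keeps tests this simple (classify and its test are illustrative, not from the patch):

import unittest

def classify(items, decider):
  """Keep the items an injected yes/no decider approves of."""
  return [item for item in items if decider(item)]

class ClassifyTest(unittest.TestCase):

  def test_classify(self):
    # No mock.patch needed: hand in a deterministic stand-in decider.
    fake_decider = lambda item: item % 2 == 0
    self.assertEqual(classify([1, 2, 3, 4], fake_decider), [2, 4])

if __name__ == '__main__':
  unittest.main()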