AFDO-Bisect: Initial draft of analysis algorithm

This CL is an initial draft of the AFDO profile analyzer at the heart of this project. It employs three different sorts of analysis (bisecting search, non-bisecting search, and by function diffs). BUG=None TEST=All added tests succeed. Change-Id: If584f30ef5a208c3da71123fe9212d8c3eac89c0 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/1687775 Reviewed-by: Caroline Tice <cmtice@chromium.org> Tested-by: Emma Vukelj <emmavukelj@google.com>
author: Emma Vukelj <emmavukelj@google.com> 2019-07-03 09:47:13 -0700
committer: Emma Vukelj <emmavukelj@google.com> 2019-07-15 17:06:44 +0000
commit: bb3993a3cfa94253993679ae1a64b12b0d2b3f68 (patch)
tree: 768d4c3813f72b946cdbedf4236dcb00d6d6b849 /afdo_tools
parent: 584728183f90cf6ef27fa09b52c8ee5203c87e4a (diff)
download: toolchain-utils-bb3993a3cfa94253993679ae1a64b12b0d2b3f68.tar.gz
6 files changed, 541 insertions, 2 deletions
diff --git a/afdo_tools/bisection/afdo_parse.py b/afdo_tools/bisection/afdo_parse.py
index b9da2174..876c2c8f 100755
--- a/afdo_tools/bisection/afdo_parse.py
+++ b/afdo_tools/bisection/afdo_parse.py
@@ -37,7 +37,9 @@ def parse_afdo(f):
       if curr_func:
         results[curr_func] = ''.join(curr_data)
         curr_data = []
-      curr_func = line.split(':')[0].strip()
+      curr_func, rest = line.split(':', 1)
+      curr_func = curr_func.strip()
+      curr_data.append(':' + rest)
     else:
       curr_data.append(line)
 
diff --git a/afdo_tools/bisection/afdo_parse_test.py b/afdo_tools/bisection/afdo_parse_test.py
index 3e17b8f9..3c9288a0 100755
--- a/afdo_tools/bisection/afdo_parse_test.py
+++ b/afdo_tools/bisection/afdo_parse_test.py
@@ -30,7 +30,8 @@ class SimpleAfdoParseTest(unittest.TestCase):
                                   ' 46: 13942\n'
                                   ' 46.1: 14003\n')
     expected = {
-        'deflate_slow': ' 3: 24\n'
+        'deflate_slow': ':87460059:3\n'
+                        ' 3: 24\n'
                         ' 14: 54767\n'
                         ' 15: 664 fill_window:22\n'
                         ' 16: 661\n'
diff --git a/afdo_tools/bisection/afdo_prof_analysis.py b/afdo_tools/bisection/afdo_prof_analysis.py
new file mode 100755
index 00000000..b713425b
--- /dev/null
+++ b/afdo_tools/bisection/afdo_prof_analysis.py
@@ -0,0 +1,292 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""AFDO Profile analysis tool.
+
+This script takes a good AFDO profile, a bad AFDO profile, and an external
+script which deems particular AFDO profiles as GOOD/BAD/SKIP, and an output
+file as arguments. Given these pieces of information, it analyzes the profiles
+to try and determine what exactly is bad about the bad profile. It does this
+with three main techniques: bisecting search, non-bisecting search, and rough
+diff-ing.
+"""
+
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+from tempfile import mkstemp
+
+from afdo_parse import parse_afdo
+
+import json
+import os
+import random
+import subprocess
+import sys
+import time
+
+flags.DEFINE_string('good_prof', None, 'Text-based "Good" profile for'
+                    'analysis')
+flags.DEFINE_string('bad_prof', None, 'Text-based "Bad" profile for' 'analysis')
+flags.DEFINE_string(
+    'external_decider', None, 'External script that, given an'
+    'AFDO profile, returns GOOD/BAD/SKIP')
+flags.DEFINE_string('analysis_output_file', None,
+                    'File to output JSON results to')
+flags.DEFINE_integer('seed', None, 'Integer specifying seed for randomness')
+FLAGS = flags.FLAGS
+
+GOOD_STATUS = 0
+BAD_STATUS = 1
+SKIP_STATUS = 125
+PROBLEM_STATUS = 127
+
+# Variables for non_bisecting search
+# for how many times non-bisecting search should run its algo
+_NUM_RUNS = 20
+# for how many new functions it should add at once
+# note that this will be unnecessary once non_bisecting_search TODO addressed
+_FUNC_STEP = 5
+
+
+def json_to_text(json_prof):
+  text_profile = []
+  for func in json_prof:
+    text_profile.append(func)
+    text_profile.append(json_prof[func])
+  return ''.join(text_profile)
+
+
+def prof_to_tmp(prof):
+  """Creates (and returns) temp filename for given JSON-based AFDO profile."""
+  fd, temp_path = mkstemp()
+  text_profile = json_to_text(prof)
+  with open(temp_path, 'w') as f:
+    f.write(text_profile)
+  os.close(fd)
+  return temp_path
+
+
+def run_external(prof):
+  """Run the external deciding script on the given profile."""
+  filename = prof_to_tmp(prof)
+
+  try:
+    child = subprocess.Popen([FLAGS.external_decider, filename])
+    child.communicate()
+  except:
+    raise  # the error of try block
+  finally:
+    os.remove(filename)
+
+  return_code = child.returncode
+  if return_code in (GOOD_STATUS, BAD_STATUS, SKIP_STATUS, PROBLEM_STATUS):
+    return return_code
+  raise ValueError(
+      'Provided external script had unexpected return code %d' % return_code)
+
+
+def bisect_profiles(good, bad, common_funcs, lo, hi):
+  """Recursive function which bisects good and bad profiles.
+
+  Args:
+    good: JSON-based good AFDO profile
+    bad: JSON-based bad AFDO profile
+    common_funcs: the list of functions which have top-level profiles in both
+      'good' and 'bad'
+    lo: lower bound of range being bisected on
+    hi: upper bound of range being bisected on
+
+  Returns a dictionary with two keys: 'individuals' and 'ranges'.
+  'individuals': a list of individual functions found to make the profile BAD
+  'ranges': a list of lists of function names. Each list of functions is a list
+    such that including all of those from the bad profile makes the good
+    profile BAD. It may not be the smallest problematic combination, but
+    definitely contains a problematic combination of profiles.
+  """
+
+  results = {'individuals': [], 'ranges': []}
+  if hi - lo <= 1:
+    results['individuals'].append(common_funcs[lo])
+    return results
+
+  mid = (lo + hi) / 2
+  lo_mid_prof = good.copy()  # covers bad from lo:mid
+  mid_hi_prof = good.copy()  # covers bad from mid:hi
+  for func in common_funcs[lo:mid]:
+    lo_mid_prof[func] = bad[func]
+  for func in common_funcs[mid:hi]:
+    mid_hi_prof[func] = bad[func]
+
+  lo_mid_verdict = run_external(lo_mid_prof)
+  mid_hi_verdict = run_external(mid_hi_prof)
+
+  if lo_mid_verdict == BAD_STATUS:
+    result = bisect_profiles(good, bad, common_funcs, lo, mid)
+    results['individuals'].extend(result['individuals'])
+    results['ranges'].extend(result['ranges'])
+  if mid_hi_verdict == BAD_STATUS:
+    result = bisect_profiles(good, bad, common_funcs, mid, hi)
+    results['individuals'].extend(result['individuals'])
+    results['ranges'].extend(result['ranges'])
+
+  # neither half is bad -> the issue is caused by several things occuring
+  # in conjunction, and this combination crosses 'mid'
+  if lo_mid_verdict == mid_hi_verdict == GOOD_STATUS:
+    problem_range = non_bisecting_search(good, bad, common_funcs, lo, hi)
+    if problem_range:
+      results['ranges'].append(problem_range)
+
+  return results
+
+
+def bisect_profiles_wrapper(good, bad, perform_check=True):
+  """Wrapper for recursive profile bisection."""
+
+  # Validate good and bad profiles are such, otherwise bisection reports noise
+  # Note that while run_external is a random mock, these assertions may fail.
+  if perform_check:
+    if run_external(good) != GOOD_STATUS:
+      raise ValueError("Supplied good profile is not actually GOOD")
+    if run_external(bad) != BAD_STATUS:
+      raise ValueError("Supplied bad profile is not actually BAD")
+
+  common_funcs = sorted(func for func in good if func in bad)
+  if not common_funcs:
+    return {'ranges': [], 'individuals': []}
+
+  # shuffle because the results of our analysis can be quite order-dependent
+  # but this list has no inherent ordering. By shuffling each time, the chances
+  # of finding new, potentially interesting results are increased each time
+  # the program is run
+  random.shuffle(common_funcs)
+  results = bisect_profiles(good, bad, common_funcs, 0, len(common_funcs))
+  results['ranges'].sort()
+  results['individuals'].sort()
+  return results
+
+
+# TODO(emmavukelj): make this O(logn) instead of O(n) by halving the searched
+# range instead of linearly incrementing. And also search from middle of range
+def non_bisecting_search(good, bad, common_funcs, lo, hi):
+  """Searches for problematic range without bisecting.
+
+  The main inner algorithm is the following, which looks for the smallest
+  possible ranges with problematic combinations. It starts the upper bound at
+  the midpoint, and increments this until it gets a BAD profile.
+  Then, it increments the lower bound until the resultant profile is GOOD, and
+  then we have a range that causes 'BAD'ness.
+
+  It does this _NUM_NONBISECT_RUNS times, and shuffles the functions being
+  looked at uniquely each time to try and get the smallest possible range
+  of functions but in a reasonable timeframe.
+  """
+
+  lo_mid_funcs = []
+  mid_hi_funcs = []
+  min_range_funcs = []
+  for _ in range(_NUM_RUNS):
+
+    if min_range_funcs:  # only examine range we've already narrowed to
+      lo_mid_funcs = lo_mid_funcs[lo_mid_funcs.index(min_range_funcs[0]):]
+      mid_hi_funcs = mid_hi_funcs[:mid_hi_funcs.index(min_range_funcs[-1]) + 1]
+      # shuffle to hopefully find smaller problem range
+      random.shuffle(lo_mid_funcs)
+      random.shuffle(mid_hi_funcs)
+    else:  # consider lo-mid and mid-hi separately bc must cross border
+      mid = (lo + hi) / 2
+      lo_mid_funcs = common_funcs[lo:mid]
+      mid_hi_funcs = common_funcs[mid:hi]
+
+    funcs = lo_mid_funcs + mid_hi_funcs
+    hi = len(funcs)
+    mid = len(lo_mid_funcs)
+    lo = 0
+
+    # because we need the problematic pair to pop up before we can narrow it
+    good_copy = good.copy()
+    for func in lo_mid_funcs:
+      good_copy[func] = bad[func]
+
+    for upper in range(mid, hi):  # find upper border of bad combination
+      upper_func = funcs[upper]
+      good_copy[upper_func] = bad[upper_func]
+      if ((upper % _FUNC_STEP == 0 or upper == hi - 1) and
+          run_external(good_copy) == BAD_STATUS):
+
+        # find lower border of bad combination
+        for lower in range(mid):
+          lower_func = funcs[lower]
+          good_copy[lower_func] = good[lower_func]
+          if ((lower % _FUNC_STEP == 0 or lower == mid - 1) and
+              run_external(good_copy) == GOOD_STATUS):
+
+            lower = max(0, lower - _FUNC_STEP + 1)  # +1 b/c current included
+            if not min_range_funcs or (upper - lower) < len(min_range_funcs):
+              # b/c upper func included
+              min_range_funcs = funcs[lower:upper + 1]
+              if len(min_range_funcs) == 2:
+                min_range_funcs.sort()
+                return min_range_funcs  # can't get any smaller
+              break
+
+  min_range_funcs.sort()
+  return min_range_funcs
+
+
+def check_good_not_bad(good, bad):
+  """Check if bad prof becomes GOOD by adding funcs it lacks from good prof"""
+  bad_copy = bad.copy()
+  for func in good:
+    if func not in bad:
+      bad_copy[func] = good[func]
+  return run_external(bad_copy) == GOOD_STATUS
+
+
+def check_bad_not_good(good, bad):
+  """Check if good prof BAD after adding funcs bad prof has that good doesnt"""
+  good_copy = good.copy()
+  for func in bad:
+    if func not in good:
+      good_copy[func] = bad[func]
+  return run_external(good_copy) == BAD_STATUS
+
+
+def main(_):
+  with open(FLAGS.good_prof) as good_f:
+    good_items = parse_afdo(good_f)
+  with open(FLAGS.bad_prof) as bad_f:
+    bad_items = parse_afdo(bad_f)
+
+  seed = FLAGS.seed
+  if not FLAGS.seed:
+    seed = time.time()
+  random.seed(seed)
+
+  bisect_results = bisect_profiles_wrapper(good_items, bad_items)
+  gnb_result = check_good_not_bad(good_items, bad_items)
+  bng_result = check_bad_not_good(good_items, bad_items)
+
+  results = {
+      'seed': seed,
+      'bisect_results': bisect_results,
+      'good_only_functions': gnb_result,
+      'bad_only_functions': bng_result
+  }
+  with open(FLAGS.analysis_output_file, 'wb') as f:
+    json.dump(results, f, indent=2)
+  return results
+
+
+if __name__ == '__main__':
+  flags.mark_flag_as_required('good_prof')
+  flags.mark_flag_as_required('bad_prof')
+  flags.mark_flag_as_required('external_decider')
+  flags.mark_flag_as_required('analysis_output_file')
+  app.run(main)
+else:
+  FLAGS(sys.argv)
diff --git a/afdo_tools/bisection/afdo_prof_analysis_e2e_test.py b/afdo_tools/bisection/afdo_prof_analysis_e2e_test.py
new file mode 100755
index 00000000..6f04724a
--- /dev/null
+++ b/afdo_tools/bisection/afdo_prof_analysis_e2e_test.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""End-to-end test for afdo_prof_analysis."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import afdo_prof_analysis as analysis
+
+import shutil
+import tempfile
+import os
+import unittest
+
+
+class AfdoProfAnalysisE2ETest(unittest.TestCase):
+  """Class for end-to-end testing of AFDO Profile Analysis"""
+
+  def test_afdo_prof_analysis(self):
+    # nothing significant about the values, just easier to remember even vs odd
+    good_prof = {
+        'func_a': ':1\n 1: 3\n 3: 5\n 5: 7\n',
+        'func_b': ':3\n 3: 5\n 5: 7\n 7: 9\n',
+        'func_c': ':5\n 5: 7\n 7: 9\n 9: 11\n',
+        'func_d': ':7\n 7: 9\n 9: 11\n 11: 13\n',
+        'good_func_a': ':11\n',
+        'good_func_b': ':13\n'
+    }
+
+    bad_prof = {
+        'func_a': ':2\n 2: 4\n 4: 6\n 6: 8\n',
+        'func_b': ':4\n 4: 6\n 6: 8\n 8: 10\n',
+        'func_c': ':6\n 6: 8\n 8: 10\n 10: 12\n',
+        'func_d': ':8\n 8: 10\n 10: 12\n 12: 14\n',
+        'bad_func_a': ':12\n',
+        'bad_func_b': ':14\n'
+    }
+
+    # Individual issues take precedence by nature of our algos
+    # so first, that should be caught
+    expected = {
+        'good_only_functions': False,
+        'bad_only_functions': True,
+        'bisect_results': {
+            'ranges': [],
+            'individuals': ['func_a']
+        }
+    }
+    self.run_check(good_prof, bad_prof, expected)
+
+    # Now remove individuals and exclusively BAD, and check that range is caught
+    bad_prof['func_a'] = good_prof['func_a']
+    bad_prof.pop('bad_func_a')
+    bad_prof.pop('bad_func_b')
+    expected['bad_only_functions'] = False
+    expected['bisect_results'] = {
+        'individuals': [],
+        'ranges': [['func_b', 'func_c', 'func_d']]
+    }
+
+    # Since the range is so small, we want to finetune func interval
+    # to confirm algo runs as expected
+    # pylint:disable=protected-access
+    analysis._FUNC_STEP = 1
+    self.run_check(good_prof, bad_prof, expected)
+
+  def run_check(self, good_prof, bad_prof, expected):
+    temp_dir = tempfile.mkdtemp()
+
+    def cleanup():
+      shutil.rmtree(temp_dir, ignore_errors=True)
+
+    self.addCleanup(cleanup)
+
+    good_prof_file = '%s/%s' % (temp_dir, 'good_prof.txt')
+    bad_prof_file = '%s/%s' % (temp_dir, 'bad_prof.txt')
+    good_prof_text = analysis.json_to_text(good_prof)
+    bad_prof_text = analysis.json_to_text(bad_prof)
+    with open(good_prof_file, 'w') as f:
+      f.write(good_prof_text)
+    with open(bad_prof_file, 'w') as f:
+      f.write(bad_prof_text)
+
+    analysis.FLAGS.good_prof = good_prof_file
+    analysis.FLAGS.bad_prof = bad_prof_file
+
+    dir_path = os.path.dirname(os.path.realpath(__file__))  # dir of this file
+    external_script = '%s/e2e_external.sh' % (dir_path)
+    analysis.FLAGS.external_decider = external_script
+    analysis.FLAGS.analysis_output_file = '/dev/null'
+
+    actual = analysis.main(None)
+    actual.pop('seed')  # nothing to check
+    self.assertEqual(actual, expected)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/afdo_tools/bisection/afdo_prof_analysis_test.py b/afdo_tools/bisection/afdo_prof_analysis_test.py
new file mode 100755
index 00000000..bbad85b9
--- /dev/null
+++ b/afdo_tools/bisection/afdo_prof_analysis_test.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for afdo_prof_analysis."""
+
+from __future__ import print_function
+
+import afdo_prof_analysis as analysis
+
+import mock
+import random
+import unittest
+
+
+class AfdoProfAnalysisTest(unittest.TestCase):
+  """Class for testing AFDO Profile Analysis"""
+  bad_items = {'func_a': '1', 'func_b': '3', 'func_c': '5'}
+  good_items = {'func_a': '2', 'func_b': '4', 'func_d': '5'}
+  random.seed(13)  # 13 is an arbitrary choice. just for consistency
+  # add some extra info to make tests more reflective of real scenario
+  for num in range(128):
+    func_name = 'func_extra_%d' % num
+    # 1/3 to both, 1/3 only to good, 1/3 only to bad
+    rand_val = random.randint(1, 101)
+    if rand_val < 67:
+      bad_items[func_name] = 'test_data'
+    if rand_val < 34 or rand_val >= 67:
+      good_items[func_name] = 'test_data'
+
+  analysis.random.seed(5)  # 5 is an arbitrary choice. For consistent testing
+
+  def test_json_to_text(self):
+    example_prof = {'func_a': ':1\ndata\n', 'func_b': ':2\nmore data\n'}
+    expected_text = 'func_a:1\ndata\nfunc_b:2\nmore data\n'
+    self.assertEqual(analysis.json_to_text(example_prof), expected_text)
+
+  @mock.patch.object(analysis, 'run_external')
+  def test_bisect_profiles(self, mock_run_external):
+
+    # mock run of external script with arbitrarily-chosen bad profile vals
+    def run_external(prof):
+      if '1' in prof['func_a'] or '3' in prof['func_b']:
+        return analysis.BAD_STATUS
+      return analysis.GOOD_STATUS
+
+    mock_run_external.side_effect = run_external
+    results = analysis.bisect_profiles_wrapper(self.good_items, self.bad_items)
+    self.assertEqual(results['individuals'], sorted(['func_a', 'func_b']))
+    self.assertEqual(results['ranges'], [])
+
+  @mock.patch.object(analysis, 'run_external')
+  def test_non_bisecting_search(self, mock_run_external):
+
+    # arbitrarily chosen functions whose values in the bad profile constitute
+    # a problematic pair
+    def run_external(prof):
+      if '1' in prof['func_a'] and '3' in prof['func_b']:
+        return analysis.BAD_STATUS
+      return analysis.GOOD_STATUS
+
+    mock_run_external.side_effect = run_external
+
+    # put the problematic combination in separate halves of the common funcs
+    # so that non-bisecting search is invoked for its actual use case
+    common_funcs = [func for func in self.good_items if func in self.bad_items]
+    common_funcs.remove('func_a')
+    common_funcs.insert(0, 'func_a')
+    common_funcs.remove('func_b')
+    common_funcs.append('func_b')
+
+    problem_range = analysis.non_bisecting_search(
+        self.good_items, self.bad_items, common_funcs, 0, len(common_funcs))
+
+    # we cannot test for the range being exactly these two functions because
+    # the output is slightly random and, if unlucky, the range could end up
+    # being bigger. But it is guaranteed that the output will at least
+    # *contain* the problematic pair created here
+    self.assertIn('func_a', problem_range)
+    self.assertIn('func_b', problem_range)
+
+  @mock.patch.object(analysis, 'run_external')
+  def test_check_good_not_bad(self, mock_run_external):
+    func_in_good = 'func_c'
+
+    def run_external(prof):
+      if func_in_good in prof:
+        return analysis.GOOD_STATUS
+      return analysis.BAD_STATUS
+
+    mock_run_external.side_effect = run_external
+    self.assertTrue(
+        analysis.check_good_not_bad(self.good_items, self.bad_items))
+
+  @mock.patch.object(analysis, 'run_external')
+  def test_check_bad_not_good(self, mock_run_external):
+    func_in_bad = 'func_d'
+
+    def run_external(prof):
+      if func_in_bad in prof:
+        return analysis.BAD_STATUS
+      return analysis.GOOD_STATUS
+
+    mock_run_external.side_effect = run_external
+    self.assertTrue(
+        analysis.check_bad_not_good(self.good_items, self.bad_items))
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/afdo_tools/bisection/e2e_external.sh b/afdo_tools/bisection/e2e_external.sh
new file mode 100755
index 00000000..1358075f
--- /dev/null
+++ b/afdo_tools/bisection/e2e_external.sh
@@ -0,0 +1,29 @@
+#!/bin/bash -eu
+
+GOOD_STATUS=0
+BAD_STATUS=1
+SKIP_STATUS=125
+PROBLEM_STATUS=127
+
+tmp_file=$(mktemp)
+trap "rm -f '${tmp_file}'" EXIT
+grep -v '^ ' "$1" > "${tmp_file}"
+
+if grep -q bad "${tmp_file}"; then
+  exit $BAD_STATUS
+fi
+
+# func_a containing '2' in its top line is BAD
+if grep -q 'func_a.*2' "${tmp_file}"; then
+  exit $BAD_STATUS
+fi
+
+# func_b, func_c, and func_d with even values are bad in conjunction
+if grep -q 'func_b.*4' "${tmp_file}" && \
+  grep -q 'func_c.*6' "${tmp_file}" && \
+  grep -q 'func_d.*8' "${tmp_file}"; then
+  exit $BAD_STATUS
+fi
+
+# If none of the BADness conditions are met
+exit $GOOD_STATUS
author	Emma Vukelj <emmavukelj@google.com>	2019-07-03 09:47:13 -0700
committer	Emma Vukelj <emmavukelj@google.com>	2019-07-15 17:06:44 +0000
commit	bb3993a3cfa94253993679ae1a64b12b0d2b3f68 (patch)
tree	768d4c3813f72b946cdbedf4236dcb00d6d6b849 /afdo_tools
parent	584728183f90cf6ef27fa09b52c8ee5203c87e4a (diff)
download	toolchain-utils-bb3993a3cfa94253993679ae1a64b12b0d2b3f68.tar.gz