2 files changed, 329 insertions, 0 deletions
diff --git a/afdo_redaction/remove_cold_functions.py b/afdo_redaction/remove_cold_functions.py
new file mode 100755
index 00000000..097085db
--- /dev/null
+++ b/afdo_redaction/remove_cold_functions.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script to remove cold functions in an textual AFDO profile.
+
+The script will look through the AFDO profile to find all the function
+records. Then it'll start with the functions with lowest sample count and
+remove it from the profile, until the total remaining functions in the
+profile meets the given number. When there are many functions having the
+same sample count, we need to remove all of them in order to meet the
+target, so the result profile will always have less than or equal to the
+given number of functions.
+
+The script is intended to be used on production Chrome OS profiles, after
+other redaction/trimming scripts. It can be used with given textual CWP
+and benchmark profiles, in order to analyze how many removed functions are
+from which profile (or both), which can be used an indicator of fairness
+during the removal.
+
+This is part of the effort to stablize the impact of AFDO profile on
+Chrome binary size. See crbug.com/1062014 for more context.
+"""
+
+from __future__ import division, print_function
+
+import argparse
+import collections
+import re
+import sys
+
+_function_line_re = re.compile(r'^([\w\$\.@]+):(\d+)(?::\d+)?$')
+ProfileRecord = collections.namedtuple(
+    'ProfileRecord', ['function_count', 'function_body', 'function_name'])
+
+
+def _read_sample_count(line):
+  m = _function_line_re.match(line)
+  assert m, 'Failed to interpret function line %s' % line
+  return m.group(1), int(m.group(2))
+
+
+def _read_textual_afdo_profile(stream):
+  """Parses an AFDO profile from a line stream into ProfileRecords."""
+  # ProfileRecords are actually nested, due to inlining. For the purpose of
+  # this script, that doesn't matter.
+  lines = (line.rstrip() for line in stream)
+  function_line = None
+  samples = []
+  ret = []
+  for line in lines:
+    if not line:
+      continue
+
+    if line[0].isspace():
+      assert function_line is not None, 'sample exists outside of a function?'
+      samples.append(line)
+      continue
+
+    if function_line is not None:
+      name, count = _read_sample_count(function_line)
+      body = [function_line] + samples
+      ret.append(
+          ProfileRecord(
+              function_count=count, function_body=body, function_name=name))
+    function_line = line
+    samples = []
+
+  if function_line is not None:
+    name, count = _read_sample_count(function_line)
+    body = [function_line] + samples
+    ret.append(
+        ProfileRecord(
+            function_count=count, function_body=body, function_name=name))
+  return ret
+
+
+def write_textual_afdo_profile(stream, records):
+  for r in records:
+    print('\n'.join(r.function_body), file=stream)
+
+
+def analyze_functions(records, cwp, benchmark):
+  cwp_functions = {x.function_name for x in cwp}
+  benchmark_functions = {x.function_name for x in benchmark}
+  all_functions = {x.function_name for x in records}
+  cwp_only_functions = len((all_functions & cwp_functions) -
+                           benchmark_functions)
+  benchmark_only_functions = len((all_functions & benchmark_functions) -
+                                 cwp_functions)
+  common_functions = len(all_functions & benchmark_functions & cwp_functions)
+  none_functions = len(all_functions - benchmark_functions - cwp_functions)
+
+  assert not none_functions
+  return cwp_only_functions, benchmark_only_functions, common_functions
+
+
+def run(input_stream, output_stream, goal, cwp=None, benchmark=None):
+  records = _read_textual_afdo_profile(input_stream)
+  num_functions = len(records)
+  if not num_functions:
+    return
+  assert goal, "It's invalid to remove all functions in the profile"
+
+  if cwp and benchmark:
+    cwp_records = _read_textual_afdo_profile(cwp)
+    benchmark_records = _read_textual_afdo_profile(benchmark)
+    cwp_num, benchmark_num, common_num = analyze_functions(
+        records, cwp_records, benchmark_records)
+
+  records.sort(key=lambda x: (-x.function_count, x.function_name))
+  records = records[:goal]
+
+  print(
+      'Retained %d/%d (%.1f%%) functions in the profile' %
+      (len(records), num_functions, 100.0 * len(records) / num_functions),
+      file=sys.stderr)
+  write_textual_afdo_profile(output_stream, records)
+
+  if cwp and benchmark:
+    cwp_num_after, benchmark_num_after, common_num_after = analyze_functions(
+        records, cwp_records, benchmark_records)
+    print(
+        'Retained %d/%d (%.1f%%) functions only appear in the CWP profile' %
+        (cwp_num_after, cwp_num, 100.0 * cwp_num_after / cwp_num),
+        file=sys.stderr)
+    print(
+        'Retained %d/%d (%.1f%%) functions only appear in the benchmark profile'
+        % (benchmark_num_after, benchmark_num,
+           100.0 * benchmark_num_after / benchmark_num),
+        file=sys.stderr)
+    print(
+        'Retained %d/%d (%.1f%%) functions appear in both CWP and benchmark'
+        ' profiles' % (common_num_after, common_num,
+                       100.0 * common_num_after / common_num),
+        file=sys.stderr)
+
+
+def main():
+  parser = argparse.ArgumentParser(
+      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+  parser.add_argument(
+      '--input',
+      default='/dev/stdin',
+      help='File to read from. Defaults to stdin.')
+  parser.add_argument(
+      '--output',
+      default='/dev/stdout',
+      help='File to write to. Defaults to stdout.')
+  parser.add_argument(
+      '--number',
+      type=int,
+      required=True,
+      help='Number of functions to retain in the profile.')
+  parser.add_argument(
+      '--cwp', help='Textualized CWP profiles, used for further analysis')
+  parser.add_argument(
+      '--benchmark',
+      help='Textualized benchmark profile, used for further analysis')
+  args = parser.parse_args()
+
+  if not args.number:
+    parser.error("It's invalid to remove the number of functions to 0.")
+
+  if (args.cwp and not args.benchmark) or (not args.cwp and args.benchmark):
+    parser.error('Please specify both --cwp and --benchmark')
+
+  with open(args.input) as stdin:
+    with open(args.output, 'w') as stdout:
+      # When user specify textualized cwp and benchmark profiles, perform
+      # the analysis. Otherwise, just trim the cold functions from profile.
+      if args.cwp and args.benchmark:
+        with open(args.cwp) as cwp:
+          with open(args.benchmark) as benchmark:
+            run(stdin, stdout, args.number, cwp, benchmark)
+      else:
+        run(stdin, stdout, args.number)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/afdo_redaction/remove_cold_functions_test.py b/afdo_redaction/remove_cold_functions_test.py
new file mode 100755
index 00000000..14f946b0
--- /dev/null
+++ b/afdo_redaction/remove_cold_functions_test.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for remove_cold_functions."""
+
+from __future__ import print_function
+
+import io
+from unittest.mock import patch
+import unittest
+
+from afdo_redaction import remove_cold_functions
+
+
+def _construct_profile(indices=None):
+  real_world_profile_functions = [
+      """SomeFunction1:24150:300
+ 2: 75
+ 3: 23850
+ 39: 225
+      """,
+      """SomeFunction2:8925:225
+ 0: 225
+ 0.2: 150
+ 0.1: SomeFunction2:6300
+  3: 6300
+ 0.2: SomeFunction2:150
+  3: 75
+      """,
+      """SomeFunction3:7500:75
+ 0: 75
+ 0.2: 75
+ 0.1: SomeFunction3:6600
+  1: 6600
+ 0.2: SomeFunction3:75
+  1: 75
+      """,
+      """LargerFunction4:51450:0
+ 1: 0
+ 3: 0
+ 3.1: 7350
+ 4: 7350
+ 7: 7350
+ 8: 7350
+ 9: 7350
+ 12: 0
+ 15: 0
+ 13: AnotherFunction5:0
+  3: 0
+  3.1: 0
+  3.2: 0
+  4: 0
+  5: 0
+  6: 0
+  7: 0
+  8: 0
+  9: 0
+      """,
+      """SomeFakeFunction5:7500:75
+ 0: 75
+ 0.2: 75
+ 0.1: SomeFakeFunction5:6600
+  1: 6600
+ 0.2: SomeFakeFunction5:75
+  1: 75
+      """,
+  ]
+
+  ret = []
+  if not indices:
+    for x in real_world_profile_functions:
+      ret += x.strip().splitlines()
+    return ret
+
+  ret = []
+  for i in indices:
+    ret += real_world_profile_functions[i].strip().splitlines()
+  return ret
+
+
+def _run_test(input_lines, goal, cwp_file=None, benchmark_file=None):
+  input_buf = io.StringIO('\n'.join(input_lines))
+  output_buf = io.StringIO()
+  remove_cold_functions.run(input_buf, output_buf, goal, cwp_file,
+                            benchmark_file)
+  return output_buf.getvalue().splitlines()
+
+
+class Test(unittest.TestCase):
+  """Test functions in remove_cold_functions.py"""
+
+  def test_empty_profile(self):
+    self.assertEqual(_run_test([], 0), [])
+
+  def test_remove_all_functions_fail(self):
+    input_profile_lines = _construct_profile()
+    with self.assertRaises(Exception) as context:
+      _run_test(input_profile_lines, 0)
+    self.assertEqual(
+        str(context.exception),
+        "It's invalid to remove all functions in the profile")
+
+  def test_remove_cold_functions_work(self):
+    input_profile_lines = _construct_profile()
+    # To make sure the cold functions are removed in order
+    expected_profile_lines = {
+        5: input_profile_lines,
+        # Entry 4 wins the tie breaker because the name is smaller
+        # alphabetically.
+        4: _construct_profile([0, 1, 3, 4]),
+        3: _construct_profile([0, 1, 3]),
+        2: _construct_profile([0, 3]),
+        1: _construct_profile([3]),
+    }
+
+    for num in expected_profile_lines:
+      self.assertCountEqual(
+          _run_test(input_profile_lines, num), expected_profile_lines[num])
+
+  def test_analyze_cwp_and_benchmark_work(self):
+    input_profile_lines = _construct_profile()
+    cwp_profile = _construct_profile([0, 1, 3, 4])
+    benchmark_profile = _construct_profile([1, 2, 3, 4])
+    cwp_buf = io.StringIO('\n'.join(cwp_profile))
+    benchmark_buf = io.StringIO('\n'.join(benchmark_profile))
+    with patch('sys.stderr', new=io.StringIO()) as fake_output:
+      _run_test(input_profile_lines, 3, cwp_buf, benchmark_buf)
+
+    output = fake_output.getvalue()
+    self.assertIn('Retained 3/5 (60.0%) functions in the profile', output)
+    self.assertIn(
+        'Retained 1/1 (100.0%) functions only appear in the CWP profile',
+        output)
+    self.assertIn(
+        'Retained 0/1 (0.0%) functions only appear in the benchmark profile',
+        output)
+    self.assertIn(
+        'Retained 2/3 (66.7%) functions appear in both CWP and benchmark'
+        ' profiles', output)
+
+
+if __name__ == '__main__':
+  unittest.main()