diff options
Diffstat (limited to 'afdo_redaction')
-rwxr-xr-x | afdo_redaction/remove_cold_functions.py | 183 | ||||
-rwxr-xr-x | afdo_redaction/remove_cold_functions_test.py | 146 |
2 files changed, 329 insertions, 0 deletions
diff --git a/afdo_redaction/remove_cold_functions.py b/afdo_redaction/remove_cold_functions.py new file mode 100755 index 00000000..097085db --- /dev/null +++ b/afdo_redaction/remove_cold_functions.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2020 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Script to remove cold functions in a textual AFDO profile. + +The script will look through the AFDO profile to find all the function +records. Then it'll start with the functions with the lowest sample count +and remove them from the profile, until the total remaining functions in the +profile meets the given number. When several functions have the same +sample count, ties are broken alphabetically by function name, so the +resulting profile will always have less than or equal to the given +number of functions. + +The script is intended to be used on production Chrome OS profiles, after +other redaction/trimming scripts. It can be used with given textual CWP +and benchmark profiles, in order to analyze how many removed functions are +from which profile (or both), which can be used as an indicator of fairness +during the removal. + +This is part of the effort to stabilize the impact of AFDO profile on +Chrome binary size. See crbug.com/1062014 for more context. 
+""" + +from __future__ import division, print_function + +import argparse +import collections +import re +import sys + +_function_line_re = re.compile(r'^([\w\$\.@]+):(\d+)(?::\d+)?$') +ProfileRecord = collections.namedtuple( + 'ProfileRecord', ['function_count', 'function_body', 'function_name']) + + +def _read_sample_count(line): + m = _function_line_re.match(line) + assert m, 'Failed to interpret function line %s' % line + return m.group(1), int(m.group(2)) + + +def _read_textual_afdo_profile(stream): + """Parses an AFDO profile from a line stream into ProfileRecords.""" + # ProfileRecords are actually nested, due to inlining. For the purpose of + # this script, that doesn't matter. + lines = (line.rstrip() for line in stream) + function_line = None + samples = [] + ret = [] + for line in lines: + if not line: + continue + + if line[0].isspace(): + assert function_line is not None, 'sample exists outside of a function?' + samples.append(line) + continue + + if function_line is not None: + name, count = _read_sample_count(function_line) + body = [function_line] + samples + ret.append( + ProfileRecord( + function_count=count, function_body=body, function_name=name)) + function_line = line + samples = [] + + if function_line is not None: + name, count = _read_sample_count(function_line) + body = [function_line] + samples + ret.append( + ProfileRecord( + function_count=count, function_body=body, function_name=name)) + return ret + + +def write_textual_afdo_profile(stream, records): + for r in records: + print('\n'.join(r.function_body), file=stream) + + +def analyze_functions(records, cwp, benchmark): + cwp_functions = {x.function_name for x in cwp} + benchmark_functions = {x.function_name for x in benchmark} + all_functions = {x.function_name for x in records} + cwp_only_functions = len((all_functions & cwp_functions) - + benchmark_functions) + benchmark_only_functions = len((all_functions & benchmark_functions) - + cwp_functions) + common_functions = 
len(all_functions & benchmark_functions & cwp_functions) + none_functions = len(all_functions - benchmark_functions - cwp_functions) + + assert not none_functions + return cwp_only_functions, benchmark_only_functions, common_functions + + +def run(input_stream, output_stream, goal, cwp=None, benchmark=None): + records = _read_textual_afdo_profile(input_stream) + num_functions = len(records) + if not num_functions: + return + assert goal, "It's invalid to remove all functions in the profile" + + if cwp and benchmark: + cwp_records = _read_textual_afdo_profile(cwp) + benchmark_records = _read_textual_afdo_profile(benchmark) + cwp_num, benchmark_num, common_num = analyze_functions( + records, cwp_records, benchmark_records) + + records.sort(key=lambda x: (-x.function_count, x.function_name)) + records = records[:goal] + + print( + 'Retained %d/%d (%.1f%%) functions in the profile' % + (len(records), num_functions, 100.0 * len(records) / num_functions), + file=sys.stderr) + write_textual_afdo_profile(output_stream, records) + + if cwp and benchmark: + cwp_num_after, benchmark_num_after, common_num_after = analyze_functions( + records, cwp_records, benchmark_records) + print( + 'Retained %d/%d (%.1f%%) functions only appear in the CWP profile' % + (cwp_num_after, cwp_num, 100.0 * cwp_num_after / cwp_num), + file=sys.stderr) + print( + 'Retained %d/%d (%.1f%%) functions only appear in the benchmark profile' + % (benchmark_num_after, benchmark_num, + 100.0 * benchmark_num_after / benchmark_num), + file=sys.stderr) + print( + 'Retained %d/%d (%.1f%%) functions appear in both CWP and benchmark' + ' profiles' % (common_num_after, common_num, + 100.0 * common_num_after / common_num), + file=sys.stderr) + + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + '--input', + default='/dev/stdin', + help='File to read from. 
Defaults to stdin.') + parser.add_argument( + '--output', + default='/dev/stdout', + help='File to write to. Defaults to stdout.') + parser.add_argument( + '--number', + type=int, + required=True, + help='Number of functions to retain in the profile.') + parser.add_argument( + '--cwp', help='Textualized CWP profiles, used for further analysis') + parser.add_argument( + '--benchmark', + help='Textualized benchmark profile, used for further analysis') + args = parser.parse_args() + + if not args.number: + parser.error("It's invalid to remove the number of functions to 0.") + + if (args.cwp and not args.benchmark) or (not args.cwp and args.benchmark): + parser.error('Please specify both --cwp and --benchmark') + + with open(args.input) as stdin: + with open(args.output, 'w') as stdout: + # When user specify textualized cwp and benchmark profiles, perform + # the analysis. Otherwise, just trim the cold functions from profile. + if args.cwp and args.benchmark: + with open(args.cwp) as cwp: + with open(args.benchmark) as benchmark: + run(stdin, stdout, args.number, cwp, benchmark) + else: + run(stdin, stdout, args.number) + + +if __name__ == '__main__': + main() diff --git a/afdo_redaction/remove_cold_functions_test.py b/afdo_redaction/remove_cold_functions_test.py new file mode 100755 index 00000000..14f946b0 --- /dev/null +++ b/afdo_redaction/remove_cold_functions_test.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2020 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +"""Tests for remove_cold_functions.""" + +from __future__ import print_function + +import io +from unittest.mock import patch +import unittest + +from afdo_redaction import remove_cold_functions + + +def _construct_profile(indices=None): + real_world_profile_functions = [ + """SomeFunction1:24150:300 + 2: 75 + 3: 23850 + 39: 225 + """, + """SomeFunction2:8925:225 + 0: 225 + 0.2: 150 + 0.1: SomeFunction2:6300 + 3: 6300 + 0.2: SomeFunction2:150 + 3: 75 + """, + """SomeFunction3:7500:75 + 0: 75 + 0.2: 75 + 0.1: SomeFunction3:6600 + 1: 6600 + 0.2: SomeFunction3:75 + 1: 75 + """, + """LargerFunction4:51450:0 + 1: 0 + 3: 0 + 3.1: 7350 + 4: 7350 + 7: 7350 + 8: 7350 + 9: 7350 + 12: 0 + 15: 0 + 13: AnotherFunction5:0 + 3: 0 + 3.1: 0 + 3.2: 0 + 4: 0 + 5: 0 + 6: 0 + 7: 0 + 8: 0 + 9: 0 + """, + """SomeFakeFunction5:7500:75 + 0: 75 + 0.2: 75 + 0.1: SomeFakeFunction5:6600 + 1: 6600 + 0.2: SomeFakeFunction5:75 + 1: 75 + """, + ] + + ret = [] + if not indices: + for x in real_world_profile_functions: + ret += x.strip().splitlines() + return ret + + ret = [] + for i in indices: + ret += real_world_profile_functions[i].strip().splitlines() + return ret + + +def _run_test(input_lines, goal, cwp_file=None, benchmark_file=None): + input_buf = io.StringIO('\n'.join(input_lines)) + output_buf = io.StringIO() + remove_cold_functions.run(input_buf, output_buf, goal, cwp_file, + benchmark_file) + return output_buf.getvalue().splitlines() + + +class Test(unittest.TestCase): + """Test functions in remove_cold_functions.py""" + + def test_empty_profile(self): + self.assertEqual(_run_test([], 0), []) + + def test_remove_all_functions_fail(self): + input_profile_lines = _construct_profile() + with self.assertRaises(Exception) as context: + _run_test(input_profile_lines, 0) + self.assertEqual( + str(context.exception), + "It's invalid to remove all functions in the profile") + + def test_remove_cold_functions_work(self): + input_profile_lines = _construct_profile() + # To make sure the cold 
functions are removed in order + expected_profile_lines = { + 5: input_profile_lines, + # Entry 4 wins the tie breaker because the name is smaller + # alphabetically. + 4: _construct_profile([0, 1, 3, 4]), + 3: _construct_profile([0, 1, 3]), + 2: _construct_profile([0, 3]), + 1: _construct_profile([3]), + } + + for num in expected_profile_lines: + self.assertCountEqual( + _run_test(input_profile_lines, num), expected_profile_lines[num]) + + def test_analyze_cwp_and_benchmark_work(self): + input_profile_lines = _construct_profile() + cwp_profile = _construct_profile([0, 1, 3, 4]) + benchmark_profile = _construct_profile([1, 2, 3, 4]) + cwp_buf = io.StringIO('\n'.join(cwp_profile)) + benchmark_buf = io.StringIO('\n'.join(benchmark_profile)) + with patch('sys.stderr', new=io.StringIO()) as fake_output: + _run_test(input_profile_lines, 3, cwp_buf, benchmark_buf) + + output = fake_output.getvalue() + self.assertIn('Retained 3/5 (60.0%) functions in the profile', output) + self.assertIn( + 'Retained 1/1 (100.0%) functions only appear in the CWP profile', + output) + self.assertIn( + 'Retained 0/1 (0.0%) functions only appear in the benchmark profile', + output) + self.assertIn( + 'Retained 2/3 (66.7%) functions appear in both CWP and benchmark' + ' profiles', output) + + +if __name__ == '__main__': + unittest.main() |