afdo_redaction/remove_cold_functions.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2020 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to remove cold functions in an textual AFDO profile.

The script will look through the AFDO profile to find all the function
records. It then removes functions, starting with the ones with the lowest
sample count, until the number of functions remaining in the profile meets
the given number. Functions having the same sample count are ordered by
name, so the choice among ties is deterministic, and the result profile
will always have less than or equal to the given number of functions.

The script is intended to be used on production Chrome OS profiles, after
other redaction/trimming scripts. It can be used with given textual CWP
and benchmark profiles, in order to analyze how many removed functions are
from which profile (or both), which can be used as an indicator of fairness
during the removal.

This is part of the effort to stabilize the impact of AFDO profile on
Chrome binary size. See crbug.com/1062014 for more context.
"""

from __future__ import division, print_function

import argparse
import collections
import re
import sys

# Matches a function header line: a symbol name made of word characters,
# '$', '.' or '@', then ':<digits>', optionally followed by one more
# ':<digits>' field. Group 1 is the name; group 2 is the first number, which
# _read_sample_count returns as the function's sample count.
_function_line_re = re.compile(r'^([\w\$\.@]+):(\d+)(?::\d+)?$')
# One function record of a textual profile:
#   function_count: integer parsed from the record's header line.
#   function_body: list of the record's lines, header line first.
#   function_name: name parsed from the record's header line.
ProfileRecord = collections.namedtuple(
    'ProfileRecord', ['function_count', 'function_body', 'function_name'])


def _read_sample_count(line):
  """Splits a function header line into its name and sample count.

  Args:
    line: a function header line, with no trailing whitespace.

  Returns:
    A (function name, sample count) tuple; the count is an int.

  Raises:
    AssertionError: if `line` does not match the function header pattern.
  """
  match = _function_line_re.match(line)
  assert match, 'Failed to interpret function line %s' % line
  name, count_text = match.group(1, 2)
  return name, int(count_text)


def _read_textual_afdo_profile(stream):
  """Parses an AFDO profile from a line stream into ProfileRecords.

  A line starting with a non-whitespace character opens a new function
  record; following lines that start with whitespace are that record's
  sample lines. Blank lines are skipped and trailing whitespace is stripped.

  Args:
    stream: an iterable of text lines.

  Returns:
    A list of ProfileRecords, in the order they appear in the stream.

  Raises:
    AssertionError: if a sample line shows up before any function header, or
      if a header line cannot be interpreted.
  """
  # ProfileRecords are really nested because of inlining, but this script
  # only cares about the top-level ones, so nested headers (which are
  # indented) are simply kept as part of the enclosing body.

  def make_record(header, sample_lines):
    name, count = _read_sample_count(header)
    return ProfileRecord(
        function_count=count,
        function_body=[header] + sample_lines,
        function_name=name)

  records = []
  current_header = None
  current_samples = []
  for raw_line in stream:
    text = raw_line.rstrip()
    if not text:
      continue

    if not text[0].isspace():
      # A new header closes out whichever record was being collected.
      if current_header is not None:
        records.append(make_record(current_header, current_samples))
      current_header = text
      current_samples = []
      continue

    assert current_header is not None, 'sample exists outside of a function?'
    current_samples.append(text)

  # The last record has no following header to flush it.
  if current_header is not None:
    records.append(make_record(current_header, current_samples))
  return records


def write_textual_afdo_profile(stream, records):
  """Writes each record's body lines to `stream`, one line per row.

  Args:
    stream: a writable text stream.
    records: an iterable of objects with a `function_body` list of strings.
  """
  bodies = (record.function_body for record in records)
  for body_lines in bodies:
    text = '\n'.join(body_lines)
    print(text, file=stream)


def analyze_functions(records, cwp, benchmark):
  """Counts how the functions in `records` split between two profiles.

  Args:
    records: iterable of records (with `function_name`) to classify.
    cwp: iterable of records from the CWP profile.
    benchmark: iterable of records from the benchmark profile.

  Returns:
    A tuple (cwp_only, benchmark_only, common) of counts of distinct
    function names from `records` that appear only in `cwp`, only in
    `benchmark`, or in both.

  Raises:
    AssertionError: if some function in `records` is in neither profile.
  """
  cwp_names = set(r.function_name for r in cwp)
  benchmark_names = set(r.function_name for r in benchmark)
  profile_names = set(r.function_name for r in records)

  in_cwp = profile_names & cwp_names
  in_benchmark = profile_names & benchmark_names
  shared = in_cwp & in_benchmark
  unaccounted = profile_names - (cwp_names | benchmark_names)

  assert not unaccounted
  return len(in_cwp - shared), len(in_benchmark - shared), len(shared)


def _retained_percentage(retained, total):
  """Returns `retained` as a percentage of `total`.

  An empty category (total of 0) is reported as 100%: nothing could have
  been removed from it, and this avoids dividing by zero.
  """
  if not total:
    return 100.0
  return 100.0 * retained / total


def run(input_stream, output_stream, goal, cwp=None, benchmark=None):
  """Keeps the `goal` hottest functions of a textual AFDO profile.

  Records are ordered by descending sample count, with ties broken by
  function name, and the first `goal` of them are written to
  `output_stream`. A summary of how many functions were retained is printed
  to stderr. When both `cwp` and `benchmark` are given, the summary also
  breaks the retention down by functions that appear only in the CWP
  profile, only in the benchmark profile, or in both.

  Args:
    input_stream: iterable of lines of the textual profile to trim.
    output_stream: writable text stream that receives the trimmed profile.
    goal: number of functions to retain.
    cwp: optional iterable of lines of the textual CWP profile.
    benchmark: optional iterable of lines of the textual benchmark profile.

  Raises:
    AssertionError: if `goal` is 0 while the profile has functions, if the
      profile cannot be parsed, or if a function in the profile is found in
      neither `cwp` nor `benchmark` during the analysis.
  """
  records = _read_textual_afdo_profile(input_stream)
  num_functions = len(records)
  if not num_functions:
    # Nothing to trim; nothing is written or reported for an empty profile.
    return
  assert goal, "It's invalid to remove all functions in the profile"

  # The breakdown needs both reference profiles; one alone is ignored.
  analyze = bool(cwp and benchmark)
  if analyze:
    cwp_records = _read_textual_afdo_profile(cwp)
    benchmark_records = _read_textual_afdo_profile(benchmark)
    counts_before = analyze_functions(records, cwp_records, benchmark_records)

  # Hottest first; the name is a tie-breaker so the output is deterministic.
  records.sort(key=lambda x: (-x.function_count, x.function_name))
  records = records[:goal]

  print(
      'Retained %d/%d (%.1f%%) functions in the profile' %
      (len(records), num_functions,
       _retained_percentage(len(records), num_functions)),
      file=sys.stderr)
  write_textual_afdo_profile(output_stream, records)

  if analyze:
    counts_after = analyze_functions(records, cwp_records, benchmark_records)
    messages = (
        'Retained %d/%d (%.1f%%) functions only appear in the CWP profile',
        'Retained %d/%d (%.1f%%) functions only appear in the benchmark profile',
        'Retained %d/%d (%.1f%%) functions appear in both CWP and benchmark'
        ' profiles',
    )
    for message, after, before in zip(messages, counts_after, counts_before):
      # Any of these categories may legitimately be empty (e.g. every CWP
      # function is also in the benchmark profile), so the percentage must
      # tolerate a zero denominator.
      print(
          message % (after, before, _retained_percentage(after, before)),
          file=sys.stderr)


def main():
  """Command-line entry point: parses the flags and trims the profile.

  Reads the profile named by --input, keeps --number functions, and writes
  the result to --output. If --cwp and --benchmark are both given, the
  retention breakdown against those profiles is reported as well. Invalid
  flag combinations terminate the program through `parser.error`.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      '--input',
      default='/dev/stdin',
      help='File to read from. Defaults to stdin.')
  parser.add_argument(
      '--output',
      default='/dev/stdout',
      help='File to write to. Defaults to stdout.')
  parser.add_argument(
      '--number',
      type=int,
      required=True,
      help='Number of functions to retain in the profile.')
  parser.add_argument(
      '--cwp', help='Textualized CWP profiles, used for further analysis')
  parser.add_argument(
      '--benchmark',
      help='Textualized benchmark profile, used for further analysis')
  args = parser.parse_args()

  if not args.number:
    parser.error("It's invalid to remove the number of functions to 0.")

  # A negative count would be used as a slice bound by run() and silently
  # drop that many of the coldest functions instead of retaining N.
  if args.number < 0:
    parser.error('--number must be a positive integer, got %d.' % args.number)

  # The analysis needs both profiles, so supplying only one is an error.
  if bool(args.cwp) != bool(args.benchmark):
    parser.error('Please specify both --cwp and --benchmark')

  with open(args.input) as stdin, open(args.output, 'w') as stdout:
    # When user specify textualized cwp and benchmark profiles, perform
    # the analysis. Otherwise, just trim the cold functions from profile.
    if args.cwp and args.benchmark:
      with open(args.cwp) as cwp, open(args.benchmark) as benchmark:
        run(stdin, stdout, args.number, cwp, benchmark)
    else:
      run(stdin, stdout, args.number)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
  main()