#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to redact apparent ICF'ed symbolsfrom textual AFDO profiles.

AFDO sampling and ICF have an unfortunate interaction that causes a huge
inflation in sample counts. Essentially, if you have N functions ICF'ed to the
same location, one AFDO sample in any of those N functions will count as one
sample in *each* of those N functions.

In practice, there are a few forms of function bodies that are very heavily
ICF'ed (e.g. `ret`, `xor %eax, %eax; ret`, destructors for widely-used types
like std::map...). Recording 28,000 samples across all N thousand logical
functions that point to the same body really hurts our AFDO numbers, given that
our actual sample count across all of Chrome is something around 10,000,000.
(No, really, these are actual numbers. In practice, at the time of writing,
this script eliminates >90% of our AFDO samples by count. Sometimes as high as
98%.)

It reads a textual AFDO profile from stdin, and prints a 'fixed' version of it
to stdout. A summary of what the script actually did is printed to stderr.
"""

from __future__ import division, print_function

import collections
import re
import sys


def _count_samples(samples):
  """Count the total number of samples in a function."""
  line_re = re.compile(r'^(\s*)\d+(?:\.\d+)?: (\d+)\s*$')

  top_level_samples = 0
  all_samples = 0
  for line in samples:
    m = line_re.match(line)
    if not m:
      continue

    spaces, n = m.groups()
    n = int(n)
    all_samples += n
    if len(spaces) == 1:
      top_level_samples += n

  return top_level_samples, all_samples
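
# For illustration (made-up numbers): given the body lines
#   [' 1: 100', '  2: 50']
# _count_samples returns (100, 150); the line indented by a single space is
# top-level, and both lines count toward the overall total.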


# A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO
# profile. `function_line` is the top line of said function, and `samples` is
# a tuple of all of the sample lines below it.
#
# We rely on the format of these strings in some places in this script. For
# reference, a full function sample will look something like:
#
# _ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185
#  6: 83
#  15: 126
#  62832: 126
#  6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349
#   1: 206
#   1: _ZNK5blink10PaintLayer14GroupedMappersEv:2060
#    1: 206
#  11: _ZNK5blink10PaintLayer25GetCompositedLayerMappingEv:800
#   2.1: 80
#
# In that case, function_line is
# '_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185', and samples will be
# every line below that.
#
# Function lines look like:
# function_symbol:entry_count:dont_care
#
# And samples look like one of:
#  arbitrary_number: sample_count
#  arbitrary_number: inlined_function_symbol:inlined_entry_count
ProfileRecord = collections.namedtuple('ProfileRecord',
                                       ['function_line', 'samples'])


def _normalize_samples(samples):
  """Normalizes the samples in the given function body.

  Normalization just means that we redact inlined function names. This is
  done so that a bit of templating doesn't make two function bodies look
  distinct. Namely:

  template <typename T>
  __attribute__((noinline))
  int getNumber() { return 1; }

  template <typename T>
  __attribute__((noinline))
  int getNumberIndirectly() { return getNumber<T>(); }

  int main() {
    return getNumber<int>() + getNumber<float>();
  }

  If the profile has the mangled name for getNumber<float> inlined into
  getNumberIndirectly<float> (and similarly for <int>), we'd consider the two
  getNumberIndirectly bodies to be distinct when structurally they're not.
  """

  # I'm not actually sure if this ends up being an issue in practice, but it's
  # simple enough to guard against.
  inlined_re = re.compile(r'(^\s*\d+): [^:]+:(\s*\d+)\s*$')
  result = []
  for s in samples:
    m = inlined_re.match(s)
    if m:
      result.append('%s: __REDACTED__:%s' % m.groups())
    else:
      result.append(s)
  return tuple(result)
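
# As a concrete sketch of the normalization above, an inlined-function line
# such as
#   '  6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349'
# becomes
#   '  6: __REDACTED__:2349'
# while a plain count line like '  6: 83' passes through unchanged.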


def _read_textual_afdo_profile(stream):
  """Parses an AFDO profile from a line stream into ProfileRecords."""
  # ProfileRecords are actually nested, due to inlining. For the purpose of
  # this script, that doesn't matter.
  lines = (line.rstrip() for line in stream)
  function_line = None
  samples = []
  for line in lines:
    if not line:
      continue

    if line[0].isspace():
      assert function_line is not None, "sample exists outside of a function?"
      samples.append(line)
      continue

    if function_line is not None:
      yield ProfileRecord(function_line=function_line, samples=tuple(samples))
    function_line = line
    samples = []

  if function_line is not None:
    yield ProfileRecord(function_line=function_line, samples=tuple(samples))
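
# A minimal sketch of the parse, using made-up input lines:
#   ['foo:10:2', ' 1: 5', 'bar:3:1']
# yields
#   ProfileRecord(function_line='foo:10:2', samples=(' 1: 5',))
#   ProfileRecord(function_line='bar:3:1', samples=())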


# The default of 100 is arbitrarily selected, but it does make the overwhelming
# majority of obvious sample duplication disappear.
#
# We experimented briefly with an nm-powered version of this script (rather
# than structural matching, we'd check which functions mapped to the same
# literal address). Running this script with a high value (100) for
# max_repeats produced results basically indistinguishable from the nm
# approach, so we stuck with structural matching.
#
# Non-nm based approaches are superior because they don't require any prior
# build artifacts; just an AFDO profile.
def dedup_records(profile_records, summary_file, max_repeats=100):
  """Removes heavily duplicated records from profile_records.

  profile_records is expected to be an iterable of ProfileRecord.
  max_repeats is how many functions must share identical bodies for us to
    consider them 'heavily duplicated' and remove them all.
  """

  # Build a mapping of function structure -> list of functions with identical
  # structure and sample counts
  counts = collections.defaultdict(list)
  for record in profile_records:
    counts[_normalize_samples(record.samples)].append(record)

  # Be sure that we didn't see any duplicate functions, since that's bad...
  total_functions_recorded = sum(len(records)
                                 for records in counts.itervalues())

  unique_function_names = set(record.function_line.split(':')[0]
                              for records in counts.itervalues()
                              for record in records)

  assert len(unique_function_names) == total_functions_recorded, \
      'duplicate function names?'

  num_kept = 0
  num_samples_kept = 0
  num_top_samples_kept = 0
  num_total = 0
  num_samples_total = 0
  num_top_samples_total = 0

  for normalized_samples, records in counts.iteritems():
    top_sample_count, all_sample_count = _count_samples(normalized_samples)
    top_sample_count *= len(records)
    all_sample_count *= len(records)

    num_total += len(records)
    num_samples_total += all_sample_count
    num_top_samples_total += top_sample_count

    if len(records) >= max_repeats:
      continue

    num_kept += len(records)
    num_samples_kept += all_sample_count
    num_top_samples_kept += top_sample_count
    for record in records:
      yield record

  print('Retained {:,}/{:,} functions'.format(num_kept, num_total),
        file=summary_file)
  print('Retained {:,}/{:,} samples, total'.format(num_samples_kept,
                                                   num_samples_total),
        file=summary_file)
  print('Retained {:,}/{:,} top-level samples' \
            .format(num_top_samples_kept, num_top_samples_total),
        file=summary_file)
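
# To make the cutoff concrete (made-up counts): with the default
# max_repeats=100, a normalized body shared by 100 or more functions is
# dropped entirely, while one shared by only 99 functions is kept, all 99
# copies of it.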


def run(profile_input_file, summary_output_file, profile_output_file):
  profile_records = _read_textual_afdo_profile(profile_input_file)

  # Sort this so we get deterministic output. AFDO doesn't care what order it's
  # in.
  deduped = sorted(dedup_records(profile_records, summary_output_file),
                   key=lambda r: r.function_line)
  for function_line, samples in deduped:
    print(function_line, file=profile_output_file)
    print('\n'.join(samples), file=profile_output_file)


def _main():
  run(profile_input_file=sys.stdin, summary_output_file=sys.stderr,
      profile_output_file=sys.stdout)


if __name__ == '__main__':
  _main()