aboutsummaryrefslogtreecommitdiff
path: root/afdo_redaction/redact_profile.py
diff options
context:
space:
mode:
Diffstat (limited to 'afdo_redaction/redact_profile.py')
-rwxr-xr-x afdo_redaction/redact_profile.py 306
1 files changed, 160 insertions, 146 deletions
diff --git a/afdo_redaction/redact_profile.py b/afdo_redaction/redact_profile.py
index 02bae928..0779d2ac 100755
--- a/afdo_redaction/redact_profile.py
+++ b/afdo_redaction/redact_profile.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Copyright 2018 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -24,7 +24,6 @@ It reads a textual AFDO profile from stdin, and prints a 'fixed' version of it
to stdout. A summary of what the script actually did is printed to stderr.
"""
-from __future__ import division, print_function
import collections
import re
@@ -32,23 +31,23 @@ import sys
def _count_samples(samples):
- """Count the total number of samples in a function."""
- line_re = re.compile(r'^(\s*)\d+(?:\.\d+)?: (\d+)\s*$')
+ """Count the total number of samples in a function."""
+ line_re = re.compile(r"^(\s*)\d+(?:\.\d+)?: (\d+)\s*$")
- top_level_samples = 0
- all_samples = 0
- for line in samples:
- m = line_re.match(line)
- if not m:
- continue
+ top_level_samples = 0
+ all_samples = 0
+ for line in samples:
+ m = line_re.match(line)
+ if not m:
+ continue
- spaces, n = m.groups()
- n = int(n)
- all_samples += n
- if len(spaces) == 1:
- top_level_samples += n
+ spaces, n = m.groups()
+ n = int(n)
+ all_samples += n
+ if len(spaces) == 1:
+ top_level_samples += n
- return top_level_samples, all_samples
+ return top_level_samples, all_samples
# A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO
@@ -80,70 +79,75 @@ def _count_samples(samples):
# And samples look like one of:
# arbitrary_number: sample_count
# arbitrary_number: inlined_function_symbol:inlined_entry_count
-ProfileRecord = collections.namedtuple('ProfileRecord',
- ['function_line', 'samples'])
+ProfileRecord = collections.namedtuple(
+ "ProfileRecord", ["function_line", "samples"]
+)
def _normalize_samples(samples):
- """Normalizes the samples in the given function body.
-
- Normalization just means that we redact inlined function names. This is
- done so that a bit of templating doesn't make two function bodies look
- distinct. Namely:
-
- template <typename T>
- __attribute__((noinline))
- int getNumber() { return 1; }
-
- template <typename T>
- __attribute__((noinline))
- int getNumberIndirectly() { return getNumber<T>(); }
-
- int main() {
- return getNumber<int>() + getNumber<float>();
- }
-
- If the profile has the mangled name for getNumber<float> in
- getNumberIndirectly<float> (and similar for <int>), we'll consider them to
- be distinct when they're not.
- """
-
- # I'm not actually sure if this ends up being an issue in practice, but it's
- # simple enough to guard against.
- inlined_re = re.compile(r'(^\s*\d+): [^:]+:(\s*\d+)\s*$')
- result = []
- for s in samples:
- m = inlined_re.match(s)
- if m:
- result.append('%s: __REDACTED__:%s' % m.groups())
- else:
- result.append(s)
- return tuple(result)
+ """Normalizes the samples in the given function body.
+
+ Normalization just means that we redact inlined function names. This is
+ done so that a bit of templating doesn't make two function bodies look
+ distinct. Namely:
+
+ template <typename T>
+ __attribute__((noinline))
+ int getNumber() { return 1; }
+
+ template <typename T>
+ __attribute__((noinline))
+ int getNumberIndirectly() { return getNumber<T>(); }
+
+ int main() {
+ return getNumber<int>() + getNumber<float>();
+ }
+
+ If the profile has the mangled name for getNumber<float> in
+ getNumberIndirectly<float> (and similar for <int>), we'll consider them to
+ be distinct when they're not.
+ """
+
+ # I'm not actually sure if this ends up being an issue in practice, but it's
+ # simple enough to guard against.
+ inlined_re = re.compile(r"(^\s*\d+): [^:]+:(\s*\d+)\s*$")
+ result = []
+ for s in samples:
+ m = inlined_re.match(s)
+ if m:
+ result.append("%s: __REDACTED__:%s" % m.groups())
+ else:
+ result.append(s)
+ return tuple(result)
def _read_textual_afdo_profile(stream):
- """Parses an AFDO profile from a line stream into ProfileRecords."""
- # ProfileRecords are actually nested, due to inlining. For the purpose of
- # this script, that doesn't matter.
- lines = (line.rstrip() for line in stream)
- function_line = None
- samples = []
- for line in lines:
- if not line:
- continue
-
- if line[0].isspace():
- assert function_line is not None, 'sample exists outside of a function?'
- samples.append(line)
- continue
-
- if function_line is not None:
- yield ProfileRecord(function_line=function_line, samples=tuple(samples))
- function_line = line
+ """Parses an AFDO profile from a line stream into ProfileRecords."""
+ # ProfileRecords are actually nested, due to inlining. For the purpose of
+ # this script, that doesn't matter.
+ lines = (line.rstrip() for line in stream)
+ function_line = None
samples = []
+ for line in lines:
+ if not line:
+ continue
+
+ if line[0].isspace():
+ assert (
+ function_line is not None
+ ), "sample exists outside of a function?"
+ samples.append(line)
+ continue
+
+ if function_line is not None:
+ yield ProfileRecord(
+ function_line=function_line, samples=tuple(samples)
+ )
+ function_line = line
+ samples = []
- if function_line is not None:
- yield ProfileRecord(function_line=function_line, samples=tuple(samples))
+ if function_line is not None:
+ yield ProfileRecord(function_line=function_line, samples=tuple(samples))
# The default of 100 is arbitrarily selected, but it does make the overwhelming
@@ -157,86 +161,96 @@ def _read_textual_afdo_profile(stream):
# Non-nm based approaches are superior because they don't require any prior
# build artifacts; just an AFDO profile.
def dedup_records(profile_records, summary_file, max_repeats=100):
- """Removes heavily duplicated records from profile_records.
-
- profile_records is expected to be an iterable of ProfileRecord.
- max_repeats ia how many functions must share identical bodies for us to
- consider it 'heavily duplicated' and remove the results.
- """
-
- # Build a mapping of function structure -> list of functions with identical
- # structure and sample counts
- counts = collections.defaultdict(list)
- for record in profile_records:
- counts[_normalize_samples(record.samples)].append(record)
-
- # Be sure that we didn't see any duplicate functions, since that's bad...
- total_functions_recorded = sum(len(records) for records in counts.values())
-
- unique_function_names = {
- record.function_line.split(':')[0]
- for records in counts.values()
- for record in records
- }
-
- assert len(unique_function_names) == total_functions_recorded, \
- 'duplicate function names?'
-
- num_kept = 0
- num_samples_kept = 0
- num_top_samples_kept = 0
- num_total = 0
- num_samples_total = 0
- num_top_samples_total = 0
-
- for normalized_samples, records in counts.items():
- top_sample_count, all_sample_count = _count_samples(normalized_samples)
- top_sample_count *= len(records)
- all_sample_count *= len(records)
-
- num_total += len(records)
- num_samples_total += all_sample_count
- num_top_samples_total += top_sample_count
-
- if len(records) >= max_repeats:
- continue
-
- num_kept += len(records)
- num_samples_kept += all_sample_count
- num_top_samples_kept += top_sample_count
- for record in records:
- yield record
-
- print(
- 'Retained {:,}/{:,} functions'.format(num_kept, num_total),
- file=summary_file)
- print(
- 'Retained {:,}/{:,} samples, total'.format(num_samples_kept,
- num_samples_total),
- file=summary_file)
- print('Retained {:,}/{:,} top-level samples' \
- .format(num_top_samples_kept, num_top_samples_total),
- file=summary_file)
+ """Removes heavily duplicated records from profile_records.
+
+ profile_records is expected to be an iterable of ProfileRecord.
+    max_repeats is how many functions must share identical bodies for us to
+ consider it 'heavily duplicated' and remove the results.
+ """
+
+ # Build a mapping of function structure -> list of functions with identical
+ # structure and sample counts
+ counts = collections.defaultdict(list)
+ for record in profile_records:
+ counts[_normalize_samples(record.samples)].append(record)
+
+ # Be sure that we didn't see any duplicate functions, since that's bad...
+ total_functions_recorded = sum(len(records) for records in counts.values())
+
+ unique_function_names = {
+ record.function_line.split(":")[0]
+ for records in counts.values()
+ for record in records
+ }
+
+ assert (
+ len(unique_function_names) == total_functions_recorded
+ ), "duplicate function names?"
+
+ num_kept = 0
+ num_samples_kept = 0
+ num_top_samples_kept = 0
+ num_total = 0
+ num_samples_total = 0
+ num_top_samples_total = 0
+
+ for normalized_samples, records in counts.items():
+ top_sample_count, all_sample_count = _count_samples(normalized_samples)
+ top_sample_count *= len(records)
+ all_sample_count *= len(records)
+
+ num_total += len(records)
+ num_samples_total += all_sample_count
+ num_top_samples_total += top_sample_count
+
+ if len(records) >= max_repeats:
+ continue
+
+ num_kept += len(records)
+ num_samples_kept += all_sample_count
+ num_top_samples_kept += top_sample_count
+ for record in records:
+ yield record
+
+ print(
+ "Retained {:,}/{:,} functions".format(num_kept, num_total),
+ file=summary_file,
+ )
+ print(
+ "Retained {:,}/{:,} samples, total".format(
+ num_samples_kept, num_samples_total
+ ),
+ file=summary_file,
+ )
+ print(
+ "Retained {:,}/{:,} top-level samples".format(
+ num_top_samples_kept, num_top_samples_total
+ ),
+ file=summary_file,
+ )
def run(profile_input_file, summary_output_file, profile_output_file):
- profile_records = _read_textual_afdo_profile(profile_input_file)
+ profile_records = _read_textual_afdo_profile(profile_input_file)
- # Sort this so we get deterministic output. AFDO doesn't care what order it's
- # in.
- deduped = sorted(
- dedup_records(profile_records, summary_output_file),
- key=lambda r: r.function_line)
- for function_line, samples in deduped:
- print(function_line, file=profile_output_file)
- print('\n'.join(samples), file=profile_output_file)
+ # Sort this so we get deterministic output. AFDO doesn't care what order it's
+ # in.
+ deduped = sorted(
+ dedup_records(profile_records, summary_output_file),
+ key=lambda r: r.function_line,
+ )
+ for function_line, samples in deduped:
+ print(function_line, file=profile_output_file)
+ print("\n".join(samples), file=profile_output_file)
def _main():
- run(profile_input_file=sys.stdin,
- summary_output_file=sys.stderr,
- profile_output_file=sys.stdout)
+ run(
+ profile_input_file=sys.stdin,
+ summary_output_file=sys.stderr,
+ profile_output_file=sys.stdout,
+ )
-if __name__ == '__main__':
- _main()
+if __name__ == "__main__":
+ _main()