1 files changed, 160 insertions, 146 deletions
diff --git a/afdo_redaction/redact_profile.py b/afdo_redaction/redact_profile.py
index 02bae928..0779d2ac 100755
--- a/afdo_redaction/redact_profile.py
+++ b/afdo_redaction/redact_profile.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Copyright 2018 The ChromiumOS Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
@@ -24,7 +24,6 @@ It reads a textual AFDO profile from stdin, and prints a 'fixed' version of it
 to stdout. A summary of what the script actually did is printed to stderr.
 """
 
-from __future__ import division, print_function
 
 import collections
 import re
@@ -32,23 +31,23 @@ import sys
 
 
 def _count_samples(samples):
-  """Count the total number of samples in a function."""
-  line_re = re.compile(r'^(\s*)\d+(?:\.\d+)?: (\d+)\s*$')
+    """Count the total number of samples in a function."""
+    line_re = re.compile(r"^(\s*)\d+(?:\.\d+)?: (\d+)\s*$")
 
-  top_level_samples = 0
-  all_samples = 0
-  for line in samples:
-    m = line_re.match(line)
-    if not m:
-      continue
+    top_level_samples = 0
+    all_samples = 0
+    for line in samples:
+        m = line_re.match(line)
+        if not m:
+            continue
 
-    spaces, n = m.groups()
-    n = int(n)
-    all_samples += n
-    if len(spaces) == 1:
-      top_level_samples += n
+        spaces, n = m.groups()
+        n = int(n)
+        all_samples += n
+        if len(spaces) == 1:
+            top_level_samples += n
 
-  return top_level_samples, all_samples
+    return top_level_samples, all_samples
 
 
 # A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO
@@ -80,70 +79,75 @@ def _count_samples(samples):
 # And samples look like one of:
 #  arbitrary_number: sample_count
 #  arbitrary_number: inlined_function_symbol:inlined_entry_count
-ProfileRecord = collections.namedtuple('ProfileRecord',
-                                       ['function_line', 'samples'])
+ProfileRecord = collections.namedtuple(
+    "ProfileRecord", ["function_line", "samples"]
+)
 
 
 def _normalize_samples(samples):
-  """Normalizes the samples in the given function body.
-
-  Normalization just means that we redact inlined function names. This is
-  done so that a bit of templating doesn't make two function bodies look
-  distinct. Namely:
-
-  template <typename T>
-  __attribute__((noinline))
-  int getNumber() { return 1; }
-
-  template <typename T>
-  __attribute__((noinline))
-  int getNumberIndirectly() { return getNumber<T>(); }
-
-  int main() {
-    return getNumber<int>() + getNumber<float>();
-  }
-
-  If the profile has the mangled name for getNumber<float> in
-  getNumberIndirectly<float> (and similar for <int>), we'll consider them to
-  be distinct when they're not.
-  """
-
-  # I'm not actually sure if this ends up being an issue in practice, but it's
-  # simple enough to guard against.
-  inlined_re = re.compile(r'(^\s*\d+): [^:]+:(\s*\d+)\s*$')
-  result = []
-  for s in samples:
-    m = inlined_re.match(s)
-    if m:
-      result.append('%s: __REDACTED__:%s' % m.groups())
-    else:
-      result.append(s)
-  return tuple(result)
+    """Normalizes the samples in the given function body.
+
+    Normalization just means that we redact inlined function names. This is
+    done so that a bit of templating doesn't make two function bodies look
+    distinct. Namely:
+
+    template <typename T>
+    __attribute__((noinline))
+    int getNumber() { return 1; }
+
+    template <typename T>
+    __attribute__((noinline))
+    int getNumberIndirectly() { return getNumber<T>(); }
+
+    int main() {
+      return getNumber<int>() + getNumber<float>();
+    }
+
+    If the profile has the mangled name for getNumber<float> in
+    getNumberIndirectly<float> (and similar for <int>), we'll consider them to
+    be distinct when they're not.
+    """
+
+    # I'm not actually sure if this ends up being an issue in practice, but it's
+    # simple enough to guard against.
+    inlined_re = re.compile(r"(^\s*\d+): [^:]+:(\s*\d+)\s*$")
+    result = []
+    for s in samples:
+        m = inlined_re.match(s)
+        if m:
+            result.append("%s: __REDACTED__:%s" % m.groups())
+        else:
+            result.append(s)
+    return tuple(result)
 
 
 def _read_textual_afdo_profile(stream):
-  """Parses an AFDO profile from a line stream into ProfileRecords."""
-  # ProfileRecords are actually nested, due to inlining. For the purpose of
-  # this script, that doesn't matter.
-  lines = (line.rstrip() for line in stream)
-  function_line = None
-  samples = []
-  for line in lines:
-    if not line:
-      continue
-
-    if line[0].isspace():
-      assert function_line is not None, 'sample exists outside of a function?'
-      samples.append(line)
-      continue
-
-    if function_line is not None:
-      yield ProfileRecord(function_line=function_line, samples=tuple(samples))
-    function_line = line
+    """Parses an AFDO profile from a line stream into ProfileRecords."""
+    # ProfileRecords are actually nested, due to inlining. For the purpose of
+    # this script, that doesn't matter.
+    lines = (line.rstrip() for line in stream)
+    function_line = None
     samples = []
+    for line in lines:
+        if not line:
+            continue
+
+        if line[0].isspace():
+            assert (
+                function_line is not None
+            ), "sample exists outside of a function?"
+            samples.append(line)
+            continue
+
+        if function_line is not None:
+            yield ProfileRecord(
+                function_line=function_line, samples=tuple(samples)
+            )
+        function_line = line
+        samples = []
 
-  if function_line is not None:
-    yield ProfileRecord(function_line=function_line, samples=tuple(samples))
+    if function_line is not None:
+        yield ProfileRecord(function_line=function_line, samples=tuple(samples))
 
 
 # The default of 100 is arbitrarily selected, but it does make the overwhelming
@@ -157,86 +161,96 @@ def _read_textual_afdo_profile(stream):
 # Non-nm based approaches are superior because they don't require any prior
 # build artifacts; just an AFDO profile.
 def dedup_records(profile_records, summary_file, max_repeats=100):
-  """Removes heavily duplicated records from profile_records.
-
-  profile_records is expected to be an iterable of ProfileRecord.
-  max_repeats ia how many functions must share identical bodies for us to
-    consider it 'heavily duplicated' and remove the results.
-  """
-
-  # Build a mapping of function structure -> list of functions with identical
-  # structure and sample counts
-  counts = collections.defaultdict(list)
-  for record in profile_records:
-    counts[_normalize_samples(record.samples)].append(record)
-
-  # Be sure that we didn't see any duplicate functions, since that's bad...
-  total_functions_recorded = sum(len(records) for records in counts.values())
-
-  unique_function_names = {
-      record.function_line.split(':')[0]
-      for records in counts.values()
-      for record in records
-  }
-
-  assert len(unique_function_names) == total_functions_recorded, \
-      'duplicate function names?'
-
-  num_kept = 0
-  num_samples_kept = 0
-  num_top_samples_kept = 0
-  num_total = 0
-  num_samples_total = 0
-  num_top_samples_total = 0
-
-  for normalized_samples, records in counts.items():
-    top_sample_count, all_sample_count = _count_samples(normalized_samples)
-    top_sample_count *= len(records)
-    all_sample_count *= len(records)
-
-    num_total += len(records)
-    num_samples_total += all_sample_count
-    num_top_samples_total += top_sample_count
-
-    if len(records) >= max_repeats:
-      continue
-
-    num_kept += len(records)
-    num_samples_kept += all_sample_count
-    num_top_samples_kept += top_sample_count
-    for record in records:
-      yield record
-
-  print(
-      'Retained {:,}/{:,} functions'.format(num_kept, num_total),
-      file=summary_file)
-  print(
-      'Retained {:,}/{:,} samples, total'.format(num_samples_kept,
-                                                 num_samples_total),
-      file=summary_file)
-  print('Retained {:,}/{:,} top-level samples' \
-            .format(num_top_samples_kept, num_top_samples_total),
-        file=summary_file)
+    """Removes heavily duplicated records from profile_records.
+
+    profile_records is expected to be an iterable of ProfileRecord.
+    max_repeats is how many functions must share identical bodies for us to
+      consider it 'heavily duplicated' and remove the results.
+    """
+
+    # Build a mapping of function structure -> list of functions with identical
+    # structure and sample counts
+    counts = collections.defaultdict(list)
+    for record in profile_records:
+        counts[_normalize_samples(record.samples)].append(record)
+
+    # Be sure that we didn't see any duplicate functions, since that's bad...
+    total_functions_recorded = sum(len(records) for records in counts.values())
+
+    unique_function_names = {
+        record.function_line.split(":")[0]
+        for records in counts.values()
+        for record in records
+    }
+
+    assert (
+        len(unique_function_names) == total_functions_recorded
+    ), "duplicate function names?"
+
+    num_kept = 0
+    num_samples_kept = 0
+    num_top_samples_kept = 0
+    num_total = 0
+    num_samples_total = 0
+    num_top_samples_total = 0
+
+    for normalized_samples, records in counts.items():
+        top_sample_count, all_sample_count = _count_samples(normalized_samples)
+        top_sample_count *= len(records)
+        all_sample_count *= len(records)
+
+        num_total += len(records)
+        num_samples_total += all_sample_count
+        num_top_samples_total += top_sample_count
+
+        if len(records) >= max_repeats:
+            continue
+
+        num_kept += len(records)
+        num_samples_kept += all_sample_count
+        num_top_samples_kept += top_sample_count
+        for record in records:
+            yield record
+
+    print(
+        "Retained {:,}/{:,} functions".format(num_kept, num_total),
+        file=summary_file,
+    )
+    print(
+        "Retained {:,}/{:,} samples, total".format(
+            num_samples_kept, num_samples_total
+        ),
+        file=summary_file,
+    )
+    print(
+        "Retained {:,}/{:,} top-level samples".format(
+            num_top_samples_kept, num_top_samples_total
+        ),
+        file=summary_file,
+    )
 
 
 def run(profile_input_file, summary_output_file, profile_output_file):
-  profile_records = _read_textual_afdo_profile(profile_input_file)
+    profile_records = _read_textual_afdo_profile(profile_input_file)
 
-  # Sort this so we get deterministic output. AFDO doesn't care what order it's
-  # in.
-  deduped = sorted(
-      dedup_records(profile_records, summary_output_file),
-      key=lambda r: r.function_line)
-  for function_line, samples in deduped:
-    print(function_line, file=profile_output_file)
-    print('\n'.join(samples), file=profile_output_file)
+    # Sort this so we get deterministic output. AFDO doesn't care what order it's
+    # in.
+    deduped = sorted(
+        dedup_records(profile_records, summary_output_file),
+        key=lambda r: r.function_line,
+    )
+    for function_line, samples in deduped:
+        print(function_line, file=profile_output_file)
+        print("\n".join(samples), file=profile_output_file)
 
 
 def _main():
-  run(profile_input_file=sys.stdin,
-      summary_output_file=sys.stderr,
-      profile_output_file=sys.stdout)
+    run(
+        profile_input_file=sys.stdin,
+        summary_output_file=sys.stderr,
+        profile_output_file=sys.stdout,
+    )
 
 
-if __name__ == '__main__':
-  _main()
+if __name__ == "__main__":
+    _main()