#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Copyright 2018 The ChromiumOS Authors # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Script to redact apparent ICF'ed symbolsfrom textual AFDO profiles. AFDO sampling and ICF have an unfortunate interaction that causes a huge inflation in sample counts. Essentially, if you have N functions ICF'ed to the same location, one AFDO sample in any of those N functions will count as one sample in *each* of those N functions. In practice, there are a few forms of function bodies that are very heavily ICF'ed (e.g. `ret`, `xor %eax, %eax; ret`, destructors for widely-used types like std::map...). Recording 28,000 samples across all N thousand logical functions that point to the same body really hurts our AFDO numbers, given that our actual sample count across all of Chrome is something around 10,000,000. (No, really, these are actual numbers. In practice, at the time of writing, this script eliminates >90% of our AFDO samples by count. Sometimes as high as 98%.) It reads a textual AFDO profile from stdin, and prints a 'fixed' version of it to stdout. A summary of what the script actually did is printed to stderr. """ import collections import re import sys def _count_samples(samples): """Count the total number of samples in a function.""" line_re = re.compile(r"^(\s*)\d+(?:\.\d+)?: (\d+)\s*$") top_level_samples = 0 all_samples = 0 for line in samples: m = line_re.match(line) if not m: continue spaces, n = m.groups() n = int(n) all_samples += n if len(spaces) == 1: top_level_samples += n return top_level_samples, all_samples # A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO # profile. function_line is the top line of said function, and `samples` is # a list of all of the sample lines below that. # # We rely on the format of these strings in some places in this script. For # reference, a full function sample will look something like: # # _ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185 # 6: 83 # 15: 126 # 62832: 126 # 6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349 # 1: 206 # 1: _ZNK5blink10PaintLayer14GroupedMappersEv:2060 # 1: 206 # 11: _ZNK5blink10PaintLayer25GetCompositedLayerMappingEv:800 # 2.1: 80 # # # In that case, function_line is # '_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185', and samples will be # every line below that. # # Function lines look like; # function_symbol:entry_count:dont_care # # And samples look like one of: # arbitrary_number: sample_count # arbitrary_number: inlined_function_symbol:inlined_entry_count ProfileRecord = collections.namedtuple( "ProfileRecord", ["function_line", "samples"] ) def _normalize_samples(samples): """Normalizes the samples in the given function body. Normalization just means that we redact inlined function names. This is done so that a bit of templating doesn't make two function bodies look distinct. Namely: template __attribute__((noinline)) int getNumber() { return 1; } template __attribute__((noinline)) int getNumberIndirectly() { return getNumber(); } int main() { return getNumber() + getNumber(); } If the profile has the mangled name for getNumber in getNumberIndirectly (and similar for ), we'll consider them to be distinct when they're not. """ # I'm not actually sure if this ends up being an issue in practice, but it's # simple enough to guard against. inlined_re = re.compile(r"(^\s*\d+): [^:]+:(\s*\d+)\s*$") result = [] for s in samples: m = inlined_re.match(s) if m: result.append("%s: __REDACTED__:%s" % m.groups()) else: result.append(s) return tuple(result) def _read_textual_afdo_profile(stream): """Parses an AFDO profile from a line stream into ProfileRecords.""" # ProfileRecords are actually nested, due to inlining. For the purpose of # this script, that doesn't matter. lines = (line.rstrip() for line in stream) function_line = None samples = [] for line in lines: if not line: continue if line[0].isspace(): assert ( function_line is not None ), "sample exists outside of a function?" samples.append(line) continue if function_line is not None: yield ProfileRecord( function_line=function_line, samples=tuple(samples) ) function_line = line samples = [] if function_line is not None: yield ProfileRecord(function_line=function_line, samples=tuple(samples)) # The default of 100 is arbitrarily selected, but it does make the overwhelming # majority of obvious sample duplication disappear. # # We experimented shortly with an nm-powered version of this script (rather # than structural matching, we'd see which functions mapped to the same literal # address). Running this with a high value (100) for max_repeats produced # results basically indistinguishable from nm, so ... # # Non-nm based approaches are superior because they don't require any prior # build artifacts; just an AFDO profile. def dedup_records(profile_records, summary_file, max_repeats=100): """Removes heavily duplicated records from profile_records. profile_records is expected to be an iterable of ProfileRecord. max_repeats ia how many functions must share identical bodies for us to consider it 'heavily duplicated' and remove the results. """ # Build a mapping of function structure -> list of functions with identical # structure and sample counts counts = collections.defaultdict(list) for record in profile_records: counts[_normalize_samples(record.samples)].append(record) # Be sure that we didn't see any duplicate functions, since that's bad... total_functions_recorded = sum(len(records) for records in counts.values()) unique_function_names = { record.function_line.split(":")[0] for records in counts.values() for record in records } assert ( len(unique_function_names) == total_functions_recorded ), "duplicate function names?" num_kept = 0 num_samples_kept = 0 num_top_samples_kept = 0 num_total = 0 num_samples_total = 0 num_top_samples_total = 0 for normalized_samples, records in counts.items(): top_sample_count, all_sample_count = _count_samples(normalized_samples) top_sample_count *= len(records) all_sample_count *= len(records) num_total += len(records) num_samples_total += all_sample_count num_top_samples_total += top_sample_count if len(records) >= max_repeats: continue num_kept += len(records) num_samples_kept += all_sample_count num_top_samples_kept += top_sample_count for record in records: yield record print( "Retained {:,}/{:,} functions".format(num_kept, num_total), file=summary_file, ) print( "Retained {:,}/{:,} samples, total".format( num_samples_kept, num_samples_total ), file=summary_file, ) print( "Retained {:,}/{:,} top-level samples".format( num_top_samples_kept, num_top_samples_total ), file=summary_file, ) def run(profile_input_file, summary_output_file, profile_output_file): profile_records = _read_textual_afdo_profile(profile_input_file) # Sort this so we get deterministic output. AFDO doesn't care what order it's # in. deduped = sorted( dedup_records(profile_records, summary_output_file), key=lambda r: r.function_line, ) for function_line, samples in deduped: print(function_line, file=profile_output_file) print("\n".join(samples), file=profile_output_file) def _main(): run( profile_input_file=sys.stdin, summary_output_file=sys.stderr, profile_output_file=sys.stdout, ) if __name__ == "__main__": _main()