diff options
author | Evelina Dumitrescu <evelinad@google.com> | 2016-08-19 21:48:16 -0700 |
---|---|---|
committer | chrome-bot <chrome-bot@chromium.org> | 2017-02-01 16:41:50 -0800 |
commit | 33d11c87bad5dc998d48bb2feb81b7e2918528e2 (patch) | |
tree | e22b28dfd5874cbe159fabdf98fc12bd65f70017 | |
parent | 3594db8be24f785b80781bfa43bacb4186cf8b30 (diff) | |
download | toolchain-utils-33d11c87bad5dc998d48bb2feb81b7e2918528e2.tar.gz |
user_activity_benchmarks: Added tool for processing pprof and CWP data.
BUG=None
TEST=process_hot_functions_unitttest passes
Change-Id: I9956b34e9d1ca902a23be86dd8e21208462f0117
Reviewed-on: https://chrome-internal-review.googlesource.com/278697
Reviewed-by: Luis Lozano <llozano@chromium.org>
Tested-by: Evelina Dumitrescu <evelinad@google.com>
Reviewed-on: https://chromium-review.googlesource.com/435454
Commit-Ready: Luis Lozano <llozano@chromium.org>
Tested-by: Luis Lozano <llozano@chromium.org>
-rwxr-xr-x | user_activity_benchmarks/process_hot_functions.py | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/user_activity_benchmarks/process_hot_functions.py b/user_activity_benchmarks/process_hot_functions.py new file mode 100755 index 00000000..c6084322 --- /dev/null +++ b/user_activity_benchmarks/process_hot_functions.py @@ -0,0 +1,245 @@ +#!/usr/bin/python2 + +# Copyright 2016 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data. + +The pprof output files should have the format given by the output of the +pprof --top command. A line containing a statistic should include the flat, +flat%, sum%, cum, cum%, function name and file name, separated by a space. + +The CWP hot functions should be specified in a CSV file that should contain the +fields for the function name, the file and the object where that function is +declared and the inclusive count value. + +For each pprof output file, the tool will output a file that contains the hot +functions present also in the CWP hot functions file. Afterwards, it extracts +the functions that are present in the CWP functions file and not in the +pprof output files. +""" + +import argparse +import csv +import os +import re +import sys + + +class HotFunctionsProcessor(object): + """Does the pprof and CWP output processing. + + Extracts the common and extra functions from the pprof output files, based on + the provided CWP functions. + """ + + # Constants used to identify if a function is common in the pprof and CWP + # files. + COMMON_FUNCTION = 1 + NOT_COMMON_FUNCTION = 0 + + def __init__(self, pprof_path, cwp_functions_file, common_functions_path, + extra_cwp_functions_file): + """Initializes the HotFunctionsProcessor. + + Args: + pprof_path: The directory containing the pprof output files. + cwp_functions_file: The file containing the CWP data. + common_functions_path: The directory where the files with the CWP and + pprof common functions should be stored. + extra_cwp_functions_file: The file where should be stored the CWP + functions that are not in the given pprof output files. + """ + self._pprof_path = pprof_path + self._cwp_functions_file = cwp_functions_file + self._common_functions_path = common_functions_path + self._extra_cwp_functions_file = extra_cwp_functions_file + + def ProcessHotFunctions(self): + """Does the processing of the hot functions.""" + cwp_statistics = \ + self.ExtractCommonFunctions(self._pprof_path, + self._common_functions_path, + self._cwp_functions_file) + + self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file) + + def ParseCWPStatistics(self, cwp_statistics_file_name): + """Parses the contents of the file containing the CWP data. + + A line contains the name of the function, the corresponding filenames, the + object files and their inclusive count values in CSV format. + + Args: + cwp_statistics_file_name: The name of the file containing the CWP data + in CSV format. + + Returns: + A dict containing the CWP statistics. The key contains the name of the + functions with the file name comma separated. The value represents a + tuple with the statistics and a marker to identify if the function is + present in one of the pprof files. + """ + cwp_statistics = {} + + with open(cwp_statistics_file_name) as cwp_statistics_file: + statistics_reader = csv.DictReader(cwp_statistics_file, delimiter=',') + + for statistic in statistics_reader: + function_name = statistic['function'] + file_name = statistic['file'] + dso_name = statistic['dso'] + inclusive_count = statistic['inclusive_count'] + + # We ignore the lines that have empty fields(i.e they specify only the + # addresses of the functions and the inclusive counts values). + if all([function_name, file_name, dso_name, inclusive_count]): + key = '%s,%s' % (function_name, file_name) + value = \ + ('%s,%s' % (dso_name, inclusive_count), self.NOT_COMMON_FUNCTION) + # All the functions are marked as NOT_COMMON_FUNCTION. + cwp_statistics[key] = value + + return cwp_statistics + + def ExtractCommonFunctions(self, pprof_path, common_functions_path, + cwp_functions_file): + """Extracts the common functions of the pprof files and the CWP file. + + For each pprof file, it creates a separate file with the same name + containing the common functions, that will be placed in the + common_functions_path directory. + + The resulting file is CSV format, containing the following fields: + function name, file name, object, inclusive count, flat, flat%, sum%, cum, + cum%. + + It builds a dict of the CWP statistics and if a function is common, it is + marked as a COMMON_FUNCTION. + + Args: + pprof_path: The directory with the pprof files. + common_functions_path: The directory with the common functions files. + cwp_functions_file: The file with the CWP data. + + Returns: + A dict containing the CWP statistics with the common functions marked as + COMMON_FUNCTION. + """ + # Get the list of pprof files from the given path. + pprof_files = os.listdir(pprof_path) + cwp_statistics = self.ParseCWPStatistics(cwp_functions_file) + function_statistic_regex = re.compile(r'\S+\s+\S+%\s+\S+%\s+\S+\s+\S+%') + function_regex = re.compile(r'[a-zA-Z0-9-/_:.~\[\]]+[ a-zA-Z0-9-/_~:.]*') + + for pprof_file in pprof_files: + # In the pprof output, the statistics of the functions start from the + # 8th line. + with open(os.path.join(pprof_path, pprof_file), 'r') as input_file: + pprof_statistics = input_file.readlines()[6:] + output_lines = \ + ['function,file,dso,inclusive_count,flat,flat%,sum%,cum,cum%'] + + for pprof_statistic in pprof_statistics: + function_statistic_match = \ + function_statistic_regex.search(pprof_statistic) + function_statistic = \ + ','.join(function_statistic_match.group(0).split()) + function_match = function_regex.search(pprof_statistic[ + function_statistic_match.end():]) + function = ','.join(function_match.group(0).split()) + + if function in cwp_statistics: + cwp_statistic = cwp_statistics[function] + output_lines.append(','.join([function, cwp_statistic[0], + function_statistic])) + cwp_statistics[function] = (cwp_statistic[0], self.COMMON_FUNCTION) + + with open(os.path.join(common_functions_path, pprof_file), 'w') \ + as output_file: + output_file.write('\n'.join(output_lines)) + + return cwp_statistics + + def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file): + """Gets the functions that are in the CWP file, but not in the pprof output. + + Writes the functions and their statistics in the extra_cwp_functions_file + file. The file is in CSV format, containing the fields: function name, + file name, object name, inclusive count. + + Args: + cwp_statistics: A dict containing the CWP statistics. + extra_cwp_functions_file: The file where should be stored the CWP + functions and statistics that are marked as NOT_COMMON_FUNCTIONS. + """ + output_lines = ['function,file,dso,inclusive_count'] + + for function, statistics in cwp_statistics.iteritems(): + if statistics[1] == self.NOT_COMMON_FUNCTION: + output_lines.append(function + ',' + statistics[0]) + + with open(extra_cwp_functions_file, 'w') as output_file: + output_file.write('\n'.join(output_lines)) + + +def ParseArguments(arguments): + parser = argparse.ArgumentParser() + + parser.add_argument( + '-p', + '--pprof_path', + dest='pprof_path', + required=True, + help='The directory containing the pprof output files.') + parser.add_argument( + '-w', + '--cwp_hot_functions_file', + dest='cwp_hot_functions_file', + required=True, + help='The CSV file containing the CWP hot functions. The ' + 'file should include the name of the functions, the ' + 'file names with the definition, the object file ' + 'and the CWP inclusive count values, comma ' + 'separated.') + parser.add_argument( + '-c', + '--common_functions_path', + dest='common_functions_path', + required=True, + help='The directory containing the files with the pprof ' + 'and CWP common functions. A file will contain all ' + 'the hot functions from a pprof output file that ' + 'are also included in the CWP hot functions file. ' + 'The files with the common functions will have the ' + 'same names with the corresponding pprof output ' + 'files.') + parser.add_argument( + '-e', + '--extra_cwp_functions_file', + dest='extra_cwp_functions_file', + required=True, + help='The file that will contain the CWP hot functions ' + 'that are not in any of the pprof output files. ' + 'The file should include the name of the functions, ' + 'the file names with the definition, the object ' + 'file and the CWP inclusive count values, comma ' + 'separated.') + + options = parser.parse_args(arguments) + + return options + + +def Main(argv): + options = ParseArguments(argv) + + hot_functions_processor = HotFunctionsProcessor(options.pprof_path, \ + options.cwp_hot_functions_file, options.common_functions_path, \ + options.extra_cwp_functions_file) + + hot_functions_processor.ProcessHotFunctions() + + +if __name__ == '__main__': + Main(sys.argv[1:]) |