diff options
Diffstat (limited to 'user_activity_benchmarks/process_hot_functions.py')
-rwxr-xr-x | user_activity_benchmarks/process_hot_functions.py | 482 |
1 files changed, 482 insertions, 0 deletions
#!/usr/bin/python2

# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data.

The pprof --top and pprof --tree outputs should be extracted from the benchmark
profiles. The outputs contain the hot functions and the call chains.

For each pair of pprof --top and --tree output files, the tool will create a
file that contains the hot functions present also in the extracted CWP data.
The common functions are organized in groups that represent a Chrome OS
component. A function belongs to a group that is defined by a given file path
if it is declared in a file that shares that path.

A set of metrics are computed for each function, benchmark and Chrome OS group
covered by a benchmark.

Afterwards, this script extracts the functions that are present in the CWP
data and not in the benchmark profiles. The extra functions are also grouped
in Chrome OS components.
"""

from collections import defaultdict

import argparse
import os
import shutil
import sys

import benchmark_metrics
import utils


def _WriteCsvFile(file_path, header, lines):
  """Writes a header followed by the given lines to file_path.

  The entries are separated by a newline and no trailing newline is emitted.

  Args:
    file_path: The path of the output file. An existing file is overwritten.
    header: The first line of the file (the CSV field names).
    lines: A list of strings, one for each CSV row, already in output order.
  """
  with open(file_path, 'w') as output_file:
    output_file.write('\n'.join([header] + lines))


def _RecreateDirectory(path):
  """Removes the directory tree at path, if any, and creates an empty one.

  Args:
    path: The path of the directory that should exist and be empty afterwards.
  """
  if os.path.exists(path):
    shutil.rmtree(path)

  os.makedirs(path)


class HotFunctionsProcessor(object):
  """Does the pprof and CWP output processing.

  Extracts the common, extra functions from the pprof files, groups them in
  Chrome OS components. Computes the metrics for the common functions,
  benchmark and Chrome OS groups covered by a benchmark.
  """

  def __init__(self, pprof_top_path, pprof_tree_path, cwp_inclusive_count_file,
               cwp_pairwise_inclusive_count_file, cwp_function_groups_file,
               common_functions_path, common_functions_groups_path,
               benchmark_set_metrics_file, extra_cwp_functions_file,
               extra_cwp_functions_groups_file,
               extra_cwp_functions_groups_path):
    """Initializes the HotFunctionsProcessor.

    Args:
      pprof_top_path: The directory containing the files with the pprof --top
        output.
      pprof_tree_path: The directory containing the files with the pprof --tree
        output.
      cwp_inclusive_count_file: The CSV file containing the CWP functions with
        the inclusive count values.
      cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs
        of parent and child functions with their inclusive count values.
      cwp_function_groups_file: The file that contains the CWP function groups.
      common_functions_path: The directory containing the CSV output files
        with the common functions of the benchmark profiles and CWP data.
      common_functions_groups_path: The directory containing the CSV output
        files with the CWP groups and their metrics that match the common
        functions of the benchmark profiles and CWP.
      benchmark_set_metrics_file: The CSV output file containing the metrics for
        each benchmark.
      extra_cwp_functions_file: The CSV output file containing the functions
        that are in the CWP data, but are not in any of the benchmark profiles.
      extra_cwp_functions_groups_file: The CSV output file containing the groups
        that match the extra CWP functions and their statistics.
      extra_cwp_functions_groups_path: The directory containing the CSV output
        files with the extra CWP functions that match a particular group.
    """
    self._pprof_top_path = pprof_top_path
    self._pprof_tree_path = pprof_tree_path
    self._cwp_inclusive_count_file = cwp_inclusive_count_file
    self._cwp_pairwise_inclusive_count_file = cwp_pairwise_inclusive_count_file
    self._cwp_function_groups_file = cwp_function_groups_file
    self._common_functions_path = common_functions_path
    self._common_functions_groups_path = common_functions_groups_path
    self._benchmark_set_metrics_file = benchmark_set_metrics_file
    self._extra_cwp_functions_file = extra_cwp_functions_file
    self._extra_cwp_functions_groups_file = extra_cwp_functions_groups_file
    self._extra_cwp_functions_groups_path = extra_cwp_functions_groups_path

  def ProcessHotFunctions(self):
    """Does the processing of the hot functions.

    Parses the function groups file, extracts the common functions, then writes
    the extra CWP functions and their groups. The extraction of the common
    functions has to run first, because it marks the common functions in the
    CWP statistics consumed by the other two steps.
    """
    with open(self._cwp_function_groups_file) as input_file:
      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())

    cwp_statistics = self.ExtractCommonFunctions(
        self._pprof_top_path, self._pprof_tree_path,
        self._cwp_inclusive_count_file,
        self._cwp_pairwise_inclusive_count_file, cwp_function_groups,
        self._common_functions_path, self._common_functions_groups_path,
        self._benchmark_set_metrics_file)
    self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file)
    self.GroupExtraFunctions(cwp_statistics, cwp_function_groups,
                             self._extra_cwp_functions_groups_path,
                             self._extra_cwp_functions_groups_file)

  def ExtractCommonFunctions(self, pprof_top_path, pprof_tree_path,
                             cwp_inclusive_count_file,
                             cwp_pairwise_inclusive_count_file,
                             cwp_function_groups, common_functions_path,
                             common_functions_groups_path,
                             benchmark_set_metrics_file):
    """Extracts the common functions of the benchmark profiles and the CWP data.

    For each pair of pprof --top and --tree output files, it creates a separate
    file with the same name containing the common functions specifications and
    metrics, that will be placed in the common_functions_path directory.

    The resulting file is in CSV format, containing the following fields:
    function name, file name, object, inclusive count, inclusive_count_fraction,
    flat, flat%, sum%, cum, cum%, distance and score.

    For each pair of pprof files, an additional file is created with the
    Chrome OS groups that match the common functions.

    The file is in CSV format containing the fields: group name, group path,
    the number of functions that match the group, the cumulative and average
    distance, the cumulative and average score.
    The file has the same name with the pprof file and it is placed in the
    common_functions_groups_path directory.

    For all the analyzed benchmarks, the method creates a CSV output file
    containing the metrics for each benchmark. The CSV fields include the
    benchmark name, the number of common functions, the cumulative and
    average distance and score.

    It builds a dict of the CWP statistics by calling the
    utils.ParseCWPInclusiveCountFile method and if a function is common, it is
    marked as a COMMON_FUNCTION.

    Args:
      pprof_top_path: The name of the directory with the files with the
        pprof --top output.
      pprof_tree_path: The name of the directory with the files with the
        pprof --tree output. A file with the same name is expected here for
        every file found in pprof_top_path.
      cwp_inclusive_count_file: The CSV file with the CWP inclusive count
        values, parsed with utils.ParseCWPInclusiveCountFile.
      cwp_pairwise_inclusive_count_file: The CSV file with the CWP pairwise
        inclusive count values, parsed with
        utils.ParseCWPPairwiseInclusiveCountFile.
      cwp_function_groups: A list of tuples containing the name of the group
        and the corresponding file path.
      common_functions_path: The path containing the output files with the
        common functions and their metrics.
      common_functions_groups_path: The path containing the output files with
        the Chrome OS groups that match the common functions and their metrics.
      benchmark_set_metrics_file: The CSV output file containing the metrics for
        all the analyzed benchmarks.

    Returns:
      A dict containing the CWP statistics with the common functions marked as
      COMMON_FUNCTION.
    """
    cwp_inclusive_count_statistics = utils.ParseCWPInclusiveCountFile(
        cwp_inclusive_count_file)
    cwp_pairwise_inclusive_count_statistics = (
        utils.ParseCWPPairwiseInclusiveCountFile(
            cwp_pairwise_inclusive_count_file))
    cwp_inclusive_count_statistics_cumulative = (
        utils.ComputeCWPCummulativeInclusiveStatistics(
            cwp_inclusive_count_statistics))
    cwp_pairwise_inclusive_count_fractions = (
        utils.ComputeCWPChildFunctionsFractions(
            cwp_inclusive_count_statistics_cumulative,
            cwp_pairwise_inclusive_count_statistics))
    benchmark_set_metrics = {}
    pprof_files = os.listdir(pprof_top_path)

    for pprof_file in pprof_files:
      pprof_top_statistics = utils.ParsePprofTopOutput(
          os.path.join(pprof_top_path, pprof_file))
      pprof_tree_statistics = utils.ParsePprofTreeOutput(
          os.path.join(pprof_tree_path, pprof_file))
      common_functions_lines = []
      benchmark_function_metrics = {}

      for function_key, function_statistic in pprof_top_statistics.items():
        if function_key not in cwp_inclusive_count_statistics:
          continue

        # Re-store the CWP entry with its last field replaced by the
        # COMMON_FUNCTION marker, so that the extra functions can be told apart
        # afterwards.
        cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction, _ = (
            cwp_inclusive_count_statistics[function_key])
        cwp_inclusive_count_statistics[function_key] = (
            cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction,
            utils.COMMON_FUNCTION)

        # The key is made of the function name and the file name, comma
        # separated.
        function_name, _ = function_key.split(',')
        distance = benchmark_metrics.ComputeDistanceForFunction(
            pprof_tree_statistics[function_key],
            cwp_pairwise_inclusive_count_fractions.get(function_name, {}))
        # The fifth pprof --top field is written under the cum% column.
        benchmark_cum_p = float(function_statistic[4])
        score = benchmark_metrics.ComputeScoreForFunction(
            distance, cwp_inclusive_count_fraction, benchmark_cum_p)
        benchmark_function_metrics[function_key] = (distance, score)

        common_functions_lines.append(','.join([
            function_key, cwp_dso_name, str(cwp_inclusive_count),
            str(cwp_inclusive_count_fraction), ','.join(function_statistic),
            str(distance), str(score)
        ]))

      benchmark_function_groups_statistics = (
          benchmark_metrics.ComputeMetricsForComponents(
              cwp_function_groups, benchmark_function_metrics))
      benchmark_set_metrics[pprof_file] = (
          benchmark_metrics.ComputeMetricsForBenchmark(
              benchmark_function_metrics))

      # Sort in descending order by the score, the 12th CSV field.
      common_functions_lines.sort(
          key=lambda x: float(x.split(',')[11]), reverse=True)
      _WriteCsvFile(
          os.path.join(common_functions_path, pprof_file),
          'function,file,dso,inclusive_count,'
          'inclusive_count_fraction,flat,flat%,'
          'sum%,cum,cum%,distance,score', common_functions_lines)

      common_functions_groups_lines = [
          ','.join([group_name, ','.join(
              [str(statistic) for statistic in group_statistic])])
          for group_name, group_statistic in
          benchmark_function_groups_statistics.items()
      ]
      # Sort in descending order by the cumulative score, the 6th CSV field.
      common_functions_groups_lines.sort(
          key=lambda x: float(x.split(',')[5]), reverse=True)
      _WriteCsvFile(
          os.path.join(common_functions_groups_path, pprof_file),
          'group_name,file_path,number_of_functions,distance_cum,'
          'distance_avg,score_cum,score_avg', common_functions_groups_lines)

    benchmark_set_metrics_lines = [
        ','.join([benchmark_name, ','.join(
            [str(metric) for metric in metrics])])
        for benchmark_name, metrics in benchmark_set_metrics.items()
    ]
    # Sort in descending order by the cumulative score, the 5th CSV field.
    benchmark_set_metrics_lines.sort(
        key=lambda x: float(x.split(',')[4]), reverse=True)
    _WriteCsvFile(
        benchmark_set_metrics_file,
        'benchmark_name,number_of_functions,distance_cum,distance_avg,'
        'score_cum,score_avg', benchmark_set_metrics_lines)

    return cwp_inclusive_count_statistics

  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups,
                          extra_cwp_functions_groups_path,
                          extra_cwp_functions_groups_file):
    """Groups the extra functions.

    Writes the data of the functions that belong to each group in a separate
    file, sorted by their inclusive count value, in descending order. The file
    name is the same as the group name.

    The file is in CSV format, containing the fields: function name, file name,
    object name, inclusive count, inclusive count fraction.

    It creates a CSV file containing the name of the group, their
    common path, the total inclusive count and inclusive count fraction values
    of all the functions declared in files that share the common path, sorted
    in descending order by the inclusive count value.

    A function is assigned to the first group of cwp_function_groups whose path
    is contained in the name of the file declaring the function. The functions
    that do not match any group are not written.

    Args:
      cwp_statistics: A dict containing the CWP statistics.
      cwp_function_groups: A list of tuples with the groups names and the path
        describing the groups.
      extra_cwp_functions_groups_path: The name of the directory containing
        the CSV output files with the extra CWP functions that match a
        particular group.
      extra_cwp_functions_groups_file: The CSV output file containing the groups
        that match the extra functions and their statistics.
    """
    # Maps a group name to a tuple of: the CSV lines of its functions, the
    # common path, the total inclusive count and the total inclusive count
    # fraction.
    cwp_function_groups_statistics = defaultdict(lambda: ([], '', 0, 0.0))

    for function, statistics in cwp_statistics.items():
      if statistics[3] == utils.COMMON_FUNCTION:
        continue

      file_name = function.split(',')[1]
      function_inclusive_count = int(statistics[1])
      function_inclusive_count_fraction = float(statistics[2])

      for group in cwp_function_groups:
        group_common_path = group[1]

        if group_common_path not in file_name:
          continue

        group_name = group[0]
        group_lines, _, group_inclusive_count, group_inclusive_count_fraction = (
            cwp_function_groups_statistics[group_name])

        group_lines.append(','.join(
            [function, statistics[0], str(statistics[1]), str(statistics[2])]))
        cwp_function_groups_statistics[group_name] = (
            group_lines, group_common_path,
            group_inclusive_count + function_inclusive_count,
            group_inclusive_count_fraction + function_inclusive_count_fraction)
        # Only the first matching group accounts for the function.
        break

    extra_cwp_functions_groups_lines = []

    for group_name, group_statistics in cwp_function_groups_statistics.items():
      group_output_lines = group_statistics[0]
      # Sort in descending order by the inclusive count, the 4th CSV field.
      group_output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True)
      _WriteCsvFile(
          os.path.join(extra_cwp_functions_groups_path, group_name),
          'function,file,dso,inclusive_count,inclusive_count_fraction',
          group_output_lines)
      extra_cwp_functions_groups_lines.append(','.join([
          group_name, group_statistics[1], str(group_statistics[2]),
          str(group_statistics[3])
      ]))

    # Sort in descending order by the total inclusive count, the 3rd CSV field.
    extra_cwp_functions_groups_lines.sort(
        key=lambda x: int(x.split(',')[2]), reverse=True)
    _WriteCsvFile(
        extra_cwp_functions_groups_file,
        'group,shared_path,inclusive_count,inclusive_count_fraction',
        extra_cwp_functions_groups_lines)

  def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file):
    """Gets the functions that are in the CWP data, but not in the pprof output.

    Writes the functions and their statistics in the extra_cwp_functions_file
    file. The output is sorted based on the inclusive_count value, in
    descending order. The file is in CSV format, containing the fields:
    function name, file name, object name, inclusive count and inclusive count
    fraction.

    Args:
      cwp_statistics: A dict containing the CWP statistics indexed by the
        function and the file name, comma separated.
      extra_cwp_functions_file: The file where it should be stored the CWP
        functions and statistics that are marked as EXTRA_FUNCTION.
    """
    output_lines = [
        ','.join(
            [function, statistics[0], str(statistics[1]), str(statistics[2])])
        for function, statistics in cwp_statistics.items()
        if statistics[3] == utils.EXTRA_FUNCTION
    ]
    # Sort in descending order by the inclusive count, the 4th CSV field.
    output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True)
    _WriteCsvFile(extra_cwp_functions_file,
                  'function,file,dso,inclusive_count,'
                  'inclusive_count_fraction', output_lines)


def ParseArguments(arguments):
  """Parses the command line arguments of the script.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace with one attribute for each of the required options.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '--pprof_top_path',
      required=True,
      help='The directory containing the files with the pprof --top output of '
      'the benchmark profiles (the hot functions). The name of the files '
      'should match with the ones from the pprof tree output files.')
  parser.add_argument(
      '--pprof_tree_path',
      required=True,
      help='The directory containing the files with the pprof --tree output '
      'of the benchmark profiles (the call chains). The name of the files '
      'should match with the ones of the pprof top output files.')
  parser.add_argument(
      '--cwp_inclusive_count_file',
      required=True,
      help='The CSV file containing the CWP hot functions with their '
      'inclusive_count values. The CSV fields include the name of the '
      'function, the file and the object with the definition, the inclusive '
      'count value and the inclusive count fraction out of the total amount of '
      'inclusive count values.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_count_file',
      required=True,
      help='The CSV file containing the CWP pairs of parent and child '
      'functions with their inclusive count values. The CSV fields include the '
      'name of the parent and child functions concatenated by ;;, the file '
      'and the object with the definition of the child function, and the '
      'inclusive count value.')
  parser.add_argument(
      '--cwp_function_groups_file',
      required=True,
      help='The file that contains the CWP function groups. A line consists in '
      'the group name and a file path describing the group. A group must '
      'represent a ChromeOS component.')
  # The help text avoids a literal percent sign, which argparse would interpret
  # as a format specifier.
  parser.add_argument(
      '--common_functions_path',
      required=True,
      help='The directory containing the CSV output files with the common '
      'functions of the benchmark profiles and CWP data. A file will contain '
      'all the hot functions from a pprof top output file that are also '
      'included in the file containing the cwp inclusive count values. The CSV '
      'fields are: the function name, the file and the object where the '
      'function is declared, the CWP inclusive count and inclusive count '
      'fraction values, the pprof flat, flat percentage, sum percentage, cum '
      'and cum percentage values, the distance and the score. The files with '
      'the common functions will have the same names with the corresponding '
      'pprof output files.')
  parser.add_argument(
      '--common_functions_groups_path',
      required=True,
      help='The directory containing the CSV output files with the Chrome OS '
      'groups and their metrics that match the common functions of the '
      'benchmark profiles and CWP. The files with the groups will have the '
      'same names with the corresponding pprof output files. The CSV fields '
      'include the group name, group path, the number of functions that match '
      'the group, the average and cumulative distance, the average and '
      'cumulative score.')
  parser.add_argument(
      '--benchmark_set_metrics_file',
      required=True,
      help='The CSV output file containing the metrics for each benchmark. The '
      'CSV fields include the benchmark name, the number of common functions, '
      'the average and cumulative distance and score.')
  parser.add_argument(
      '--extra_cwp_functions_file',
      required=True,
      help='The CSV output file containing the functions that are in the CWP '
      'data, but are not in any of the benchmark profiles. The CSV fields '
      'include the name of the function, the file name and the object with the '
      'definition, and the CWP inclusive count and inclusive count fraction '
      'values. The entries are sorted in descending order based on the '
      'inclusive count value.')
  parser.add_argument(
      '--extra_cwp_functions_groups_file',
      required=True,
      help='The CSV output file containing the groups that match the extra CWP '
      'functions and their statistics. The CSV fields include the group name, '
      'the file path, the total inclusive count and inclusive count fraction '
      'values of the functions matching a particular group.')
  parser.add_argument(
      '--extra_cwp_functions_groups_path',
      required=True,
      help='The directory containing the CSV output files with the extra CWP '
      'functions that match a particular group. The name of the file is the '
      'same as the group name. The CSV fields include the name of the '
      'function, the file name and the object with the definition, and the CWP '
      'inclusive count and inclusive count fraction values. The entries are '
      'sorted in descending order based on the inclusive count value.')

  options = parser.parse_args(arguments)

  return options


def Main(argv):
  """Recreates the output directories and runs the hot functions processing.

  Any existing content of the three output directories (common functions,
  common functions groups and extra CWP functions groups) is deleted.

  Args:
    argv: The list of command line arguments, without the program name.
  """
  options = ParseArguments(argv)

  for output_directory in (options.common_functions_path,
                           options.common_functions_groups_path,
                           options.extra_cwp_functions_groups_path):
    _RecreateDirectory(output_directory)

  hot_functions_processor = HotFunctionsProcessor(
      options.pprof_top_path, options.pprof_tree_path,
      options.cwp_inclusive_count_file,
      options.cwp_pairwise_inclusive_count_file,
      options.cwp_function_groups_file, options.common_functions_path,
      options.common_functions_groups_path, options.benchmark_set_metrics_file,
      options.extra_cwp_functions_file, options.extra_cwp_functions_groups_file,
      options.extra_cwp_functions_groups_path)

  hot_functions_processor.ProcessHotFunctions()


if __name__ == '__main__':
  Main(sys.argv[1:])