diff options
Diffstat (limited to 'user_activity_benchmarks/process_hot_functions.py')
-rwxr-xr-x | user_activity_benchmarks/process_hot_functions.py | 482 |
1 files changed, 0 insertions, 482 deletions
#!/usr/bin/python2

# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data.

The pprof --top and pprof --tree outputs should be extracted from the benchmark
profiles. The outputs contain the hot functions and the call chains.

For each pair of pprof --top and --tree output files, the tool will create a
file that contains the hot functions present also in the extracted CWP data.
The common functions are organized in groups that represent a Chrome OS
component. A function belongs to a group that is defined by a given file path
if it is declared in a file that shares that path.

A set of metrics are computed for each function, benchmark and Chrome OS group
covered by a benchmark.

Afterwards, this script extracts the functions that are present in the CWP
data and not in the benchmark profiles. The extra functions are also grouped
in Chrome OS components.
"""

from collections import defaultdict

import argparse
import os
import shutil
import sys

import benchmark_metrics
import utils

# Header shared by every CSV file that lists extra CWP functions.
_EXTRA_FUNCTIONS_HEADER = ('function,file,dso,inclusive_count,'
                           'inclusive_count_fraction')


def _WriteSortedCsv(output_path, header, lines, sort_column, convert):
  """Writes CSV lines to a file, sorted in descending order by one column.

  The header is written first, followed by the sorted lines. The lines are
  joined by newlines and no trailing newline is written. The sort is stable
  and the input list is left untouched.

  Args:
    output_path: The path of the file to be written.
    header: The CSV header line.
    lines: A list of comma separated lines, without the header.
    sort_column: The zero-based index of the comma separated field that is
      used as the sort key.
    convert: The callable (e.g. int or float) that turns the text of the sort
      field into a comparable value.
  """
  sorted_lines = sorted(
      lines,
      key=lambda line: convert(line.split(',')[sort_column]),
      reverse=True)

  with open(output_path, 'w') as output_file:
    output_file.write('\n'.join([header] + sorted_lines))


def _FormatCWPFunctionLine(function, statistics):
  """Builds the CSV line that describes a CWP function.

  Args:
    function: The key of the function in the CWP statistics, i.e. the function
      and the file name, comma separated.
    statistics: The CWP statistics of the function. The first three elements
      are written after the function key, in order; the headers used by the
      callers name them dso, inclusive_count and inclusive_count_fraction.

  Returns:
    The comma separated line for the function.
  """
  return ','.join(
      [function, statistics[0], str(statistics[1]), str(statistics[2])])


def _RecreateDirectory(path):
  """Makes path an empty directory, removing any previous content.

  Args:
    path: The directory to be removed, if it exists, and created again.
  """
  if os.path.exists(path):
    shutil.rmtree(path)

  os.makedirs(path)


class HotFunctionsProcessor(object):
  """Does the pprof and CWP output processing.

  Extracts the common, extra functions from the pprof files, groups them in
  Chrome OS components. Computes the metrics for the common functions,
  benchmark and Chrome OS groups covered by a benchmark.
  """

  def __init__(self, pprof_top_path, pprof_tree_path, cwp_inclusive_count_file,
               cwp_pairwise_inclusive_count_file, cwp_function_groups_file,
               common_functions_path, common_functions_groups_path,
               benchmark_set_metrics_file, extra_cwp_functions_file,
               extra_cwp_functions_groups_file,
               extra_cwp_functions_groups_path):
    """Initializes the HotFunctionsProcessor.

    Args:
      pprof_top_path: The directory containing the files with the pprof --top
        output.
      pprof_tree_path: The directory containing the files with the pprof --tree
        output.
      cwp_inclusive_count_file: The CSV file containing the CWP functions with
        the inclusive count values.
      cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs
        of parent and child functions with their inclusive count values.
      cwp_function_groups_file: The file that contains the CWP function groups.
      common_functions_path: The directory containing the CSV output files
        with the common functions of the benchmark profiles and CWP data.
      common_functions_groups_path: The directory containing the CSV output
        files with the CWP groups and their metrics that match the common
        functions of the benchmark profiles and CWP.
      benchmark_set_metrics_file: The CSV output file containing the metrics for
        each benchmark.
      extra_cwp_functions_file: The CSV output file containing the functions
        that are in the CWP data, but are not in any of the benchmark profiles.
      extra_cwp_functions_groups_file: The CSV output file containing the groups
        that match the extra CWP functions and their statistics.
      extra_cwp_functions_groups_path: The directory containing the CSV output
        files with the extra CWP functions that match a particular group.
    """
    self._pprof_top_path = pprof_top_path
    self._pprof_tree_path = pprof_tree_path
    self._cwp_inclusive_count_file = cwp_inclusive_count_file
    self._cwp_pairwise_inclusive_count_file = cwp_pairwise_inclusive_count_file
    self._cwp_function_groups_file = cwp_function_groups_file
    self._common_functions_path = common_functions_path
    self._common_functions_groups_path = common_functions_groups_path
    self._benchmark_set_metrics_file = benchmark_set_metrics_file
    self._extra_cwp_functions_file = extra_cwp_functions_file
    self._extra_cwp_functions_groups_file = extra_cwp_functions_groups_file
    self._extra_cwp_functions_groups_path = extra_cwp_functions_groups_path

  def ProcessHotFunctions(self):
    """Does the processing of the hot functions.

    Parses the function groups, extracts the common functions and then uses
    the resulting CWP statistics to write the extra functions and their groups.
    """
    with open(self._cwp_function_groups_file) as input_file:
      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())

    cwp_statistics = \
        self.ExtractCommonFunctions(self._pprof_top_path,
                                    self._pprof_tree_path,
                                    self._cwp_inclusive_count_file,
                                    self._cwp_pairwise_inclusive_count_file,
                                    cwp_function_groups,
                                    self._common_functions_path,
                                    self._common_functions_groups_path,
                                    self._benchmark_set_metrics_file)
    self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file)
    self.GroupExtraFunctions(cwp_statistics, cwp_function_groups,
                             self._extra_cwp_functions_groups_path,
                             self._extra_cwp_functions_groups_file)

  def ExtractCommonFunctions(self, pprof_top_path, pprof_tree_path,
                             cwp_inclusive_count_file,
                             cwp_pairwise_inclusive_count_file,
                             cwp_function_groups, common_functions_path,
                             common_functions_groups_path,
                             benchmark_set_metrics_file):
    """Extracts the common functions of the benchmark profiles and the CWP data.

    For each pair of pprof --top and --tree output files, it creates a separate
    file with the same name containing the common functions specifications and
    metrics, that will be placed in the common_functions_path directory.

    The resulting file is in CSV format, containing the following fields:
    function name, file name, object, inclusive count, inclusive_count_fraction,
    flat, flat%, sum%, cum, cum%, distance and score. The entries are sorted
    in descending order by the score.

    For each pair of pprof files, an additional file is created with the
    Chrome OS groups that match the common functions.

    The file is in CSV format containing the fields: group name, group path,
    the number of functions that match the group, the cumulative and average
    distance, the cumulative and average score. The entries are sorted in
    descending order by the cumulative score.
    The file has the same name with the pprof file and it is placed in the
    common_functions_groups_path directory.

    For all the analyzed benchmarks, the method creates a CSV output file
    containing the metrics for each benchmark. The CSV fields include the
    benchmark name, the number of common functions, the cumulative and
    average distance and score. The entries are sorted in descending order by
    the cumulative score.

    It builds a dict of the CWP statistics by calling the
    utils.ParseCWPInclusiveCountFile method and if a function is common, it is
    marked as a COMMON_FUNCTION.

    Args:
      pprof_top_path: The name of the directory with the files with the
        pprof --top output.
      pprof_tree_path: The name of the directory with the files with the
        pprof --tree output. A file with the same name is read from here for
        every file found in pprof_top_path.
      cwp_inclusive_count_file: The CSV file with the CWP inclusive count
        values. It is parsed by utils.ParseCWPInclusiveCountFile.
      cwp_pairwise_inclusive_count_file: The CSV file with the CWP pairwise
        inclusive count values. It is parsed by
        utils.ParseCWPPairwiseInclusiveCountFile.
      cwp_function_groups: A list of tuples containing the name of the group
        and the corresponding file path.
      common_functions_path: The path containing the output files with the
        common functions and their metrics.
      common_functions_groups_path: The path containing the output files with
        the Chrome OS groups that match the common functions and their metrics.
      benchmark_set_metrics_file: The CSV output file containing the metrics for
        all the analyzed benchmarks.

    Returns:
      A dict containing the CWP statistics with the common functions marked as
      COMMON_FUNCTION.
    """
    cwp_inclusive_count_statistics = \
        utils.ParseCWPInclusiveCountFile(cwp_inclusive_count_file)
    cwp_pairwise_inclusive_count_statistics = \
        utils.ParseCWPPairwiseInclusiveCountFile(
            cwp_pairwise_inclusive_count_file)
    cwp_inclusive_count_statistics_cumulative = \
        utils.ComputeCWPCummulativeInclusiveStatistics(
            cwp_inclusive_count_statistics)
    cwp_pairwise_inclusive_count_fractions = \
        utils.ComputeCWPChildFunctionsFractions(
            cwp_inclusive_count_statistics_cumulative,
            cwp_pairwise_inclusive_count_statistics)
    benchmark_set_metrics = {}

    for pprof_file in os.listdir(pprof_top_path):
      pprof_top_statistics = \
          utils.ParsePprofTopOutput(os.path.join(pprof_top_path, pprof_file))
      pprof_tree_statistics = \
          utils.ParsePprofTreeOutput(os.path.join(pprof_tree_path, pprof_file))
      common_functions_lines = []
      benchmark_function_metrics = {}

      for function_key, function_statistic in pprof_top_statistics.items():
        if function_key not in cwp_inclusive_count_statistics:
          continue

        # Mark the function as common, keeping the other CWP values untouched.
        cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction, _ = \
            cwp_inclusive_count_statistics[function_key]
        cwp_inclusive_count_statistics[function_key] = \
            (cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction,
             utils.COMMON_FUNCTION)

        # The key is made of the function and the file name, comma separated.
        function_name, _ = function_key.split(',')
        distance = benchmark_metrics.ComputeDistanceForFunction(
            pprof_tree_statistics[function_key],
            cwp_pairwise_inclusive_count_fractions.get(function_name, {}))
        # The fifth pprof --top field; the CSV header names it cum%.
        benchmark_cum_p = float(function_statistic[4])
        score = benchmark_metrics.ComputeScoreForFunction(
            distance, cwp_inclusive_count_fraction, benchmark_cum_p)
        benchmark_function_metrics[function_key] = (distance, score)

        common_functions_lines.append(','.join([
            function_key, cwp_dso_name, str(cwp_inclusive_count),
            str(cwp_inclusive_count_fraction), ','.join(function_statistic),
            str(distance), str(score)]))

      benchmark_function_groups_statistics = \
          benchmark_metrics.ComputeMetricsForComponents(
              cwp_function_groups, benchmark_function_metrics)
      benchmark_set_metrics[pprof_file] = \
          benchmark_metrics.ComputeMetricsForBenchmark(
              benchmark_function_metrics)

      # The function key accounts for two fields, so the score is field 11.
      _WriteSortedCsv(
          os.path.join(common_functions_path, pprof_file),
          'function,file,dso,inclusive_count,'
          'inclusive_count_fraction,flat,flat%,'
          'sum%,cum,cum%,distance,score',
          common_functions_lines, 11, float)

      common_functions_groups_lines = \
          [','.join([group_name, ','.join(
              [str(statistic) for statistic in group_statistic])])
           for group_name, group_statistic in
           benchmark_function_groups_statistics.items()]
      # Field 5 is score_cum.
      _WriteSortedCsv(
          os.path.join(common_functions_groups_path, pprof_file),
          'group_name,file_path,number_of_functions,distance_cum,'
          'distance_avg,score_cum,score_avg',
          common_functions_groups_lines, 5, float)

    benchmark_set_metrics_lines = \
        [','.join([benchmark_name, ','.join(
            [str(metric) for metric in metrics])])
         for benchmark_name, metrics in benchmark_set_metrics.items()]
    # Field 4 is score_cum.
    _WriteSortedCsv(
        benchmark_set_metrics_file,
        'benchmark_name,number_of_functions,distance_cum,distance_avg,'
        'score_cum,score_avg',
        benchmark_set_metrics_lines, 4, float)

    return cwp_inclusive_count_statistics

  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups,
                          extra_cwp_functions_groups_path,
                          extra_cwp_functions_groups_file):
    """Groups the extra functions.

    Every function that is not marked as COMMON_FUNCTION is assigned to the
    first group whose path is contained in the file name of the function.
    Functions that match no group are left out.

    Writes the data of the functions that belong to each group in a separate
    file, sorted by their inclusive count value, in descending order. The file
    name is the same as the group name.

    The file is in CSV format, containing the fields: function name, file name,
    object name, inclusive count, inclusive count fraction.

    It creates a CSV file containing the name of the group, their
    common path, the total inclusive count and inclusive count fraction values
    of all the functions declared in files that share the common path, sorted
    in descending order by the inclusive count value.

    Args:
      cwp_statistics: A dict containing the CWP statistics.
      cwp_function_groups: A list of tuples with the groups names and the path
        describing the groups.
      extra_cwp_functions_groups_path: The name of the directory containing
        the CSV output files with the extra CWP functions that match a
        particular group.
      extra_cwp_functions_groups_file: The CSV output file containing the groups
        that match the extra functions and their statistics.
    """
    # Maps a group name to the tuple (function lines, common path, total
    # inclusive count, total inclusive count fraction).
    cwp_function_groups_statistics = defaultdict(lambda: ([], '', 0, 0.0))

    for function, statistics in cwp_statistics.items():
      if statistics[3] == utils.COMMON_FUNCTION:
        continue

      file_name = function.split(',')[1]
      function_inclusive_count = int(statistics[1])
      function_inclusive_count_fraction = float(statistics[2])

      for group in cwp_function_groups:
        group_name = group[0]
        group_common_path = group[1]

        if group_common_path not in file_name:
          continue

        group_lines, _, group_inclusive_count, \
            group_inclusive_count_fraction = \
            cwp_function_groups_statistics[group_name]
        group_lines.append(_FormatCWPFunctionLine(function, statistics))
        cwp_function_groups_statistics[group_name] = \
            (group_lines, group_common_path,
             group_inclusive_count + function_inclusive_count,
             group_inclusive_count_fraction +
             function_inclusive_count_fraction)
        # A function belongs to the first matching group only.
        break

    extra_cwp_functions_groups_lines = []
    for group_name, group_statistics \
        in cwp_function_groups_statistics.items():
      # Field 3 is the inclusive count.
      _WriteSortedCsv(
          os.path.join(extra_cwp_functions_groups_path, group_name),
          _EXTRA_FUNCTIONS_HEADER, group_statistics[0], 3, int)
      extra_cwp_functions_groups_lines.append(','.join(
          [group_name, group_statistics[1], str(group_statistics[2]),
           str(group_statistics[3])]))

    # Field 2 is the total inclusive count of the group.
    _WriteSortedCsv(
        extra_cwp_functions_groups_file,
        'group,shared_path,inclusive_count,inclusive_count_fraction',
        extra_cwp_functions_groups_lines, 2, int)

  def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file):
    """Gets the functions that are in the CWP data, but not in the pprof output.

    Writes the functions and their statistics in the extra_cwp_functions_file
    file. The output is sorted in descending order based on the
    inclusive_count value. The file is in CSV format, containing the fields:
    function name, file name, object name, inclusive count and inclusive count
    fraction.

    Args:
      cwp_statistics: A dict containing the CWP statistics indexed by the
        function and the file name, comma separated.
      extra_cwp_functions_file: The file where it should be stored the CWP
        functions and statistics that are marked as EXTRA_FUNCTION.
    """
    output_lines = [
        _FormatCWPFunctionLine(function, statistics)
        for function, statistics in cwp_statistics.items()
        if statistics[3] == utils.EXTRA_FUNCTION
    ]

    # Field 3 is the inclusive count.
    _WriteSortedCsv(extra_cwp_functions_file, _EXTRA_FUNCTIONS_HEADER,
                    output_lines, 3, int)


def ParseArguments(arguments):
  """Parses the command line arguments of the script.

  Every option is required.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace with one attribute for each option.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '--pprof_top_path',
      required=True,
      help='The directory containing the files with the pprof --top output of '
      'the benchmark profiles (the hot functions). The name of the files '
      'should match with the ones from the pprof tree output files.')
  parser.add_argument(
      '--pprof_tree_path',
      required=True,
      help='The directory containing the files with the pprof --tree output '
      'of the benchmark profiles (the call chains). The name of the files '
      'should match with the ones of the pprof top output files.')
  parser.add_argument(
      '--cwp_inclusive_count_file',
      required=True,
      help='The CSV file containing the CWP hot functions with their '
      'inclusive_count values. The CSV fields include the name of the '
      'function, the file and the object with the definition, the inclusive '
      'count value and the inclusive count fraction out of the total amount of '
      'inclusive count values.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_count_file',
      required=True,
      help='The CSV file containing the CWP pairs of parent and child '
      'functions with their inclusive count values. The CSV fields include the '
      'name of the parent and child functions concatenated by ;;, the file '
      'and the object with the definition of the child function, and the '
      'inclusive count value.')
  parser.add_argument(
      '--cwp_function_groups_file',
      required=True,
      help='The file that contains the CWP function groups. A line consists in '
      'the group name and a file path describing the group. A group must '
      'represent a ChromeOS component.')
  parser.add_argument(
      '--common_functions_path',
      required=True,
      help='The directory containing the CSV output files with the common '
      'functions of the benchmark profiles and CWP data. A file will contain '
      'all the hot functions from a pprof top output file that are also '
      'included in the file containing the cwp inclusive count values. The CSV '
      'fields are: the function name, the file and the object where the '
      'function is declared, the CWP inclusive count and inclusive count '
      'fraction values, the cumulative and average distance, the cumulative '
      'and average score. The files with the common functions will have the '
      'same names with the corresponding pprof output files.')
  parser.add_argument(
      '--common_functions_groups_path',
      required=True,
      help='The directory containing the CSV output files with the Chrome OS '
      'groups and their metrics that match the common functions of the '
      'benchmark profiles and CWP. The files with the groups will have the '
      'same names with the corresponding pprof output files. The CSV fields '
      'include the group name, group path, the number of functions that match '
      'the group, the average and cumulative distance, the average and '
      'cumulative score.')
  parser.add_argument(
      '--benchmark_set_metrics_file',
      required=True,
      help='The CSV output file containing the metrics for each benchmark. The '
      'CSV fields include the benchmark name, the number of common functions, '
      'the average and cumulative distance and score.')
  parser.add_argument(
      '--extra_cwp_functions_file',
      required=True,
      help='The CSV output file containing the functions that are in the CWP '
      'data, but are not in any of the benchmark profiles. The CSV fields '
      'include the name of the function, the file name and the object with the '
      'definition, and the CWP inclusive count and inclusive count fraction '
      'values. The entries are sorted in descending order based on the '
      'inclusive count value.')
  parser.add_argument(
      '--extra_cwp_functions_groups_file',
      required=True,
      help='The CSV output file containing the groups that match the extra CWP '
      'functions and their statistics. The CSV fields include the group name, '
      'the file path, the total inclusive count and inclusive count fraction '
      'values of the functions matching a particular group.')
  parser.add_argument(
      '--extra_cwp_functions_groups_path',
      required=True,
      help='The directory containing the CSV output files with the extra CWP '
      'functions that match a particular group. The name of the file is the '
      'same as the group name. The CSV fields include the name of the '
      'function, the file name and the object with the definition, and the CWP '
      'inclusive count and inclusive count fraction values. The entries are '
      'sorted in descending order based on the inclusive count value.')

  options = parser.parse_args(arguments)

  return options


def Main(argv):
  """Recreates the output directories and processes the hot functions.

  Any previous content of the three output directories is removed.

  Args:
    argv: The list of command line arguments, without the program name.
  """
  options = ParseArguments(argv)

  _RecreateDirectory(options.common_functions_path)
  _RecreateDirectory(options.common_functions_groups_path)
  _RecreateDirectory(options.extra_cwp_functions_groups_path)

  hot_functions_processor = HotFunctionsProcessor(
      options.pprof_top_path, options.pprof_tree_path,
      options.cwp_inclusive_count_file,
      options.cwp_pairwise_inclusive_count_file,
      options.cwp_function_groups_file, options.common_functions_path,
      options.common_functions_groups_path, options.benchmark_set_metrics_file,
      options.extra_cwp_functions_file, options.extra_cwp_functions_groups_file,
      options.extra_cwp_functions_groups_path)

  hot_functions_processor.ProcessHotFunctions()


if __name__ == '__main__':
  Main(sys.argv[1:])