diff options
-rwxr-xr-x | user_activity_benchmarks/process_hot_functions.py | 724 |
1 files changed, 442 insertions, 282 deletions
diff --git a/user_activity_benchmarks/process_hot_functions.py b/user_activity_benchmarks/process_hot_functions.py index f021e8c7..13d228bb 100755 --- a/user_activity_benchmarks/process_hot_functions.py +++ b/user_activity_benchmarks/process_hot_functions.py @@ -5,312 +5,427 @@ # found in the LICENSE file. """Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data. -The pprof output files should have the format given by the output of the -pprof --top command. A line containing a statistic should include the flat, -flat%, sum%, cum, cum%, function name and file name, separated by a space. - -The CWP hot functions should be specified in a CSV file that should contain the -fields for the function name, the file and the object where that function is -declared and the inclusive count value. - -For each pprof output file, the tool will output a file that contains the hot -functions present also in the CWP hot functions file. Afterwards, it extracts -the functions that are present in the CWP functions file and not in the -pprof output files. - -Optionally, it will organize the extra CWP functions in groups that have to -represent a ChromeOS component. A function belongs to a group that is defined -by a given file path if it is declared in a file that shares that path. +The pprof --top and pprof --tree outputs should be extracted from the benchmark +profiles. The outputs contain the hot functions and the call chains. + +For each pair of pprof --top and --tree output files, the tool will create a +file that contains the hot functions present also in the extracted CWP data. +The common functions are organized in groups that represent a Chrome OS +component. A function belongs to a group that is defined by a given file path +if it is declared in a file that shares that path. + +A set of metrics are computed for each function, benchmark and Chrome OS group +covered by a benchmark. + +Afterwards, this script extracts the functions that are present in the CWP +data and not in the benchmark profiles. The extra functions are also groupped +in Chrome OS components. """ +from collections import defaultdict + import argparse -import csv import os -import re +import shutil import sys +import benchmark_metrics +import utils + class HotFunctionsProcessor(object): """Does the pprof and CWP output processing. - Extracts the common and extra functions from the pprof output files, based on - the provided CWP functions. + Extracts the common, extra functions from the pprof files, groups them in + Chrome OS components. Computes the metrics for the common functions, + benchmark and Chrome OS groups covered by a benchmark. """ - # Constants used to identify if a function is common in the pprof and CWP - # files. - COMMON_FUNCTION = 1 - NOT_COMMON_FUNCTION = 0 - - def __init__(self, pprof_path, cwp_functions_file, common_functions_path, - extra_cwp_functions_file, cwp_function_groups_file, - cwp_function_groups_statistics_file, - cwp_function_groups_file_prefix): + def __init__(self, pprof_top_path, pprof_tree_path, cwp_inclusive_count_file, + cwp_pairwise_inclusive_count_file, cwp_function_groups_file, + common_functions_path, common_functions_groups_path, + benchmark_set_metrics_file, extra_cwp_functions_file, + extra_cwp_functions_groups_file, + extra_cwp_functions_groups_path): """Initializes the HotFunctionsProcessor. Args: - pprof_path: The directory containing the pprof output files. - cwp_functions_file: The file containing the CWP data. - common_functions_path: The directory where the files with the CWP and - pprof common functions should be stored. - extra_cwp_functions_file: The file where should be stored the CWP - functions that are not in the given pprof output files. - cwp_function_groups_file: The name of the file containing the groups of - functions. - cwp_function_groups_statistics_file: The name of the file containing the - statistics for the function groups. - cwp_function_groups_file_prefix: The prefix of the files that will store - the function statistics for each function group. + pprof_top_path: The directory containing the files with the pprof --top + output. + pprof_tree_path: The directory containing the files with the pprof --tree + output. + cwp_inclusive_count_file: The CSV file containing the CWP functions with + the inclusive count values. + cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs + of parent and child functions with their inclusive count values. + cwp_function_groups_file: The file that contains the CWP function groups. + common_functions_path: The directory containing the CSV output files + with the common functions of the benchmark profiles and CWP data. + common_functions_groups_path: The directory containing the CSV output + files with the CWP groups and their metrics that match the common + functions of the benchmark profiles and CWP. + benchmark_set_metrics_file: The CSV output file containing the metrics for + each benchmark. + extra_cwp_functions_file: The CSV output file containing the functions + that are in the CWP data, but are not in any of the benchmark profiles. + extra_cwp_functions_groups_file: The CSV output file containing the groups + that match the extra CWP functions and their statistics. + extra_cwp_functions_groups_path: The directory containing the CSV output + files with the extra CWP functions that match a particular group. """ - self._pprof_path = pprof_path - self._cwp_functions_file = cwp_functions_file + self._pprof_top_path = pprof_top_path + self._pprof_tree_path = pprof_tree_path + self._cwp_inclusive_count_file = cwp_inclusive_count_file + self._cwp_pairwise_inclusive_count_file = cwp_pairwise_inclusive_count_file + self._cwp_function_groups_file = cwp_function_groups_file self._common_functions_path = common_functions_path + self._common_functions_groups_path = common_functions_groups_path + self._benchmark_set_metrics_file = benchmark_set_metrics_file self._extra_cwp_functions_file = extra_cwp_functions_file - self._cwp_function_groups_file = cwp_function_groups_file - self._cwp_function_groups_statistics_file = \ - cwp_function_groups_statistics_file - self._cwp_function_groups_file_prefix = cwp_function_groups_file_prefix + self._extra_cwp_functions_groups_file = extra_cwp_functions_groups_file + self._extra_cwp_functions_groups_path = extra_cwp_functions_groups_path def ProcessHotFunctions(self): """Does the processing of the hot functions.""" + with open(self._cwp_function_groups_file) as input_file: + cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines()) cwp_statistics = \ - self.ExtractCommonFunctions(self._pprof_path, - self._common_functions_path, - self._cwp_functions_file) - + self.ExtractCommonFunctions(self._pprof_top_path, + self._pprof_tree_path, + self._cwp_inclusive_count_file, + self._cwp_pairwise_inclusive_count_file, + cwp_function_groups, + self._common_functions_path, + self._common_functions_groups_path, + self._benchmark_set_metrics_file) self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file) - if all([self._cwp_function_groups_file, - self._cwp_function_groups_statistics_file, - self._cwp_function_groups_file_prefix]): - self.GroupExtraFunctions(cwp_statistics, - self._cwp_function_groups_file_prefix, - self._cwp_function_groups_file, - self._cwp_function_groups_statistics_file) + self.GroupExtraFunctions(cwp_statistics, cwp_function_groups, + self._extra_cwp_functions_groups_path, + self._extra_cwp_functions_groups_file) - def ParseCWPStatistics(self, cwp_statistics_file_name): - """Parses the contents of the file containing the CWP data. + @staticmethod + def ComputeCWPCummulativeInclusiveStatistics(cwp_inclusive_count_statistics): + """Computes the cumulative inclusive count value of a function. - A line contains the name of the function, the corresponding filenames, the - object files and their inclusive count values in CSV format. + A function might appear declared in multiple files or objects. When + computing the fraction of the inclusive count value from a child function to + the parent function, we take into consideration the sum of the + inclusive_count + count values from all the ocurences of that function. Args: - cwp_statistics_file_name: The name of the file containing the CWP data - in CSV format. + cwp_inclusive_count_statistics: A dict containing the inclusive count + statistics extracted by the ParseCWPInclusiveCountFile method. Returns: - A dict containing the CWP statistics. The key contains the name of the - functions with the file name comma separated. The value represents a - tuple with the statistics and a marker to identify if the function is - present in one of the pprof files. + A dict having as a ket the name of the function and as a value the sum of + the inclusive count values of the occurences of the functions from all + the files and objects. """ - cwp_statistics = {} + cwp_inclusive_count_statistics_cumulative = defaultdict(int) - with open(cwp_statistics_file_name) as cwp_statistics_file: - statistics_reader = csv.DictReader(cwp_statistics_file, delimiter=',') + for function_key, function_statistics \ + in cwp_inclusive_count_statistics.iteritems(): + function_name, _ = function_key.split(',') + cwp_inclusive_count_statistics_cumulative[function_name] += \ + function_statistics[1] - for statistic in statistics_reader: - function_name = statistic['function'] - file_name = os.path.normpath(statistic['file']) - dso_name = statistic['dso'] - inclusive_count = statistic['inclusive_count'] + return cwp_inclusive_count_statistics_cumulative - # We ignore the lines that have empty fields(i.e they specify only the - # addresses of the functions and the inclusive counts values). - if all([function_name, file_name, dso_name, inclusive_count]): - key = '%s,%s' % (function_name, file_name) - value = \ - ('%s,%s' % (dso_name, inclusive_count), self.NOT_COMMON_FUNCTION) - # All the functions are marked as NOT_COMMON_FUNCTION. - cwp_statistics[key] = value - - return cwp_statistics + @staticmethod + def ComputeCWPChildFunctionsFractions( + cwp_inclusive_count_statistics_cumulative, + cwp_pairwise_inclusive_count_statistics): + """Computes the fractions of the inclusive count values for child functions. - def ExtractCommonFunctions(self, pprof_path, common_functions_path, - cwp_functions_file): - """Extracts the common functions of the pprof files and the CWP file. + The fraction represents the inclusive count value of a child function over + the one of the parent function. - For each pprof file, it creates a separate file with the same name - containing the common functions, that will be placed in the - common_functions_path directory. + Args: + cwp_inclusive_count_statistics_cumulative: A dict containing the + cumulative inclusive count values of the CWP functions. + cwp_pairwise_inclusive_count_statistics: A dict containing the inclusive + count statistics for pairs of parent and child functions. The key is the + parent function. The value is a dict with the key the name of the child + function and the file name, comma separated, and the value is the + inclusive count value of the pair of parent and child functions. - The resulting file is CSV format, containing the following fields: - function name, file name, object, inclusive count, flat, flat%, sum%, cum, - cum%. + Returns: + A dict containing the inclusive count statistics for pairs of parent + and child functions. The key is the parent function. The value is a + dict with the key the name of the child function and the file name, + comma separated, and the value is the inclusive count fraction of the + child function out of the parent function. + """ - It builds a dict of the CWP statistics and if a function is common, it is + pairwise_inclusive_count_fractions = {} + + for parent_function_key, child_functions_metrics in \ + cwp_pairwise_inclusive_count_statistics.iteritems(): + child_functions_fractions = {} + parent_function_inclusive_count = \ + cwp_inclusive_count_statistics_cumulative.get(parent_function_key, 0.0) + + if parent_function_key in cwp_inclusive_count_statistics_cumulative: + for child_function_key, child_function_inclusive_count \ + in child_functions_metrics.iteritems(): + child_functions_fractions[child_function_key] = \ + child_function_inclusive_count / parent_function_inclusive_count + else: + for child_function_key, child_function_inclusive_count \ + in child_functions_metrics.iteritems(): + child_functions_fractions[child_function_key] = 0.0 + pairwise_inclusive_count_fractions[parent_function_key] = \ + child_functions_fractions + + return pairwise_inclusive_count_fractions + + def ExtractCommonFunctions(self, pprof_top_path, pprof_tree_path, + cwp_inclusive_count_file, + cwp_pairwise_inclusive_count_file, + cwp_function_groups, common_functions_path, + common_functions_groups_path, + benchmark_set_metrics_file): + """Extracts the common functions of the benchmark profiles and the CWP data. + + For each pair of pprof --top and --tree output files, it creates a separate + file with the same name containing the common functions specifications and + metrics, that will be placed in the common_functions_path directory. + + The resulting file is in CSV format, containing the following fields: + function name, file name, object, inclusive count, inclusive_count_fraction, + flat, flat%, sum%, cum, cum%, distance and score. + + For each pair of pprof files, an additional file is created with the + Chrome OS groups that match the common functions. + + The file is in CSV format containing the fields: group name, group path, + the number of functions that match the group, the average and cumulative + distance, the average and cumulative score. + The file has the same name with the pprof file and it is placed in the + common_functions_groups_path directory. + + For all the analyzed benchmarks, the method creates a CSV output file + containing the metrics for each benchmark. The CSV fields include the + benchmark name, the number of common functions, the average and + cumulative distance and score. + + It builds a dict of the CWP statistics by calling the + utils.ParseCWPInclusiveCountFile method and if a function is common, it is marked as a COMMON_FUNCTION. Args: - pprof_path: The directory with the pprof files. - common_functions_path: The directory with the common functions files. - cwp_functions_file: The file with the CWP data. + pprof_top_path: The name of the directory with the files with the + pprof --top output. + pprof_tree_path: The name of the directory with the files with the + pprof --tree output. + cwp_inclusive_count_file: A dict with the inclusive count values. + cwp_pairwise_inclusive_count_file: A dict with the pairwise inclusive + count values. + cwp_function_groups: A list of tuples containing the name of the group + and the corresponding file path. + common_functions_path: The path containing the output files with the + common functions and their metrics. + common_functions_groups_path: The path containing the output files with + the Chrome OS groups that match the common functions and their metrics. + benchmark_set_metrics_file: The CSV output file containing the metrics for + all the analyzed benchmarks. Returns: A dict containing the CWP statistics with the common functions marked as COMMON_FUNCTION. """ - # Get the list of pprof files from the given path. - pprof_files = os.listdir(pprof_path) - cwp_statistics = self.ParseCWPStatistics(cwp_functions_file) - function_statistic_regex = re.compile(r'\S+\s+\S+%\s+\S+%\s+\S+\s+\S+%') - function_regex = re.compile(r'[a-zA-Z0-9-_:.~\[\]]+') - # TODO(evelinad): Consider the case where the file name can have other - # characters. - file_regex = re.compile(r'[a-zA-Z0-9-/_.]+') + cwp_inclusive_count_statistics = \ + utils.ParseCWPInclusiveCountFile(cwp_inclusive_count_file) + cwp_pairwise_inclusive_count_statistics = \ + utils.ParseCWPPairwiseInclusiveCountFile( + cwp_pairwise_inclusive_count_file) + cwp_inclusive_count_statistics_cumulative = \ + self.ComputeCWPCummulativeInclusiveStatistics( + cwp_inclusive_count_statistics) + cwp_pairwise_inclusive_count_fractions = \ + self.ComputeCWPChildFunctionsFractions( + cwp_inclusive_count_statistics_cumulative, + cwp_pairwise_inclusive_count_statistics) + benchmark_set_metrics = {} + pprof_files = os.listdir(pprof_top_path) for pprof_file in pprof_files: - # In the pprof output, the statistics of the functions start from the - # 8th line. - with open(os.path.join(pprof_path, pprof_file), 'r') as input_file: - pprof_statistics = input_file.readlines()[6:] - output_lines = \ - ['function,file,dso,inclusive_count,flat,flat%,sum%,cum,cum%'] - - for pprof_statistic in pprof_statistics: - function_statistic_match = \ - function_statistic_regex.search(pprof_statistic) - function_statistic = \ - ','.join(function_statistic_match.group(0).split()) - lookup_index = function_statistic_match.end() - function_match = function_regex.search(pprof_statistic[lookup_index:]) - function_name = function_match.group(0) - lookup_index += function_match.end() - file_match = file_regex.search(pprof_statistic[lookup_index:]) - if file_match: - key = ",".join([function_name, os.path.normpath(file_match.group(0))]) - else: - key = function_name - - if key in cwp_statistics: - cwp_statistic = cwp_statistics[key] - output_lines.append(','.join([key, cwp_statistic[0], - function_statistic])) - cwp_statistics[key] = (cwp_statistic[0], self.COMMON_FUNCTION) + pprof_top_statistics = \ + utils.ParsePprofTopOutput(os.path.join(pprof_top_path, pprof_file)) + pprof_tree_statistics = \ + utils.ParsePprofTreeOutput(os.path.join(pprof_tree_path, pprof_file)) + common_functions_lines = [] + benchmark_function_metrics = {} + + for function_key, function_statistic in pprof_top_statistics.iteritems(): + if function_key not in cwp_inclusive_count_statistics: + continue + + cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction, _ = \ + cwp_inclusive_count_statistics[function_key] + cwp_inclusive_count_statistics[function_key] = \ + (cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction, + utils.COMMON_FUNCTION) + + function_name, _ = function_key.split(',') + distance = benchmark_metrics.ComputeDistanceForFunction( + pprof_tree_statistics[function_key], + cwp_pairwise_inclusive_count_fractions.get(function_name, {})) + benchmark_cum_p = float(function_statistic[4]) + score = benchmark_metrics.ComputeScoreForFunction( + distance, cwp_inclusive_count_fraction, benchmark_cum_p) + benchmark_function_metrics[function_key] = (distance, score) + + common_functions_lines.append(','.join([function_key, cwp_dso_name, str( + cwp_inclusive_count), str(cwp_inclusive_count_fraction), ','.join( + function_statistic), str(distance), str(score)])) + benchmark_function_groups_statistics = \ + benchmark_metrics.ComputeMetricsForComponents( + cwp_function_groups, benchmark_function_metrics) + benchmark_set_metrics[pprof_file] = \ + benchmark_metrics.ComputeMetricsForBenchmark( + benchmark_function_metrics) with open(os.path.join(common_functions_path, pprof_file), 'w') \ - as output_file: - output_file.write('\n'.join(output_lines)) + as output_file: + common_functions_lines.sort( + key=lambda x: float(x.split(',')[11]), reverse=True) + common_functions_lines.insert(0, 'function,file,dso,inclusive_count,' + 'inclusive_count_fraction,flat,flat%,' + 'sum%,cum,cum%,distance,score') + output_file.write('\n'.join(common_functions_lines)) + + with open(os.path.join(common_functions_groups_path, pprof_file), 'w') \ + as output_file: + common_functions_groups_lines = \ + [','.join([group_name, ','.join( + [str(statistic) for statistic in group_statistic])]) + for group_name, group_statistic in + benchmark_function_groups_statistics.iteritems()] + common_functions_groups_lines.sort( + key=lambda x: float(x.split(',')[5]), reverse=True) + common_functions_groups_lines.insert( + 0, 'group_name,file_path,number_of_functions,distance_cum,' + 'distance_avg,score_cum,score_avg') + output_file.write('\n'.join(common_functions_groups_lines)) + + with open(benchmark_set_metrics_file, 'w') as output_file: + benchmark_set_metrics_lines = [] + + for benchmark_name, metrics in benchmark_set_metrics.iteritems(): + benchmark_set_metrics_lines.append(','.join([benchmark_name, ','.join( + [str(metric) for metric in metrics])])) + benchmark_set_metrics_lines.sort( + key=lambda x: float(x.split(',')[4]), reverse=True) + benchmark_set_metrics_lines.insert( + 0, 'benchmark_name,number_of_functions,distance_cum,distance_avg,' + 'score_cum,score_avg') + output_file.write('\n'.join(benchmark_set_metrics_lines)) + + return cwp_inclusive_count_statistics + + def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups, + extra_cwp_functions_groups_path, + extra_cwp_functions_groups_file): + """Groups the extra functions. + + Writes the data of the functions that belong to each group in a separate + file, sorted by their inclusive count value, in descending order. The file + name is the same as the group name. - return cwp_statistics - - @staticmethod - def ParseFunctionGroups(cwp_function_groups_lines): - """Parses the contents of the function groups file. - - Args: - cwp_function_groups_lines: A list of the lines contained in the CWP - function groups file. - Returns: - A list of tuples containing the group name, the file path, the total - number of inclusive count values for that group, a list that will contain - the CWP statistics of the functions declared in files that share the file - path. - """ - cwp_function_groups = [] - - for line in cwp_function_groups_lines: - group_name, file_path = line.split() - cwp_function_groups.append((group_name, file_path, 0, [])) - - return cwp_function_groups - - def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups_file_prefix, - cwp_function_groups_file, - cwp_function_groups_statistics_file): - """Groups the functions that are in the CWP statistics and not in the pprof - output. A function belongs to a group that is defined by a given file path - if it is declared in a file that shares that path. - - Writes the data of the functions that belong to a group in a file, sorted - by their inclusive count value, in descendant order. The file name is - composed by the cwp_function_groups_file_prefix and the name of the group. The file is in CSV format, containing the fields: function name, file name, - object name, inclusive count. + object name, inclusive count, inclusive count fraction. - It creates a CSV file containing the name of the groups, their - common path, the total inclusive count value of all the functions declared - in files that share the common path, sorted in descendant order by the - inclusive count value. + It creates a CSV file containing the name of the group, their + common path, the total inclusive count and inclusive count fraction values + of all the functions declared in files that share the common path, sorted + in descending order by the inclusive count value. Args: cwp_statistics: A dict containing the CWP statistics. - cwp_function_groups_file_prefix: The prefix used for naming the files that - the function data for a specific group. - cwp_function_groups_file: The name of the file containing the groups of - functions. - cwp_function_groups_statistics_file: The name of the file that will - contain the statistics for the function groups. + cwp_function_groups: A list of tuples with the groups names and the path + describing the groups. + extra_cwp_functions_groups_path: The name of the directory containing + the CSV output files with the extra CWP functions that match a + particular group. + extra_cwp_functions_groups_file: The CSV output file containing the groups + that match the extra functions and their statistics. """ - with open(cwp_function_groups_file, 'r') as input_file: - cwp_function_groups = self.ParseFunctionGroups(input_file.readlines()) - + cwp_function_groups_statistics = defaultdict(lambda: ([], '', 0, 0.0)) for function, statistics in cwp_statistics.iteritems(): - if statistics[1] == self.COMMON_FUNCTION: + if statistics[3] == utils.COMMON_FUNCTION: continue + file_name = function.split(',')[1] - group_inclusive_count = int(statistics[0].split(',')[1]) - for i, group in enumerate(cwp_function_groups): + group_inclusive_count = int(statistics[1]) + group_inclusive_count_fraction = float(statistics[2]) + + for group in cwp_function_groups: group_common_path = group[1] - # The order of the groups mentioned in the cwp_functions_groups - # matters. A function declared in a file will belong to the first - # mentioned group that matches it's path to the one of the file. - # It is possible to have multiple paths that belong to the same group. - if group_common_path in file_name: - group_name = group[0] - group_inclusive_count += group[2] - group_lines = group[3] - - group_lines.append(','.join([function, statistics[0]])) - cwp_function_groups[i] = (group_name, group_common_path, - group_inclusive_count, group_lines) - break - - group_statistics_lines = [] - - for group_name, group_path, group_inclusive_count, group_lines in \ - cwp_function_groups: - group_statistics_lines.append(','.join([group_name, group_path, - str(group_inclusive_count)])) - if group_lines: - # Sort the output in descendant order based on the inclusive_count - # value. - group_lines.sort(key=lambda x: int(x.split(',')[-1]), reverse=True) - group_lines.insert(0, 'function,file,dso,inclusive_count') - group_file_name = cwp_function_groups_file_prefix + group_name - - with open(group_file_name, 'w') as output_file: - output_file.write('\n'.join(group_lines)) - - group_statistics_lines.sort( + if group_common_path not in file_name: + continue + + group_name = group[0] + group_statistics = cwp_function_groups_statistics[group_name] + group_lines = group_statistics[0] + group_inclusive_count += group_statistics[2] + group_inclusive_count_fraction += group_statistics[3] + + group_lines.append(','.join([function, statistics[0], + str(statistics[1]), str(statistics[2])])) + cwp_function_groups_statistics[group_name] = \ + (group_lines, group_common_path, group_inclusive_count, + group_inclusive_count_fraction) + break + + extra_cwp_functions_groups_lines = [] + for group_name, group_statistics \ + in cwp_function_groups_statistics.iteritems(): + group_output_lines = group_statistics[0] + group_output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True) + group_output_lines.insert( + 0, 'function,file,dso,inclusive_count,inclusive_count_fraction') + with open(os.path.join(extra_cwp_functions_groups_path, group_name), + 'w') as output_file: + output_file.write('\n'.join(group_output_lines)) + extra_cwp_functions_groups_lines.append(','.join( + [group_name, group_statistics[1], str(group_statistics[2]), str( + group_statistics[3])])) + + extra_cwp_functions_groups_lines.sort( key=lambda x: int(x.split(',')[2]), reverse=True) - group_statistics_lines.insert(0, 'group,shared_path,inclusive_count') - - with open(cwp_function_groups_statistics_file, 'w') as output_file: - output_file.write('\n'.join(group_statistics_lines)) + extra_cwp_functions_groups_lines.insert( + 0, 'group,shared_path,inclusive_count,inclusive_count_fraction') + with open(extra_cwp_functions_groups_file, 'w') as output_file: + output_file.write('\n'.join(extra_cwp_functions_groups_lines)) def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file): - """Gets the functions that are in the CWP file, but not in the pprof output. + """Gets the functions that are in the CWP data, but not in the pprof output. Writes the functions and their statistics in the extra_cwp_functions_file file. The output is sorted based on the inclusive_count value. The file is in CSV format, containing the fields: function name, file name, object name, - inclusive count. + inclusive count and inclusive count fraction. Args: - cwp_statistics: A dict containing the CWP statistics. - extra_cwp_functions_file: The file where should be stored the CWP - functions and statistics that are marked as NOT_COMMON_FUNCTIONS. + cwp_statistics: A dict containing the CWP statistics indexed by the + function and the file name, comma separated. + extra_cwp_functions_file: The file where it should be stored the CWP + functions and statistics that are marked as EXTRA_FUNCTION. """ output_lines = [] for function, statistics in cwp_statistics.iteritems(): - if statistics[1] == self.NOT_COMMON_FUNCTION: - output_lines.append(','.join([function, statistics[0]])) + if statistics[3] == utils.EXTRA_FUNCTION: + output_lines.append(','.join([function, statistics[0], + str(statistics[1]), str(statistics[2])])) with open(extra_cwp_functions_file, 'w') as output_file: - output_lines.sort(key=lambda x: int(x.split(',')[-1]), reverse=True) - output_lines.insert(0, 'function,file,dso,inclusive_count') + output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True) + output_lines.insert(0, 'function,file,dso,inclusive_count,' + 'inclusive_count_fraction') output_file.write('\n'.join(output_lines)) @@ -318,65 +433,92 @@ def ParseArguments(arguments): parser = argparse.ArgumentParser() parser.add_argument( - '-p', - '--pprof_path', - dest='pprof_path', + '--pprof_top_path', required=True, - help='The directory containing the pprof output files.') + help='The directory containing the files with the pprof --top output of ' + 'the benchmark profiles (the hot functions). The name of the files ' + 'should match with the ones from the pprof tree output files.') parser.add_argument( - '-w', - '--cwp_hot_functions_file', - dest='cwp_hot_functions_file', + '--pprof_tree_path', required=True, - help='The CSV file containing the CWP hot functions. The ' - 'file should include the name of the functions, the ' - 'file names with the definition, the object file ' - 'and the CWP inclusive count values, comma ' - 'separated.') + help='The directory containing the files with the pprof --tree output ' + 'of the benchmark profiles (the call chains). The name of the files ' + 'should match with the ones of the pprof top output files.') parser.add_argument( - '-c', - '--common_functions_path', - dest='common_functions_path', + '--cwp_inclusive_count_file', required=True, - help='The directory containing the files with the pprof ' - 'and CWP common functions. A file will contain all ' - 'the hot functions from a pprof output file that ' - 'are also included in the CWP hot functions file. ' - 'The files with the common functions will have the ' - 'same names with the corresponding pprof output ' - 'files.') + help='The CSV file containing the CWP hot functions with their ' + 'inclusive_count values. The CSV fields include the name of the ' + 'function, the file and the object with the definition, the inclusive ' + 'count value and the inclusive count fraction out of the total amount of ' + 'inclusive count values.') parser.add_argument( - '-e', - '--extra_cwp_functions_file', - dest='extra_cwp_functions_file', + '--cwp_pairwise_inclusive_count_file', required=True, - help='The file that will contain the CWP hot functions ' - 'that are not in any of the pprof output files. ' - 'The file should include the name of the functions, ' - 'the file names with the definition, the object ' - 'file and the CWP inclusive count values, comma ' - 'separated.') + help='The CSV file containing the CWP pairs of parent and child ' + 'functions with their inclusive count values. The CSV fields include the ' + 'name of the parent and child functions concatenated by ;;, the file ' + 'and the object with the definition of the child function, and the ' + 'inclusive count value.') parser.add_argument( - '-g', '--cwp_function_groups_file', - dest='cwp_function_groups_file', - help='The file that will contain the CWP function groups.' - 'A line consists in the group name and a file path. A group must ' + required=True, + help='The file that contains the CWP function groups. A line consists in ' + 'the group name and a file path describing the group. A group must ' 'represent a ChromeOS component.') parser.add_argument( - '-s', - '--cwp_function_groups_statistics_file', - dest='cwp_function_groups_statistics_file', - help='The file that will contain the total inclusive count values of CWP ' - 'function groups in CSV format. A line will contain the name of the ' - 'group, the common path, the total inclusive count value of all the' - 'functions declared in files that share the common path.') + '--common_functions_path', + required=True, + help='The directory containing the CSV output files with the common ' + 'functions of the benchmark profiles and CWP data. A file will contain ' + 'all the hot functions from a pprof top output file that are also ' + 'included in the file containing the cwp inclusive count values. The CSV ' + 'fields are: the function name, the file and the object where the ' + 'function is declared, the CWP inclusive count and inclusive count ' + 'fraction values, the cumulative and average distance, the cumulative ' + 'and average score. The files with the common functions will have the ' + 'same names with the corresponding pprof output files.') + parser.add_argument( + '--common_functions_groups_path', + required=True, + help='The directory containing the CSV output files with the Chrome OS ' + 'groups and their metrics that match the common functions of the ' + 'benchmark profiles and CWP. The files with the groups will have the ' + 'same names with the corresponding pprof output files. The CSV fields ' + 'include the group name, group path, the number of functions that match ' + 'the group, the average and cumulative distance, the average and ' + 'cumulative score.') + parser.add_argument( + '--benchmark_set_metrics_file', + required=True, + help='The CSV output file containing the metrics for each benchmark. The ' + 'CSV fields include the benchmark name, the number of common functions, ' + 'the average and cumulative distance and score.') parser.add_argument( - '-x', - '--cwp_function_groups_file_prefix', - dest='cwp_function_groups_file_prefix', - help='The prefix of the files that will store the function statistics ' - 'for each function group.') + '--extra_cwp_functions_file', + required=True, + help='The CSV output file containing the functions that are in the CWP ' + 'data, but are not in any of the benchmark profiles. The CSV fields ' + 'include the name of the function, the file name and the object with the ' + 'definition, and the CWP inclusive count and inclusive count fraction ' + 'values. The entries are sorted in descending order based on the ' + 'inclusive count value.') + parser.add_argument( + '--extra_cwp_functions_groups_file', + required=True, + help='The CSV output file containing the groups that match the extra CWP ' + 'functions and their statistics. The CSV fields include the group name, ' + 'the file path, the total inclusive count and inclusive count fraction ' + 'values of the functions matching a particular group.') + parser.add_argument( + '--extra_cwp_functions_groups_path', + required=True, + help='The directory containing the CSV output files with the extra CWP ' + 'functions that match a particular group. The name of the file is the ' + 'same as the group name. The CSV fields include the name of the ' + 'function, the file name and the object with the definition, and the CWP ' + 'inclusive count and inclusive count fraction values. The entries are ' + 'sorted in descending order based on the inclusive count value.') options = parser.parse_args(arguments) @@ -386,11 +528,29 @@ def ParseArguments(arguments): def Main(argv): options = ParseArguments(argv) - hot_functions_processor = HotFunctionsProcessor(options.pprof_path, - options.cwp_hot_functions_file, options.common_functions_path, - options.extra_cwp_functions_file, options.cwp_function_groups_file, - options.cwp_function_groups_statistics_file, - options.cwp_function_groups_file_prefix) + if os.path.exists(options.common_functions_path): + shutil.rmtree(options.common_functions_path) + + os.makedirs(options.common_functions_path) + + if os.path.exists(options.common_functions_groups_path): + shutil.rmtree(options.common_functions_groups_path) + + os.makedirs(options.common_functions_groups_path) + + if os.path.exists(options.extra_cwp_functions_groups_path): + shutil.rmtree(options.extra_cwp_functions_groups_path) + + os.makedirs(options.extra_cwp_functions_groups_path) + + hot_functions_processor = HotFunctionsProcessor( + options.pprof_top_path, options.pprof_tree_path, + options.cwp_inclusive_count_file, + options.cwp_pairwise_inclusive_count_file, + options.cwp_function_groups_file, options.common_functions_path, + options.common_functions_groups_path, options.benchmark_set_metrics_file, + options.extra_cwp_functions_file, options.extra_cwp_functions_groups_file, + options.extra_cwp_functions_groups_path) hot_functions_processor.ProcessHotFunctions() |