Diffstat (limited to 'user_activity_benchmarks/select_optimal_benchmark_set.py')
-rwxr-xr-x | user_activity_benchmarks/select_optimal_benchmark_set.py | 347 |
1 file changed, 0 insertions, 347 deletions
diff --git a/user_activity_benchmarks/select_optimal_benchmark_set.py b/user_activity_benchmarks/select_optimal_benchmark_set.py
deleted file mode 100755
index 1c8305cf..00000000
--- a/user_activity_benchmarks/select_optimal_benchmark_set.py
+++ /dev/null
@@ -1,347 +0,0 @@
-#!/usr/bin/python2
-
-# Copyright 2016 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-"""Selects the optimal set of benchmarks.
-
-For each benchmark, there is a file with the common functions, as extracted by
-the process_hot_functions module.
-
-The script receives as input the CSV file with the CWP inclusive count values,
-the file with Chrome OS groups and the path containing a file with common
-functions for every benchmark.
-
-It extracts for every benchmark and for the CWP data all the functions that
-match the given Chrome OS groups.
-
-It generates all possible combinations of benchmark sets of a given size and
-it computes a metric for every set.
-It outputs the optimal sets, based on which ones have the best metric.
-
-Three different metrics have been used: function count, distance
-variation and score.
-
-For the function count metric, we count the unique functions covered by a
-set of benchmarks. Besides the number of unique functions, we also compute
-the fraction of unique functions out of the amount of CWP functions from the
-given groups. The benchmark set with the highest number of unique functions
-that belong to all the given groups is considered better.
-
-For the distance variation metric, we compute the sum of the distance variations
-of the functions covered by a set of benchmarks. We define the distance
-variation as the difference between the distance value of a function and the
-ideal distance value (1.0). If a function appears in multiple common functions
-files, we consider only the minimum value. We also compute the distance
-variation per function. The set that has the smaller value for the
-distance variation per function is considered better.
-
-For the score metric, we compute the sum of the scores of the functions from a
-set of benchmarks. If a function appears in multiple common functions files,
-we consider only the maximum value. We also compute the fraction of this sum
-out of the sum of all the scores of the functions from the CWP data covering
-the given groups, in the ideal case (the ideal score of a function is 1.0).
-
-We compute the metrics in the same manner for individual Chrome OS groups.
-"""
-
-from collections import defaultdict
-
-import argparse
-import csv
-import itertools
-import json
-import operator
-import os
-import sys
-
-import benchmark_metrics
-import utils
-
-
-class BenchmarkSet(object):
-  """Selects the optimal set of benchmarks of given size."""
-
-  # Constants that specify the metric type.
-  FUNCTION_COUNT_METRIC = 'function_count'
-  DISTANCE_METRIC = 'distance_variation'
-  SCORE_METRIC = 'score_fraction'
-
-  def __init__(self, benchmark_set_size, benchmark_set_output_file,
-               benchmark_set_common_functions_path, cwp_inclusive_count_file,
-               cwp_function_groups_file, metric):
-    """Initializes the BenchmarkSet.
-
-    Args:
-      benchmark_set_size: Constant representing the size of a benchmark set.
-      benchmark_set_output_file: The output file that will contain the set of
-        optimal benchmarks with the metric values.
-      benchmark_set_common_functions_path: The directory containing the files
-        with the common functions for the list of benchmarks.
-      cwp_inclusive_count_file: The CSV file containing the CWP functions with
-        their inclusive count values.
-      cwp_function_groups_file: The file that contains the CWP function groups.
-      metric: The type of metric used for the analysis.
-    """
-    self._benchmark_set_size = int(benchmark_set_size)
-    self._benchmark_set_output_file = benchmark_set_output_file
-    self._benchmark_set_common_functions_path = \
-        benchmark_set_common_functions_path
-    self._cwp_inclusive_count_file = cwp_inclusive_count_file
-    self._cwp_function_groups_file = cwp_function_groups_file
-    self._metric = metric
-
-  @staticmethod
-  def OrganizeCWPFunctionsInGroups(cwp_inclusive_count_statistics,
-                                   cwp_function_groups):
-    """Selects the CWP functions that match the given Chrome OS groups.
-
-    Args:
-      cwp_inclusive_count_statistics: A dict with the CWP functions.
-      cwp_function_groups: A list with the CWP function groups.
-
-    Returns:
-      A dict having as a key the name of the groups and as a value the list of
-      CWP functions that match an individual group.
-    """
-    cwp_functions_grouped = defaultdict(list)
-    for function_key in cwp_inclusive_count_statistics:
-      _, file_name = function_key.split(',')
-      for group_name, file_path in cwp_function_groups:
-        if file_path not in file_name:
-          continue
-        cwp_functions_grouped[group_name].append(function_key)
-        break
-    return cwp_functions_grouped
-
-  @staticmethod
-  def OrganizeBenchmarkSetFunctionsInGroups(benchmark_set_files,
-                                            benchmark_set_common_functions_path,
-                                            cwp_function_groups):
-    """Selects the benchmark functions that match the given Chrome OS groups.
-
-    Args:
-      benchmark_set_files: The list of common functions files corresponding to
-        a benchmark.
-      benchmark_set_common_functions_path: The directory containing the files
-        with the common functions for the list of benchmarks.
-      cwp_function_groups: A list with the CWP function groups.
-
-    Returns:
-      A dict having as a key the name of a common functions file. The value is
-      a dict having as a key the name of a group and as value a list of
-      functions that match the given group.
-    """
-
-    benchmark_set_functions_grouped = {}
-    for benchmark_file_name in benchmark_set_files:
-      benchmark_full_file_path = \
-          os.path.join(benchmark_set_common_functions_path,
-                       benchmark_file_name)
-      with open(benchmark_full_file_path) as input_file:
-        statistics_reader = \
-            csv.DictReader(input_file, delimiter=',')
-        benchmark_functions_grouped = defaultdict(dict)
-        for statistic in statistics_reader:
-          function_name = statistic['function']
-          file_name = statistic['file']
-          for group_name, file_path in cwp_function_groups:
-            if file_path not in file_name:
-              continue
-            function_key = ','.join([function_name, file_name])
-            distance = float(statistic['distance'])
-            score = float(statistic['score'])
-            benchmark_functions_grouped[group_name][function_key] = \
-                (distance, score)
-            break
-        benchmark_set_functions_grouped[benchmark_file_name] = \
-            benchmark_functions_grouped
-    return benchmark_set_functions_grouped
-
-  @staticmethod
-  def SelectOptimalBenchmarkSetBasedOnMetric(all_benchmark_combinations_sets,
-                                             benchmark_set_functions_grouped,
-                                             cwp_functions_grouped,
-                                             metric_function_for_set,
-                                             metric_comparison_operator,
-                                             metric_default_value,
-                                             metric_string):
-    """Generic method that selects the optimal benchmark set based on a metric.
-
-    The reason for implementing a generic function is to avoid duplicating the
-    logic for selecting a benchmark set based on the three different metrics.
-
-    Args:
-      all_benchmark_combinations_sets: The list with all the sets of benchmark
-        combinations.
-      benchmark_set_functions_grouped: A dict with benchmark functions as
-        returned by OrganizeBenchmarkSetFunctionsInGroups.
-      cwp_functions_grouped: A dict with the CWP functions as returned by
-        OrganizeCWPFunctionsInGroups.
-      metric_function_for_set: The method used to compute the metric for a
-        given benchmark set.
-      metric_comparison_operator: A comparison operator used to compare two
-        values of the same metric (i.e. operator.lt or operator.gt).
-      metric_default_value: The default value for the metric.
-      metric_string: A tuple of strings used in the JSON output for the pair of
-        the values of the metric.
-
-    Returns:
-      A list of tuples, one for each optimal benchmark set. A tuple contains
-      the list of benchmarks from the set, the pair of metric values and a
-      dictionary with the metrics for each group.
-    """
-    optimal_sets = [([], metric_default_value, {})]
-
-    for benchmark_combination_set in all_benchmark_combinations_sets:
-      function_metrics = [benchmark_set_functions_grouped[benchmark]
-                          for benchmark in benchmark_combination_set]
-      set_metrics, set_groups_metrics = \
-          metric_function_for_set(function_metrics, cwp_functions_grouped,
-                                  metric_string)
-      optimal_value = optimal_sets[0][1][0]
-      if metric_comparison_operator(set_metrics[0], optimal_value):
-        optimal_sets = \
-            [(benchmark_combination_set, set_metrics, set_groups_metrics)]
-      elif set_metrics[0] == optimal_sets[0][1][0]:
-        optimal_sets.append(
-            (benchmark_combination_set, set_metrics, set_groups_metrics))
-
-    return optimal_sets
-
-  def SelectOptimalBenchmarkSet(self):
-    """Selects the optimal benchmark sets and writes them in JSON format.
-
-    Parses the CWP inclusive count statistics and benchmark common functions
-    files. Organizes the functions into groups. For every optimal benchmark
-    set, the method writes to self._benchmark_set_output_file the list of
-    benchmarks, the pair of metrics and a dictionary with the pair of
-    metrics for each group covered by the benchmark set.
-    """
-
-    benchmark_set_files = os.listdir(self._benchmark_set_common_functions_path)
-    all_benchmark_combinations_sets = \
-        itertools.combinations(benchmark_set_files, self._benchmark_set_size)
-
-    with open(self._cwp_function_groups_file) as input_file:
-      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())
-
-    cwp_inclusive_count_statistics = \
-        utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_count_file)
-    cwp_functions_grouped = self.OrganizeCWPFunctionsInGroups(
-        cwp_inclusive_count_statistics, cwp_function_groups)
-    benchmark_set_functions_grouped = \
-        self.OrganizeBenchmarkSetFunctionsInGroups(
-            benchmark_set_files, self._benchmark_set_common_functions_path,
-            cwp_function_groups)
-
-    if self._metric == self.FUNCTION_COUNT_METRIC:
-      metric_function_for_benchmark_set = \
-          benchmark_metrics.ComputeFunctionCountForBenchmarkSet
-      metric_comparison_operator = operator.gt
-      metric_default_value = (0, 0.0)
-      metric_string = ('function_count', 'function_count_fraction')
-    elif self._metric == self.DISTANCE_METRIC:
-      metric_function_for_benchmark_set = \
-          benchmark_metrics.ComputeDistanceForBenchmarkSet
-      metric_comparison_operator = operator.lt
-      metric_default_value = (float('inf'), float('inf'))
-      metric_string = \
-          ('distance_variation_per_function', 'total_distance_variation')
-    elif self._metric == self.SCORE_METRIC:
-      metric_function_for_benchmark_set = \
-          benchmark_metrics.ComputeScoreForBenchmarkSet
-      metric_comparison_operator = operator.gt
-      metric_default_value = (0.0, 0.0)
-      metric_string = ('score_fraction', 'total_score')
-    else:
-      raise ValueError('Invalid metric')
-
-    optimal_benchmark_sets = \
-        self.SelectOptimalBenchmarkSetBasedOnMetric(
-            all_benchmark_combinations_sets, benchmark_set_functions_grouped,
-            cwp_functions_grouped, metric_function_for_benchmark_set,
-            metric_comparison_operator, metric_default_value, metric_string)
-
-    json_output = []
-
-    for benchmark_set in optimal_benchmark_sets:
-      json_entry = {
-          'benchmark_set':
-              list(benchmark_set[0]),
-          'metrics': {
-              metric_string[0]: benchmark_set[1][0],
-              metric_string[1]: benchmark_set[1][1]
-          },
-          'groups':
-              dict(benchmark_set[2])
-      }
-      json_output.append(json_entry)
-
-    with open(self._benchmark_set_output_file, 'w') as output_file:
-      json.dump(json_output, output_file)
-
-
-def ParseArguments(arguments):
-  parser = argparse.ArgumentParser()
-
-  parser.add_argument(
-      '--benchmark_set_common_functions_path',
-      required=True,
-      help='The directory containing the CSV files with the common functions '
-      'of the benchmark profiles and CWP data. A file will contain all the hot '
-      'functions from a pprof top output file that are also included in the '
-      'file containing the CWP inclusive count values. The CSV fields are: the '
-      'function name, the file and the object where the function is declared, '
-      'the CWP inclusive count and inclusive count fraction values, the '
-      'cumulative and average distance, the cumulative and average score. The '
-      'files with the common functions will have the same names as the '
-      'corresponding pprof output files.')
-  parser.add_argument(
-      '--cwp_inclusive_count_file',
-      required=True,
-      help='The CSV file containing the CWP hot functions with their '
-      'inclusive_count values. The CSV fields include the name of the '
-      'function, the file and the object with the definition, the inclusive '
-      'count value and the inclusive count fraction out of the total amount of '
-      'inclusive count values.')
-  parser.add_argument(
-      '--benchmark_set_size',
-      required=True,
-      help='The size of the benchmark sets.')
-  parser.add_argument(
-      '--benchmark_set_output_file',
-      required=True,
-      help='The JSON output file containing optimal benchmark sets with their '
-      'metrics. For every optimal benchmark set, the file contains the list of '
-      'benchmarks, the pair of metrics and a dictionary with the pair of '
-      'metrics for each group covered by the benchmark set.')
-  parser.add_argument(
-      '--metric',
-      required=True,
-      help='The metric used to select the optimal benchmark set. The possible '
-      'values are: distance_variation, function_count and score_fraction.')
-  parser.add_argument(
-      '--cwp_function_groups_file',
-      required=True,
-      help='The file that contains the CWP function groups. A line consists of '
-      'the group name and a file path describing the group. A group must '
-      'represent a Chrome OS component.')
-
-  options = parser.parse_args(arguments)
-
-  return options
-
-
-def Main(argv):
-  options = ParseArguments(argv)
-  benchmark_set = BenchmarkSet(options.benchmark_set_size,
-                               options.benchmark_set_output_file,
-                               options.benchmark_set_common_functions_path,
-                               options.cwp_inclusive_count_file,
-                               options.cwp_function_groups_file, options.metric)
-  benchmark_set.SelectOptimalBenchmarkSet()
-
-
-if __name__ == '__main__':
-  Main(sys.argv[1:])
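For reference, the sketch below shows one way the deleted script could be driven programmatically, based on the argparse flags it defines. It is illustrative only: the input paths, the set size and the chosen metric are hypothetical, and it assumes the script is run from its own directory so that the benchmark_metrics and utils modules it imports can be resolved.

# Hypothetical invocation of the deleted script; all paths below are
# placeholders. The flag names and accepted metric values come from
# ParseArguments above.
import select_optimal_benchmark_set

select_optimal_benchmark_set.Main([
    '--benchmark_set_common_functions_path', 'common_functions/',
    '--cwp_inclusive_count_file', 'cwp_inclusive_count.csv',
    '--cwp_function_groups_file', 'cwp_function_groups.txt',
    '--benchmark_set_size', '3',
    '--benchmark_set_output_file', 'optimal_benchmark_sets.json',
    '--metric', 'score_fraction',  # or 'function_count' / 'distance_variation'
])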