#!/usr/bin/python2 # Copyright 2016 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Selects the optimal set of benchmarks. For each benchmark, there is a file with the common functions, as extracted by the process_hot_functions module. The script receives as input the CSV file with the CWP inclusive count values, the file with Chrome OS groups and the path containing a file with common functions for every benchmark. It extracts for every benchmark and for the CWP data all the functions that match the given Chrome OS groups. It generates all possible combinations of benchmark sets of a given size and it computes for every set a metric. It outputs the optimal sets, based on which ones have the best metric. Three different metrics have been used: function count, distance variation and score. For the function count metric, we count the unique functions covered by a set of benchmarks. Besides the number of unique functions, we compute also the fraction of unique functions out of the amount of CWP functions from the given groups. The benchmark set with the highest amount of unique functions that belong to all the given groups is considered better. For the distance variation metric, we compute the sum of the distance variations of the functions covered by a set of benchmarks. We define the distance variation as the difference between the distance value of a function and the ideal distance value (1.0). If a function appears in multiple common functions files, we consider only the minimum value. We compute also the distance variation per function. The set that has the smaller value for the distance variation per function is considered better. For the score metric, we compute the sum of the scores of the functions from a set of benchmarks. If a function appears in multiple common functions files, we consider only the maximum value. We compute also the fraction of this sum from the sum of all the scores of the functions from the CWP data covering the given groups, in the ideal case (the ideal score of a function is 1.0). We compute the metrics in the same manner for individual Chrome OS groups. """ from collections import defaultdict import argparse import csv import itertools import json import operator import os import sys import benchmark_metrics import utils class BenchmarkSet(object): """Selects the optimal set of benchmarks of given size.""" # Constants that specify the metric type. FUNCTION_COUNT_METRIC = 'function_count' DISTANCE_METRIC = 'distance_variation' SCORE_METRIC = 'score_fraction' def __init__(self, benchmark_set_size, benchmark_set_output_file, benchmark_set_common_functions_path, cwp_inclusive_count_file, cwp_function_groups_file, metric): """Initializes the BenchmarkSet. Args: benchmark_set_size: Constant representing the size of a benchmark set. benchmark_set_output_file: The output file that will contain the set of optimal benchmarks with the metric values. benchmark_set_common_functions_path: The directory containing the files with the common functions for the list of benchmarks. cwp_inclusive_count_file: The CSV file containing the CWP functions with their inclusive count values. cwp_function_groups_file: The file that contains the CWP function groups. metric: The type of metric used for the analysis. """ self._benchmark_set_size = int(benchmark_set_size) self._benchmark_set_output_file = benchmark_set_output_file self._benchmark_set_common_functions_path = \ benchmark_set_common_functions_path self._cwp_inclusive_count_file = cwp_inclusive_count_file self._cwp_function_groups_file = cwp_function_groups_file self._metric = metric @staticmethod def OrganizeCWPFunctionsInGroups(cwp_inclusive_count_statistics, cwp_function_groups): """Selects the CWP functions that match the given Chrome OS groups. Args: cwp_inclusive_count_statistics: A dict with the CWP functions. cwp_function_groups: A list with the CWP function groups. Returns: A dict having as a key the name of the groups and as a value the list of CWP functions that match an individual group. """ cwp_functions_grouped = defaultdict(list) for function_key in cwp_inclusive_count_statistics: _, file_name = function_key.split(',') for group_name, file_path in cwp_function_groups: if file_path not in file_name: continue cwp_functions_grouped[group_name].append(function_key) break return cwp_functions_grouped @staticmethod def OrganizeBenchmarkSetFunctionsInGroups(benchmark_set_files, benchmark_set_common_functions_path, cwp_function_groups): """Selects the benchmark functions that match the given Chrome OS groups. Args: benchmark_set_files: The list of common functions files corresponding to a benchmark. benchmark_set_common_functions_path: The directory containing the files with the common functions for the list of benchmarks. cwp_function_groups: A list with the CWP function groups. Returns: A dict having as a key the name of a common functions file. The value is a dict having as a key the name of a group and as value a list of functions that match the given group. """ benchmark_set_functions_grouped = {} for benchmark_file_name in benchmark_set_files: benchmark_full_file_path = \ os.path.join(benchmark_set_common_functions_path, benchmark_file_name) with open(benchmark_full_file_path) as input_file: statistics_reader = \ csv.DictReader(input_file, delimiter=',') benchmark_functions_grouped = defaultdict(dict) for statistic in statistics_reader: function_name = statistic['function'] file_name = statistic['file'] for group_name, file_path in cwp_function_groups: if file_path not in file_name: continue function_key = ','.join([function_name, file_name]) distance = float(statistic['distance']) score = float(statistic['score']) benchmark_functions_grouped[group_name][function_key] = \ (distance, score) break benchmark_set_functions_grouped[benchmark_file_name] = \ benchmark_functions_grouped return benchmark_set_functions_grouped @staticmethod def SelectOptimalBenchmarkSetBasedOnMetric(all_benchmark_combinations_sets, benchmark_set_functions_grouped, cwp_functions_grouped, metric_function_for_set, metric_comparison_operator, metric_default_value, metric_string): """Generic method that selects the optimal benchmark set based on a metric. The reason of implementing a generic function is to avoid logic duplication for selecting a benchmark set based on the three different metrics. Args: all_benchmark_combinations_sets: The list with all the sets of benchmark combinations. benchmark_set_functions_grouped: A dict with benchmark functions as returned by OrganizeBenchmarkSetFunctionsInGroups. cwp_functions_grouped: A dict with the CWP functions as returned by OrganizeCWPFunctionsInGroups. metric_function_for_set: The method used to compute the metric for a given benchmark set. metric_comparison_operator: A comparison operator used to compare two values of the same metric (i.e: operator.lt or operator.gt). metric_default_value: The default value for the metric. metric_string: A tuple of strings used in the JSON output for the pair of the values of the metric. Returns: A list of tuples containing for each optimal benchmark set. A tuple contains the list of benchmarks from the set, the pair of metric values and a dictionary with the metrics for each group. """ optimal_sets = [([], metric_default_value, {})] for benchmark_combination_set in all_benchmark_combinations_sets: function_metrics = [benchmark_set_functions_grouped[benchmark] for benchmark in benchmark_combination_set] set_metrics, set_groups_metrics = \ metric_function_for_set(function_metrics, cwp_functions_grouped, metric_string) optimal_value = optimal_sets[0][1][0] if metric_comparison_operator(set_metrics[0], optimal_value): optimal_sets = \ [(benchmark_combination_set, set_metrics, set_groups_metrics)] elif set_metrics[0] == optimal_sets[0][1][0]: optimal_sets.append( (benchmark_combination_set, set_metrics, set_groups_metrics)) return optimal_sets def SelectOptimalBenchmarkSet(self): """Selects the optimal benchmark sets and writes them in JSON format. Parses the CWP inclusive count statistics and benchmark common functions files. Organizes the functions into groups. For every optimal benchmark set, the method writes in the self._benchmark_set_output_file the list of benchmarks, the pair of metrics and a dictionary with the pair of metrics for each group covered by the benchmark set. """ benchmark_set_files = os.listdir(self._benchmark_set_common_functions_path) all_benchmark_combinations_sets = \ itertools.combinations(benchmark_set_files, self._benchmark_set_size) with open(self._cwp_function_groups_file) as input_file: cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines()) cwp_inclusive_count_statistics = \ utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_count_file) cwp_functions_grouped = self.OrganizeCWPFunctionsInGroups( cwp_inclusive_count_statistics, cwp_function_groups) benchmark_set_functions_grouped = \ self.OrganizeBenchmarkSetFunctionsInGroups( benchmark_set_files, self._benchmark_set_common_functions_path, cwp_function_groups) if self._metric == self.FUNCTION_COUNT_METRIC: metric_function_for_benchmark_set = \ benchmark_metrics.ComputeFunctionCountForBenchmarkSet metric_comparison_operator = operator.gt metric_default_value = (0, 0.0) metric_string = ('function_count', 'function_count_fraction') elif self._metric == self.DISTANCE_METRIC: metric_function_for_benchmark_set = \ benchmark_metrics.ComputeDistanceForBenchmarkSet metric_comparison_operator = operator.lt metric_default_value = (float('inf'), float('inf')) metric_string = \ ('distance_variation_per_function', 'total_distance_variation') elif self._metric == self.SCORE_METRIC: metric_function_for_benchmark_set = \ benchmark_metrics.ComputeScoreForBenchmarkSet metric_comparison_operator = operator.gt metric_default_value = (0.0, 0.0) metric_string = ('score_fraction', 'total_score') else: raise ValueError("Invalid metric") optimal_benchmark_sets = \ self.SelectOptimalBenchmarkSetBasedOnMetric( all_benchmark_combinations_sets, benchmark_set_functions_grouped, cwp_functions_grouped, metric_function_for_benchmark_set, metric_comparison_operator, metric_default_value, metric_string) json_output = [] for benchmark_set in optimal_benchmark_sets: json_entry = { 'benchmark_set': list(benchmark_set[0]), 'metrics': { metric_string[0]: benchmark_set[1][0], metric_string[1]: benchmark_set[1][1] }, 'groups': dict(benchmark_set[2]) } json_output.append(json_entry) with open(self._benchmark_set_output_file, 'w') as output_file: json.dump(json_output, output_file) def ParseArguments(arguments): parser = argparse.ArgumentParser() parser.add_argument( '--benchmark_set_common_functions_path', required=True, help='The directory containing the CSV files with the common functions ' 'of the benchmark profiles and CWP data. A file will contain all the hot ' 'functions from a pprof top output file that are also included in the ' 'file containing the cwp inclusive count values. The CSV fields are: the ' 'function name, the file and the object where the function is declared, ' 'the CWP inclusive count and inclusive count fraction values, the ' 'cumulative and average distance, the cumulative and average score. The ' 'files with the common functions will have the same names with the ' 'corresponding pprof output files.') parser.add_argument( '--cwp_inclusive_count_file', required=True, help='The CSV file containing the CWP hot functions with their ' 'inclusive_count values. The CSV fields include the name of the ' 'function, the file and the object with the definition, the inclusive ' 'count value and the inclusive count fraction out of the total amount of ' 'inclusive count values.') parser.add_argument( '--benchmark_set_size', required=True, help='The size of the benchmark sets.') parser.add_argument( '--benchmark_set_output_file', required=True, help='The JSON output file containing optimal benchmark sets with their ' 'metrics. For every optimal benchmark set, the file contains the list of ' 'benchmarks, the pair of metrics and a dictionary with the pair of ' 'metrics for each group covered by the benchmark set.') parser.add_argument( '--metric', required=True, help='The metric used to select the optimal benchmark set. The possible ' 'values are: distance_variation, function_count and score_fraction.') parser.add_argument( '--cwp_function_groups_file', required=True, help='The file that contains the CWP function groups. A line consists in ' 'the group name and a file path describing the group. A group must ' 'represent a Chrome OS component.') options = parser.parse_args(arguments) return options def Main(argv): options = ParseArguments(argv) benchmark_set = BenchmarkSet(options.benchmark_set_size, options.benchmark_set_output_file, options.benchmark_set_common_functions_path, options.cwp_inclusive_count_file, options.cwp_function_groups_file, options.metric) benchmark_set.SelectOptimalBenchmarkSet() if __name__ == '__main__': Main(sys.argv[1:])