diff options
Diffstat (limited to 'user_activity_benchmarks/benchmark_metrics.py')
-rw-r--r-- | user_activity_benchmarks/benchmark_metrics.py | 306 |
1 files changed, 306 insertions, 0 deletions
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Computes the metrics for functions, Chrome OS components and benchmarks."""

from collections import defaultdict


def ComputeDistanceForFunction(child_functions_statistics_sample,
                               child_functions_statistics_reference):
  """Computes the distance metric for a function.

  Args:
    child_functions_statistics_sample: A dict mapping the name of a child
      function of the sample parent function to its inclusive count
      fraction.
    child_functions_statistics_reference: A dict mapping the name of a child
      function of the reference parent function to its inclusive count
      fraction.

  Returns:
    A float value representing the sum of inclusive count fraction
    differences of pairs of common child functions. If a child function is
    present in a single data set, the missing inclusive count fraction is
    treated as 0. This value describes the difference in behaviour between
    the sample and the reference parent function.
  """
  # Start from 1.0 rather than 0.0 so that ComputeScoreForFunction, which
  # divides by this distance, can never divide by zero.
  distance = 1.0

  # Children present in the reference: add the absolute fraction difference
  # (a child missing from the sample contributes its full reference value).
  for child_function, reference_fraction in (
      child_functions_statistics_reference.items()):
    sample_fraction = child_functions_statistics_sample.get(child_function,
                                                            0.0)
    distance += abs(sample_fraction - reference_fraction)

  # Children present only in the sample contribute their full fraction.
  for child_function, sample_fraction in (
      child_functions_statistics_sample.items()):
    if child_function not in child_functions_statistics_reference:
      distance += sample_fraction

  return distance


def ComputeScoreForFunction(distance, reference_fraction, sample_fraction):
  """Computes the score for a function.

  Args:
    distance: A float value representing the difference in behaviour between
      the sample and the reference function; always >= 1.0 (see
      ComputeDistanceForFunction), so this never divides by zero.
    reference_fraction: A float value representing the inclusive count
      fraction of the reference function.
    sample_fraction: A float value representing the inclusive count
      fraction of the sample function.

  Returns:
    A float value representing the score of the function.
  """
  return reference_fraction * sample_fraction / distance


def ComputeMetricsForComponents(cwp_function_groups, function_metrics):
  """Computes the metrics for a set of Chrome OS components.

  For every Chrome OS group, we compute the number of functions matching the
  group, the cumulative and average score, the cumulative and average
  distance of all those functions. A function matches a group if the path of
  the file containing its definition contains the common path describing the
  group. A function is counted only for the first group it matches.

  Args:
    cwp_function_groups: An iterable of (group name, common path) pairs
      describing each group.
    function_metrics: A dict having as a key the name of the function and
      the name of the file where it is declared concatenated by a ',', and
      as a value a tuple containing the distance and the score metrics.

  Returns:
    A dict containing as a key the name of the group and as a value a tuple
    with the group file path, the number of functions matching the group,
    the cumulative and average distance, cumulative and average score of all
    those functions.
  """
  # The default mirrors the stored 6-tuple layout (path, count,
  # distance_cum, distance_avg, score_cum, score_avg) so the indices below
  # are valid on first access as well.
  function_groups_metrics = defaultdict(lambda: ('', 0, 0.0, 0.0, 0.0, 0.0))

  for function_key, (function_distance, function_score) in (
      function_metrics.items()):
    _, function_file = function_key.split(',')

    for group, common_path in cwp_function_groups:
      if common_path not in function_file:
        continue

      group_statistic = function_groups_metrics[group]

      function_count = group_statistic[1] + 1
      function_distance_cum = function_distance + group_statistic[2]
      function_score_cum = function_score + group_statistic[4]

      function_groups_metrics[group] = (
          common_path,
          function_count,
          function_distance_cum,
          function_distance_cum / float(function_count),
          function_score_cum,
          function_score_cum / float(function_count))
      # A function belongs to at most one group.
      break

  return function_groups_metrics


def ComputeMetricsForBenchmark(function_metrics):
  """Computes the aggregated metrics for a benchmark.

  Args:
    function_metrics: A dict having as a key a function identifier and as a
      value a (distance, score) tuple.

  Returns:
    A tuple with the function count, the cumulative and average distance,
    and the cumulative and average score. All-zero values if
    function_metrics is empty.
  """
  function_count = len(function_metrics)
  # Guard against division by zero on an empty benchmark.
  if not function_count:
    return 0, 0.0, 0.0, 0.0, 0.0

  distance_cum = 0.0
  score_cum = 0.0
  for distance, score in function_metrics.values():
    distance_cum += distance
    score_cum += score

  distance_avg = distance_cum / float(function_count)
  score_avg = score_cum / float(function_count)
  return function_count, distance_cum, distance_avg, score_cum, score_avg


def ComputeFunctionCountForBenchmarkSet(set_function_metrics, cwp_functions,
                                        metric_string):
  """Computes the function count metric pair for the benchmark set.

  For the function count metric, we count the unique functions covered by
  the set of benchmarks. We compute the fraction of unique functions out of
  the amount of CWP functions given.

  We compute also the same metric pair for every group from the keys of the
  set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a
      group and as value a list of functions that match the given group.
    cwp_functions: A dict having as a key the name of the groups and as a
      value the list of CWP functions that match an individual group.
    metric_string: A tuple of strings that will be mapped to the tuple of
      metric values in the returned function group dict. This is done for
      convenience for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the metric pair represents the
    function count and the second value the function count fraction.
    The dict has as a key the name of the group and as a value a dict that
    maps the metric_string to the values of the metric pair of the group.
  """
  cwp_functions_count = sum(
      len(functions) for functions in cwp_functions.values())

  # Union of the functions covered per group, across all benchmarks.
  set_groups_functions = defaultdict(set)
  for benchmark_function_metrics in set_function_metrics:
    for group_name, functions in benchmark_function_metrics.items():
      set_groups_functions[group_name].update(functions)

  set_groups_functions_count = {}
  set_functions_count = 0
  for group_name, functions in set_groups_functions.items():
    group_functions_count = len(functions)
    set_functions_count += group_functions_count
    if group_name in cwp_functions:
      group_fraction = (
          group_functions_count / float(len(cwp_functions[group_name])))
    else:
      # No CWP data for this group: the fraction is reported as 0.
      group_fraction = 0.0
    set_groups_functions_count[group_name] = {
        metric_string[0]: group_functions_count,
        metric_string[1]: group_fraction}

  set_functions_count_fraction = (
      set_functions_count / float(cwp_functions_count))
  return ((set_functions_count, set_functions_count_fraction),
          set_groups_functions_count)


def ComputeDistanceForBenchmarkSet(set_function_metrics, cwp_functions,
                                   metric_string):
  """Computes the distance variation metric pair for the benchmark set.

  For the distance variation metric, we compute the sum of the distance
  variations of the functions covered by a set of benchmarks.
  We define the distance variation as the difference between the distance
  value of a function and the ideal distance value (1.0).
  If a function appears in multiple common functions files, we consider
  only the minimum value. We compute also the distance variation per
  function.

  In addition, we compute also the same metric pair for every group from
  the keys of the set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a
      group and as value a dict of function keys mapped to their
      (distance, score) metrics.
    cwp_functions: Unused; kept for interface parity with the other
      ComputeXForBenchmarkSet functions.
    metric_string: A tuple of strings that will be mapped to the tuple of
      metric values in the returned function group dict. This is done for
      convenience for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the metric pair represents the
    distance variation per function and the second value the distance
    variation. The dict has as a key the name of the group and as a value a
    dict that maps the metric_string to the values of the metric pair of
    the group.
  """
  # For every group, keep the minimum distance seen for each function key.
  set_unique_functions = defaultdict(
      lambda: defaultdict(lambda: float('inf')))
  for benchmark_function_metrics in set_function_metrics:
    for group_name, group_metrics in benchmark_function_metrics.items():
      group_functions = set_unique_functions[group_name]
      for function_key, metrics in group_metrics.items():
        group_functions[function_key] = min(metrics[0],
                                            group_functions[function_key])

  set_function_count = 0
  total_distance_variation = 0.0
  groups_distance_variations = defaultdict(lambda: (0.0, 0.0))
  for group_name, functions_distances in set_unique_functions.items():
    group_function_count = len(functions_distances)
    # Variation = sum of distances minus the ideal distance (1.0) for each
    # function in the group.
    group_distance_variation = (
        sum(functions_distances.values()) - float(group_function_count))
    total_distance_variation += group_distance_variation
    set_function_count += group_function_count
    groups_distance_variations[group_name] = {
        metric_string[0]:
            group_distance_variation / float(group_function_count),
        metric_string[1]: group_distance_variation}

  return ((total_distance_variation / set_function_count,
           total_distance_variation), groups_distance_variations)


def ComputeScoreForBenchmarkSet(set_function_metrics, cwp_functions,
                                metric_string):
  """Computes the score metric pair for the benchmark set.

  For the score metric, we compute the sum of the scores of the functions
  from a set of benchmarks. If a function appears in multiple common
  functions files, we consider only the maximum value. We compute also the
  fraction of this sum from the sum of all the scores of the functions from
  the CWP data covering the given groups, in the ideal case (the ideal
  score of a function is 1.0).

  In addition, we compute the same metric pair for every group from the
  keys of the set_function_metrics dict.

  Args:
    set_function_metrics: A list of dicts having as a key the name of a
      group and as value a dict of function keys mapped to their
      (distance, score) metrics.
    cwp_functions: A dict having as a key the name of the groups and as a
      value the list of CWP functions that match an individual group. Every
      group present in set_function_metrics must also be a key here.
    metric_string: A tuple of strings that will be mapped to the tuple of
      metric values in the returned function group dict. This is done for
      convenience for the JSON output.

  Returns:
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the pair is the fraction of the
    sum of the scores from the ideal case and the second value represents
    the sum of scores of the functions. The dict has as a key the name of
    the group and as a value a dict that maps the metric_string to the
    values of the metric pair of the group.
  """
  cwp_functions_count = sum(
      len(functions) for functions in cwp_functions.values())

  # For every group, keep the maximum score seen for each function key.
  set_unique_functions = defaultdict(lambda: defaultdict(lambda: 0.0))
  for benchmark_function_metrics in set_function_metrics:
    for group_name, group_metrics in benchmark_function_metrics.items():
      group_functions = set_unique_functions[group_name]
      for function_key, metrics in group_metrics.items():
        group_functions[function_key] = max(metrics[1],
                                            group_functions[function_key])

  total_score = 0.0
  groups_scores = defaultdict(lambda: (0.0, 0.0))
  for group_name, function_scores in set_unique_functions.items():
    # The ideal group score equals the number of CWP functions in the group
    # (each function scoring 1.0).
    group_function_count = float(len(cwp_functions[group_name]))
    group_score = sum(function_scores.values())
    total_score += group_score
    groups_scores[group_name] = {
        metric_string[0]: group_score / group_function_count,
        metric_string[1]: group_score}

  return (total_score / cwp_functions_count, total_score), groups_scores