Diffstat (limited to 'user_activity_benchmarks/select_optimal_benchmark_set.py')
-rwxr-xr-x | user_activity_benchmarks/select_optimal_benchmark_set.py | 347 |
1 file changed, 0 insertions, 347 deletions
diff --git a/user_activity_benchmarks/select_optimal_benchmark_set.py b/user_activity_benchmarks/select_optimal_benchmark_set.py
deleted file mode 100755
index 1c8305cf..00000000
--- a/user_activity_benchmarks/select_optimal_benchmark_set.py
+++ /dev/null
@@ -1,347 +0,0 @@
-#!/usr/bin/python2
-
-# Copyright 2016 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-"""Selects the optimal set of benchmarks.
-
-For each benchmark, there is a file with the common functions, as extracted by
-the process_hot_functions module.
-
-The script receives as input the CSV file with the CWP inclusive count values,
-the file with Chrome OS groups and the path containing a file with common
-functions for every benchmark.
-
-It extracts for every benchmark and for the CWP data all the functions that
-match the given Chrome OS groups.
-
-It generates all possible combinations of benchmark sets of a given size and
-it computes a metric for every set.
-It outputs the optimal sets, based on which ones have the best metric.
-
-Three different metrics have been used: function count, distance
-variation and score.
-
-For the function count metric, we count the unique functions covered by a
-set of benchmarks. Besides the number of unique functions, we also compute
-the fraction of unique functions out of the amount of CWP functions from the
-given groups. The benchmark set with the highest number of unique functions
-that belong to all the given groups is considered better.
-
-For the distance variation metric, we compute the sum of the distance variations
-of the functions covered by a set of benchmarks. We define the distance
-variation as the difference between the distance value of a function and the
-ideal distance value (1.0). If a function appears in multiple common functions
-files, we consider only the minimum value. We also compute the distance
-variation per function. The set that has the smaller value for the
-distance variation per function is considered better.
-
-For the score metric, we compute the sum of the scores of the functions from a
-set of benchmarks. If a function appears in multiple common functions files,
-we consider only the maximum value. We also compute the fraction of this sum
-out of the sum of all the scores of the functions from the CWP data covering
-the given groups, in the ideal case (the ideal score of a function is 1.0).
-
-We compute the metrics in the same manner for individual Chrome OS groups.
-"""
-
-from collections import defaultdict
-
-import argparse
-import csv
-import itertools
-import json
-import operator
-import os
-import sys
-
-import benchmark_metrics
-import utils
-
-
-class BenchmarkSet(object):
-  """Selects the optimal set of benchmarks of given size."""
-
-  # Constants that specify the metric type.
-  FUNCTION_COUNT_METRIC = 'function_count'
-  DISTANCE_METRIC = 'distance_variation'
-  SCORE_METRIC = 'score_fraction'
-
-  def __init__(self, benchmark_set_size, benchmark_set_output_file,
-               benchmark_set_common_functions_path, cwp_inclusive_count_file,
-               cwp_function_groups_file, metric):
-    """Initializes the BenchmarkSet.
-
-    Args:
-      benchmark_set_size: Constant representing the size of a benchmark set.
-      benchmark_set_output_file: The output file that will contain the set of
-        optimal benchmarks with the metric values.
-      benchmark_set_common_functions_path: The directory containing the files
-        with the common functions for the list of benchmarks.
-      cwp_inclusive_count_file: The CSV file containing the CWP functions with
-        their inclusive count values.
-      cwp_function_groups_file: The file that contains the CWP function groups.
-      metric: The type of metric used for the analysis.
-    """
-    self._benchmark_set_size = int(benchmark_set_size)
-    self._benchmark_set_output_file = benchmark_set_output_file
-    self._benchmark_set_common_functions_path = \
-        benchmark_set_common_functions_path
-    self._cwp_inclusive_count_file = cwp_inclusive_count_file
-    self._cwp_function_groups_file = cwp_function_groups_file
-    self._metric = metric
-
-  @staticmethod
-  def OrganizeCWPFunctionsInGroups(cwp_inclusive_count_statistics,
-                                   cwp_function_groups):
-    """Selects the CWP functions that match the given Chrome OS groups.
-
-    Args:
-      cwp_inclusive_count_statistics: A dict with the CWP functions.
-      cwp_function_groups: A list with the CWP function groups.
-
-    Returns:
-      A dict having as a key the name of the groups and as a value the list of
-      CWP functions that match an individual group.
-    """
-    cwp_functions_grouped = defaultdict(list)
-    for function_key in cwp_inclusive_count_statistics:
-      _, file_name = function_key.split(',')
-      for group_name, file_path in cwp_function_groups:
-        if file_path not in file_name:
-          continue
-        cwp_functions_grouped[group_name].append(function_key)
-        break
-    return cwp_functions_grouped
-
-  @staticmethod
-  def OrganizeBenchmarkSetFunctionsInGroups(benchmark_set_files,
-                                            benchmark_set_common_functions_path,
-                                            cwp_function_groups):
-    """Selects the benchmark functions that match the given Chrome OS groups.
-
-    Args:
-      benchmark_set_files: The list of common functions files corresponding to
-        a benchmark.
-      benchmark_set_common_functions_path: The directory containing the files
-        with the common functions for the list of benchmarks.
-      cwp_function_groups: A list with the CWP function groups.
-
-    Returns:
-      A dict having as a key the name of a common functions file. The value is
-      a dict having as a key the name of a group and as value a list of
-      functions that match the given group.
-    """
-
-    benchmark_set_functions_grouped = {}
-    for benchmark_file_name in benchmark_set_files:
-      benchmark_full_file_path = \
-          os.path.join(benchmark_set_common_functions_path,
-                       benchmark_file_name)
-      with open(benchmark_full_file_path) as input_file:
-        statistics_reader = \
-            csv.DictReader(input_file, delimiter=',')
-        benchmark_functions_grouped = defaultdict(dict)
-        for statistic in statistics_reader:
-          function_name = statistic['function']
-          file_name = statistic['file']
-          for group_name, file_path in cwp_function_groups:
-            if file_path not in file_name:
-              continue
-            function_key = ','.join([function_name, file_name])
-            distance = float(statistic['distance'])
-            score = float(statistic['score'])
-            benchmark_functions_grouped[group_name][function_key] = \
-                (distance, score)
-            break
-        benchmark_set_functions_grouped[benchmark_file_name] = \
-            benchmark_functions_grouped
-    return benchmark_set_functions_grouped
-
-  @staticmethod
-  def SelectOptimalBenchmarkSetBasedOnMetric(all_benchmark_combinations_sets,
-                                             benchmark_set_functions_grouped,
-                                             cwp_functions_grouped,
-                                             metric_function_for_set,
-                                             metric_comparison_operator,
-                                             metric_default_value,
-                                             metric_string):
-    """Generic method that selects the optimal benchmark set based on a metric.
-
-    The reason for implementing a generic function is to avoid duplicating the
-    logic for selecting a benchmark set based on the three different metrics.
-
-    Args:
-      all_benchmark_combinations_sets: The list with all the sets of benchmark
-        combinations.
-      benchmark_set_functions_grouped: A dict with benchmark functions as
-        returned by OrganizeBenchmarkSetFunctionsInGroups.
-      cwp_functions_grouped: A dict with the CWP functions as returned by
-        OrganizeCWPFunctionsInGroups.
-      metric_function_for_set: The method used to compute the metric for a
-        given benchmark set.
-      metric_comparison_operator: A comparison operator used to compare two
-        values of the same metric (i.e. operator.lt or operator.gt).
-      metric_default_value: The default value for the metric.
-      metric_string: A tuple of strings used in the JSON output for the pair of
-        the values of the metric.
-
-    Returns:
-      A list of tuples, one for each optimal benchmark set. A tuple contains
-      the list of benchmarks from the set, the pair of metric values and a
-      dictionary with the metrics for each group.
-    """
-    optimal_sets = [([], metric_default_value, {})]
-
-    for benchmark_combination_set in all_benchmark_combinations_sets:
-      function_metrics = [benchmark_set_functions_grouped[benchmark]
-                          for benchmark in benchmark_combination_set]
-      set_metrics, set_groups_metrics = \
-          metric_function_for_set(function_metrics, cwp_functions_grouped,
-                                  metric_string)
-      optimal_value = optimal_sets[0][1][0]
-      if metric_comparison_operator(set_metrics[0], optimal_value):
-        optimal_sets = \
-            [(benchmark_combination_set, set_metrics, set_groups_metrics)]
-      elif set_metrics[0] == optimal_sets[0][1][0]:
-        optimal_sets.append(
-            (benchmark_combination_set, set_metrics, set_groups_metrics))
-
-    return optimal_sets
-
-  def SelectOptimalBenchmarkSet(self):
-    """Selects the optimal benchmark sets and writes them in JSON format.
-
-    Parses the CWP inclusive count statistics and benchmark common functions
-    files. Organizes the functions into groups. For every optimal benchmark
-    set, the method writes to self._benchmark_set_output_file the list of
-    benchmarks, the pair of metrics and a dictionary with the pair of
-    metrics for each group covered by the benchmark set.
-    """
-
-    benchmark_set_files = os.listdir(self._benchmark_set_common_functions_path)
-    all_benchmark_combinations_sets = \
-        itertools.combinations(benchmark_set_files, self._benchmark_set_size)
-
-    with open(self._cwp_function_groups_file) as input_file:
-      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())
-
-    cwp_inclusive_count_statistics = \
-        utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_count_file)
-    cwp_functions_grouped = self.OrganizeCWPFunctionsInGroups(
-        cwp_inclusive_count_statistics, cwp_function_groups)
-    benchmark_set_functions_grouped = \
-        self.OrganizeBenchmarkSetFunctionsInGroups(
-            benchmark_set_files, self._benchmark_set_common_functions_path,
-            cwp_function_groups)
-
-    if self._metric == self.FUNCTION_COUNT_METRIC:
-      metric_function_for_benchmark_set = \
-          benchmark_metrics.ComputeFunctionCountForBenchmarkSet
-      metric_comparison_operator = operator.gt
-      metric_default_value = (0, 0.0)
-      metric_string = ('function_count', 'function_count_fraction')
-    elif self._metric == self.DISTANCE_METRIC:
-      metric_function_for_benchmark_set = \
-          benchmark_metrics.ComputeDistanceForBenchmarkSet
-      metric_comparison_operator = operator.lt
-      metric_default_value = (float('inf'), float('inf'))
-      metric_string = \
-          ('distance_variation_per_function', 'total_distance_variation')
-    elif self._metric == self.SCORE_METRIC:
-      metric_function_for_benchmark_set = \
-          benchmark_metrics.ComputeScoreForBenchmarkSet
-      metric_comparison_operator = operator.gt
-      metric_default_value = (0.0, 0.0)
-      metric_string = ('score_fraction', 'total_score')
-    else:
-      raise ValueError('Invalid metric')
-
-    optimal_benchmark_sets = \
-        self.SelectOptimalBenchmarkSetBasedOnMetric(
-            all_benchmark_combinations_sets, benchmark_set_functions_grouped,
-            cwp_functions_grouped, metric_function_for_benchmark_set,
-            metric_comparison_operator, metric_default_value, metric_string)
-
-    json_output = []
-
-    for benchmark_set in optimal_benchmark_sets:
-      json_entry = {
-          'benchmark_set':
-              list(benchmark_set[0]),
-          'metrics': {
-              metric_string[0]: benchmark_set[1][0],
-              metric_string[1]: benchmark_set[1][1]
-          },
-          'groups':
-              dict(benchmark_set[2])
-      }
-      json_output.append(json_entry)
-
-    with open(self._benchmark_set_output_file, 'w') as output_file:
-      json.dump(json_output, output_file)
-
-
-def ParseArguments(arguments):
-  parser = argparse.ArgumentParser()
-
-  parser.add_argument(
-      '--benchmark_set_common_functions_path',
-      required=True,
-      help='The directory containing the CSV files with the common functions '
-      'of the benchmark profiles and CWP data. A file will contain all the hot '
-      'functions from a pprof top output file that are also included in the '
-      'file containing the CWP inclusive count values. The CSV fields are: the '
-      'function name, the file and the object where the function is declared, '
-      'the CWP inclusive count and inclusive count fraction values, the '
-      'cumulative and average distance, the cumulative and average score. The '
-      'files with the common functions will have the same names as the '
-      'corresponding pprof output files.')
-  parser.add_argument(
-      '--cwp_inclusive_count_file',
-      required=True,
-      help='The CSV file containing the CWP hot functions with their '
-      'inclusive_count values. The CSV fields include the name of the '
-      'function, the file and the object with the definition, the inclusive '
-      'count value and the inclusive count fraction out of the total amount of '
-      'inclusive count values.')
-  parser.add_argument(
-      '--benchmark_set_size',
-      required=True,
-      help='The size of the benchmark sets.')
-  parser.add_argument(
-      '--benchmark_set_output_file',
-      required=True,
-      help='The JSON output file containing optimal benchmark sets with their '
-      'metrics. For every optimal benchmark set, the file contains the list of '
-      'benchmarks, the pair of metrics and a dictionary with the pair of '
-      'metrics for each group covered by the benchmark set.')
-  parser.add_argument(
-      '--metric',
-      required=True,
-      help='The metric used to select the optimal benchmark set. The possible '
-      'values are: distance_variation, function_count and score_fraction.')
-  parser.add_argument(
-      '--cwp_function_groups_file',
-      required=True,
-      help='The file that contains the CWP function groups. A line consists of '
-      'the group name and a file path describing the group. A group must '
-      'represent a Chrome OS component.')
-
-  options = parser.parse_args(arguments)
-
-  return options
-
-
-def Main(argv):
-  options = ParseArguments(argv)
-  benchmark_set = BenchmarkSet(options.benchmark_set_size,
-                               options.benchmark_set_output_file,
-                               options.benchmark_set_common_functions_path,
-                               options.cwp_inclusive_count_file,
-                               options.cwp_function_groups_file, options.metric)
-  benchmark_set.SelectOptimalBenchmarkSet()
-
-
-if __name__ == '__main__':
-  Main(sys.argv[1:])
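For reference, the sketch below shows one way the deleted script could be driven programmatically, based on the argparse flags it defines. It is illustrative only: the input paths, the set size and the chosen metric are hypothetical, and it assumes the script is run from its own directory so that the benchmark_metrics and utils modules it imports can be resolved.

# Hypothetical invocation of the deleted script; all paths below are
# placeholders. The flag names and accepted metric values come from
# ParseArguments above.
import select_optimal_benchmark_set

select_optimal_benchmark_set.Main([
    '--benchmark_set_common_functions_path', 'common_functions/',
    '--cwp_inclusive_count_file', 'cwp_inclusive_count.csv',
    '--cwp_function_groups_file', 'cwp_function_groups.txt',
    '--benchmark_set_size', '3',
    '--benchmark_set_output_file', 'optimal_benchmark_sets.json',
    '--metric', 'score_fraction',  # or 'function_count' / 'distance_variation'
])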