1 files changed, 0 insertions, 482 deletions
diff --git a/user_activity_benchmarks/process_hot_functions.py b/user_activity_benchmarks/process_hot_functions.py
deleted file mode 100755
index 2fbf3f93..00000000
--- a/user_activity_benchmarks/process_hot_functions.py
+++ /dev/null
@@ -1,482 +0,0 @@
-#!/usr/bin/python2
-
-# Copyright 2016 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-"""Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data.
-
-The pprof --top and pprof --tree outputs should be extracted from the benchmark
-profiles. The outputs contain the hot functions and the call chains.
-
-For each pair of pprof --top and --tree output files, the tool will create a
-file that contains the hot functions present also in the extracted CWP data.
-The common functions are organized in groups that represent a Chrome OS
-component. A function belongs to a group that is defined by a given file path
-if it is declared in a file that shares that path.
-
-A set of metrics are computed for each function, benchmark and Chrome OS group
-covered by a benchmark.
-
-Afterwards, this script extracts the functions that are present in the CWP
-data and not in the benchmark profiles. The extra functions are also groupped
-in Chrome OS components.
-"""
-
-from collections import defaultdict
-
-import argparse
-import os
-import shutil
-import sys
-
-import benchmark_metrics
-import utils
-
-
-class HotFunctionsProcessor(object):
-  """Does the pprof and CWP output processing.
-
-  Extracts the common, extra functions from the pprof files, groups them in
-  Chrome OS components. Computes the metrics for the common functions,
-  benchmark and Chrome OS groups covered by a benchmark.
-  """
-
-  def __init__(self, pprof_top_path, pprof_tree_path, cwp_inclusive_count_file,
-               cwp_pairwise_inclusive_count_file, cwp_function_groups_file,
-               common_functions_path, common_functions_groups_path,
-               benchmark_set_metrics_file, extra_cwp_functions_file,
-               extra_cwp_functions_groups_file,
-               extra_cwp_functions_groups_path):
-    """Initializes the HotFunctionsProcessor.
-
-    Args:
-      pprof_top_path: The directory containing the files with the pprof --top
-        output.
-      pprof_tree_path: The directory containing the files with the pprof --tree
-        output.
-      cwp_inclusive_count_file: The CSV file containing the CWP functions with
-        the inclusive count values.
-      cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs
-        of parent and child functions with their inclusive count values.
-      cwp_function_groups_file: The file that contains the CWP function groups.
-      common_functions_path: The directory containing the CSV output files
-        with the common functions of the benchmark profiles and CWP data.
-      common_functions_groups_path: The directory containing the CSV output
-        files with the CWP groups and their metrics that match the common
-        functions of the benchmark profiles and CWP.
-      benchmark_set_metrics_file: The CSV output file containing the metrics for
-        each benchmark.
-      extra_cwp_functions_file: The CSV output file containing the functions
-        that are in the CWP data, but are not in any of the benchmark profiles.
-      extra_cwp_functions_groups_file: The CSV output file containing the groups
-        that match the extra CWP functions and their statistics.
-      extra_cwp_functions_groups_path: The directory containing the CSV output
-        files with the extra CWP functions that match a particular group.
-    """
-    self._pprof_top_path = pprof_top_path
-    self._pprof_tree_path = pprof_tree_path
-    self._cwp_inclusive_count_file = cwp_inclusive_count_file
-    self._cwp_pairwise_inclusive_count_file = cwp_pairwise_inclusive_count_file
-    self._cwp_function_groups_file = cwp_function_groups_file
-    self._common_functions_path = common_functions_path
-    self._common_functions_groups_path = common_functions_groups_path
-    self._benchmark_set_metrics_file = benchmark_set_metrics_file
-    self._extra_cwp_functions_file = extra_cwp_functions_file
-    self._extra_cwp_functions_groups_file = extra_cwp_functions_groups_file
-    self._extra_cwp_functions_groups_path = extra_cwp_functions_groups_path
-
-  def ProcessHotFunctions(self):
-    """Does the processing of the hot functions."""
-    with open(self._cwp_function_groups_file) as input_file:
-      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())
-    cwp_statistics = \
-      self.ExtractCommonFunctions(self._pprof_top_path,
-                                  self._pprof_tree_path,
-                                  self._cwp_inclusive_count_file,
-                                  self._cwp_pairwise_inclusive_count_file,
-                                  cwp_function_groups,
-                                  self._common_functions_path,
-                                  self._common_functions_groups_path,
-                                  self._benchmark_set_metrics_file)
-    self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file)
-    self.GroupExtraFunctions(cwp_statistics, cwp_function_groups,
-                             self._extra_cwp_functions_groups_path,
-                             self._extra_cwp_functions_groups_file)
-
-  def ExtractCommonFunctions(self, pprof_top_path, pprof_tree_path,
-                             cwp_inclusive_count_file,
-                             cwp_pairwise_inclusive_count_file,
-                             cwp_function_groups, common_functions_path,
-                             common_functions_groups_path,
-                             benchmark_set_metrics_file):
-    """Extracts the common functions of the benchmark profiles and the CWP data.
-
-    For each pair of pprof --top and --tree output files, it creates a separate
-    file with the same name containing the common functions specifications and
-    metrics, that will be placed in the common_functions_path directory.
-
-    The resulting file is in CSV format, containing the following fields:
-    function name, file name, object, inclusive count, inclusive_count_fraction,
-    flat, flat%, sum%, cum, cum%, distance and score.
-
-    For each pair of pprof files, an additional file is created with the
-    Chrome OS groups that match the common functions.
-
-    The file is in CSV format containing the fields: group name, group path,
-    the number of functions that match the group, the average and cumulative
-    distance, the average and cumulative score.
-    The file has the same name with the pprof file and it is placed in the
-    common_functions_groups_path directory.
-
-    For all the analyzed benchmarks, the method creates a CSV output file
-    containing the metrics for each benchmark. The CSV fields include the
-    benchmark name, the number of common functions, the average and
-    cumulative distance and score.
-
-    It builds a dict of the CWP statistics by calling the
-    utils.ParseCWPInclusiveCountFile method and if a function is common, it is
-    marked as a COMMON_FUNCTION.
-
-    Args:
-      pprof_top_path: The name of the directory with the files with the
-        pprof --top output.
-      pprof_tree_path: The name of the directory with the files with the
-        pprof --tree output.
-      cwp_inclusive_count_file: A dict with the inclusive count values.
-      cwp_pairwise_inclusive_count_file: A dict with the pairwise inclusive
-        count values.
-      cwp_function_groups: A list of tuples containing the name of the group
-        and the corresponding file path.
-      common_functions_path: The path containing the output files with the
-        common functions and their metrics.
-      common_functions_groups_path: The path containing the output files with
-        the Chrome OS groups that match the common functions and their metrics.
-      benchmark_set_metrics_file: The CSV output file containing the metrics for
-        all the analyzed benchmarks.
-
-    Returns:
-      A dict containing the CWP statistics with the common functions marked as
-      COMMON_FUNCTION.
-    """
-    cwp_inclusive_count_statistics = \
-        utils.ParseCWPInclusiveCountFile(cwp_inclusive_count_file)
-    cwp_pairwise_inclusive_count_statistics = \
-        utils.ParseCWPPairwiseInclusiveCountFile(
-            cwp_pairwise_inclusive_count_file)
-    cwp_inclusive_count_statistics_cumulative = \
-        utils.ComputeCWPCummulativeInclusiveStatistics(
-            cwp_inclusive_count_statistics)
-    cwp_pairwise_inclusive_count_fractions = \
-        utils.ComputeCWPChildFunctionsFractions(
-            cwp_inclusive_count_statistics_cumulative,
-            cwp_pairwise_inclusive_count_statistics)
-    benchmark_set_metrics = {}
-    pprof_files = os.listdir(pprof_top_path)
-
-    for pprof_file in pprof_files:
-      pprof_top_statistics = \
-          utils.ParsePprofTopOutput(os.path.join(pprof_top_path, pprof_file))
-      pprof_tree_statistics = \
-          utils.ParsePprofTreeOutput(os.path.join(pprof_tree_path, pprof_file))
-      common_functions_lines = []
-      benchmark_function_metrics = {}
-
-      for function_key, function_statistic in pprof_top_statistics.iteritems():
-        if function_key not in cwp_inclusive_count_statistics:
-          continue
-
-        cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction, _ = \
-            cwp_inclusive_count_statistics[function_key]
-        cwp_inclusive_count_statistics[function_key] = \
-            (cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction,
-             utils.COMMON_FUNCTION)
-
-        function_name, _ = function_key.split(',')
-        distance = benchmark_metrics.ComputeDistanceForFunction(
-            pprof_tree_statistics[function_key],
-            cwp_pairwise_inclusive_count_fractions.get(function_name, {}))
-        benchmark_cum_p = float(function_statistic[4])
-        score = benchmark_metrics.ComputeScoreForFunction(
-            distance, cwp_inclusive_count_fraction, benchmark_cum_p)
-        benchmark_function_metrics[function_key] = (distance, score)
-
-        common_functions_lines.append(','.join([function_key, cwp_dso_name, str(
-            cwp_inclusive_count), str(cwp_inclusive_count_fraction), ','.join(
-                function_statistic), str(distance), str(score)]))
-      benchmark_function_groups_statistics = \
-          benchmark_metrics.ComputeMetricsForComponents(
-              cwp_function_groups, benchmark_function_metrics)
-      benchmark_set_metrics[pprof_file] = \
-          benchmark_metrics.ComputeMetricsForBenchmark(
-              benchmark_function_metrics)
-
-      with open(os.path.join(common_functions_path, pprof_file), 'w') \
-          as output_file:
-        common_functions_lines.sort(
-            key=lambda x: float(x.split(',')[11]), reverse=True)
-        common_functions_lines.insert(0, 'function,file,dso,inclusive_count,'
-                                      'inclusive_count_fraction,flat,flat%,'
-                                      'sum%,cum,cum%,distance,score')
-        output_file.write('\n'.join(common_functions_lines))
-
-      with open(os.path.join(common_functions_groups_path, pprof_file), 'w') \
-          as output_file:
-        common_functions_groups_lines = \
-            [','.join([group_name, ','.join(
-                [str(statistic) for statistic in group_statistic])])
-             for group_name, group_statistic in
-             benchmark_function_groups_statistics.iteritems()]
-        common_functions_groups_lines.sort(
-            key=lambda x: float(x.split(',')[5]), reverse=True)
-        common_functions_groups_lines.insert(
-            0, 'group_name,file_path,number_of_functions,distance_cum,'
-            'distance_avg,score_cum,score_avg')
-        output_file.write('\n'.join(common_functions_groups_lines))
-
-    with open(benchmark_set_metrics_file, 'w') as output_file:
-      benchmark_set_metrics_lines = []
-
-      for benchmark_name, metrics in benchmark_set_metrics.iteritems():
-        benchmark_set_metrics_lines.append(','.join([benchmark_name, ','.join(
-            [str(metric) for metric in metrics])]))
-      benchmark_set_metrics_lines.sort(
-          key=lambda x: float(x.split(',')[4]), reverse=True)
-      benchmark_set_metrics_lines.insert(
-          0, 'benchmark_name,number_of_functions,distance_cum,distance_avg,'
-          'score_cum,score_avg')
-      output_file.write('\n'.join(benchmark_set_metrics_lines))
-
-    return cwp_inclusive_count_statistics
-
-  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups,
-                          extra_cwp_functions_groups_path,
-                          extra_cwp_functions_groups_file):
-    """Groups the extra functions.
-
-    Writes the data of the functions that belong to each group in a separate
-    file, sorted by their inclusive count value, in descending order. The file
-    name is the same as the group name.
-
-    The file is in CSV format, containing the fields: function name, file name,
-    object name, inclusive count, inclusive count fraction.
-
-    It creates a CSV file containing the name of the group, their
-    common path, the total inclusive count and inclusive count fraction values
-    of all the functions declared in files that share the common path, sorted
-    in descending order by the inclusive count value.
-
-    Args:
-      cwp_statistics: A dict containing the CWP statistics.
-      cwp_function_groups: A list of tuples with the groups names and the path
-        describing the groups.
-      extra_cwp_functions_groups_path: The name of the directory containing
-        the CSV output files with the extra CWP functions that match a
-        particular group.
-      extra_cwp_functions_groups_file: The CSV output file containing the groups
-        that match the extra functions and their statistics.
-    """
-    cwp_function_groups_statistics = defaultdict(lambda: ([], '', 0, 0.0))
-    for function, statistics in cwp_statistics.iteritems():
-      if statistics[3] == utils.COMMON_FUNCTION:
-        continue
-
-      file_name = function.split(',')[1]
-      group_inclusive_count = int(statistics[1])
-      group_inclusive_count_fraction = float(statistics[2])
-
-      for group in cwp_function_groups:
-        group_common_path = group[1]
-
-        if group_common_path not in file_name:
-          continue
-
-        group_name = group[0]
-        group_statistics = cwp_function_groups_statistics[group_name]
-        group_lines = group_statistics[0]
-        group_inclusive_count += group_statistics[2]
-        group_inclusive_count_fraction += group_statistics[3]
-
-        group_lines.append(','.join([function, statistics[0],
-                                     str(statistics[1]), str(statistics[2])]))
-        cwp_function_groups_statistics[group_name] = \
-            (group_lines, group_common_path, group_inclusive_count,
-             group_inclusive_count_fraction)
-        break
-
-    extra_cwp_functions_groups_lines = []
-    for group_name, group_statistics \
-        in cwp_function_groups_statistics.iteritems():
-      group_output_lines = group_statistics[0]
-      group_output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True)
-      group_output_lines.insert(
-          0, 'function,file,dso,inclusive_count,inclusive_count_fraction')
-      with open(os.path.join(extra_cwp_functions_groups_path, group_name),
-                'w') as output_file:
-        output_file.write('\n'.join(group_output_lines))
-      extra_cwp_functions_groups_lines.append(','.join(
-          [group_name, group_statistics[1], str(group_statistics[2]), str(
-              group_statistics[3])]))
-
-    extra_cwp_functions_groups_lines.sort(
-        key=lambda x: int(x.split(',')[2]), reverse=True)
-    extra_cwp_functions_groups_lines.insert(
-        0, 'group,shared_path,inclusive_count,inclusive_count_fraction')
-    with open(extra_cwp_functions_groups_file, 'w') as output_file:
-      output_file.write('\n'.join(extra_cwp_functions_groups_lines))
-
-  def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file):
-    """Gets the functions that are in the CWP data, but not in the pprof output.
-
-    Writes the functions and their statistics in the extra_cwp_functions_file
-    file. The output is sorted based on the inclusive_count value. The file is
-    in CSV format, containing the fields: function name, file name, object name,
-    inclusive count and inclusive count fraction.
-
-    Args:
-      cwp_statistics: A dict containing the CWP statistics indexed by the
-        function and the file name, comma separated.
-      extra_cwp_functions_file: The file where it should be stored the CWP
-        functions and statistics that are marked as EXTRA_FUNCTION.
-    """
-    output_lines = []
-
-    for function, statistics in cwp_statistics.iteritems():
-      if statistics[3] == utils.EXTRA_FUNCTION:
-        output_lines.append(','.join([function, statistics[0],
-                                      str(statistics[1]), str(statistics[2])]))
-
-    with open(extra_cwp_functions_file, 'w') as output_file:
-      output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True)
-      output_lines.insert(0, 'function,file,dso,inclusive_count,'
-                          'inclusive_count_fraction')
-      output_file.write('\n'.join(output_lines))
-
-
-def ParseArguments(arguments):
-  parser = argparse.ArgumentParser()
-
-  parser.add_argument(
-      '--pprof_top_path',
-      required=True,
-      help='The directory containing the files with the pprof --top output of '
-      'the benchmark profiles (the hot functions). The name of the files '
-      'should match with the ones from the pprof tree output files.')
-  parser.add_argument(
-      '--pprof_tree_path',
-      required=True,
-      help='The directory containing the files with the pprof --tree output '
-      'of the benchmark profiles (the call chains). The name of the files '
-      'should match with the ones of the pprof top output files.')
-  parser.add_argument(
-      '--cwp_inclusive_count_file',
-      required=True,
-      help='The CSV file containing the CWP hot functions with their '
-      'inclusive_count values. The CSV fields include the name of the '
-      'function, the file and the object with the definition, the inclusive '
-      'count value and the inclusive count fraction out of the total amount of '
-      'inclusive count values.')
-  parser.add_argument(
-      '--cwp_pairwise_inclusive_count_file',
-      required=True,
-      help='The CSV file containing the CWP pairs of parent and child '
-      'functions with their inclusive count values. The CSV fields include the '
-      'name of the parent and child functions concatenated by ;;, the file '
-      'and the object with the definition of the child function, and the '
-      'inclusive count value.')
-  parser.add_argument(
-      '--cwp_function_groups_file',
-      required=True,
-      help='The file that contains the CWP function groups. A line consists in '
-      'the group name and a file path describing the group. A group must '
-      'represent a ChromeOS component.')
-  parser.add_argument(
-      '--common_functions_path',
-      required=True,
-      help='The directory containing the CSV output files with the common '
-      'functions of the benchmark profiles and CWP data. A file will contain '
-      'all the hot functions from a pprof top output file that are also '
-      'included in the file containing the cwp inclusive count values. The CSV '
-      'fields are: the function name, the file and the object where the '
-      'function is declared, the CWP inclusive count and inclusive count '
-      'fraction values, the cumulative and average distance, the cumulative '
-      'and average score. The files with the common functions will have the '
-      'same names with the corresponding pprof output files.')
-  parser.add_argument(
-      '--common_functions_groups_path',
-      required=True,
-      help='The directory containing the CSV output files with the Chrome OS '
-      'groups and their metrics that match the common functions of the '
-      'benchmark profiles and CWP. The files with the groups will have the '
-      'same names with the corresponding pprof output files. The CSV fields '
-      'include the group name, group path, the number of functions that match '
-      'the group, the average and cumulative distance, the average and '
-      'cumulative score.')
-  parser.add_argument(
-      '--benchmark_set_metrics_file',
-      required=True,
-      help='The CSV output file containing the metrics for each benchmark. The '
-      'CSV fields include the benchmark name, the number of common functions, '
-      'the average and cumulative distance and score.')
-  parser.add_argument(
-      '--extra_cwp_functions_file',
-      required=True,
-      help='The CSV output file containing the functions that are in the CWP '
-      'data, but are not in any of the benchmark profiles. The CSV fields '
-      'include the name of the function, the file name and the object with the '
-      'definition, and the CWP inclusive count and inclusive count fraction '
-      'values. The entries are sorted in descending order based on the '
-      'inclusive count value.')
-  parser.add_argument(
-      '--extra_cwp_functions_groups_file',
-      required=True,
-      help='The CSV output file containing the groups that match the extra CWP '
-      'functions and their statistics. The CSV fields include the group name, '
-      'the file path, the total inclusive count and inclusive count fraction '
-      'values of the functions matching a particular group.')
-  parser.add_argument(
-      '--extra_cwp_functions_groups_path',
-      required=True,
-      help='The directory containing the CSV output files with the extra CWP '
-      'functions that match a particular group. The name of the file is the '
-      'same as the group name. The CSV fields include the name of the '
-      'function, the file name and the object with the definition, and the CWP '
-      'inclusive count and inclusive count fraction values. The entries are '
-      'sorted in descending order based on the inclusive count value.')
-
-  options = parser.parse_args(arguments)
-
-  return options
-
-
-def Main(argv):
-  options = ParseArguments(argv)
-
-  if os.path.exists(options.common_functions_path):
-    shutil.rmtree(options.common_functions_path)
-
-  os.makedirs(options.common_functions_path)
-
-  if os.path.exists(options.common_functions_groups_path):
-    shutil.rmtree(options.common_functions_groups_path)
-
-  os.makedirs(options.common_functions_groups_path)
-
-  if os.path.exists(options.extra_cwp_functions_groups_path):
-    shutil.rmtree(options.extra_cwp_functions_groups_path)
-
-  os.makedirs(options.extra_cwp_functions_groups_path)
-
-  hot_functions_processor = HotFunctionsProcessor(
-      options.pprof_top_path, options.pprof_tree_path,
-      options.cwp_inclusive_count_file,
-      options.cwp_pairwise_inclusive_count_file,
-      options.cwp_function_groups_file, options.common_functions_path,
-      options.common_functions_groups_path, options.benchmark_set_metrics_file,
-      options.extra_cwp_functions_file, options.extra_cwp_functions_groups_file,
-      options.extra_cwp_functions_groups_path)
-
-  hot_functions_processor.ProcessHotFunctions()
-
-
-if __name__ == '__main__':
-  Main(sys.argv[1:])