user_activity: Computed the metrics for the common functions.

Integrated the script with the utils module. BUG=None TEST=None Change-Id: I4c8d0bdbda6df5996eae873655cfaf355cf559a2 Reviewed-on: https://chrome-internal-review.googlesource.com/290455 Reviewed-by: Luis Lozano <llozano@chromium.org> Reviewed-by: Evelina Dumitrescu <evelinad@google.com> Tested-by: Evelina Dumitrescu <evelinad@google.com> Reviewed-on: https://chromium-review.googlesource.com/435916 Commit-Ready: Luis Lozano <llozano@chromium.org> Tested-by: Luis Lozano <llozano@chromium.org>
author: Evelina Dumitrescu <evelinad@google.com> 2016-09-23 15:32:49 -0700
committer: chrome-bot <chrome-bot@chromium.org> 2017-02-01 18:13:44 -0800
commit: 0228f92f87bc785c3f8184ce79e2d016c95941fc (patch)
tree: 78a7c04a2942e2a944379150ebee59fcee6f4756
parent: c7faa09f456ca5c651ac373ad897aa4be6ad2717 (diff)
download: toolchain-utils-0228f92f87bc785c3f8184ce79e2d016c95941fc.tar.gz
1 files changed, 442 insertions, 282 deletions
diff --git a/user_activity_benchmarks/process_hot_functions.py b/user_activity_benchmarks/process_hot_functions.py
index f021e8c7..13d228bb 100755
--- a/user_activity_benchmarks/process_hot_functions.py
+++ b/user_activity_benchmarks/process_hot_functions.py
@@ -5,312 +5,427 @@
 # found in the LICENSE file.
 """Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data.
 
-The pprof output files should have the format given by the output of the
-pprof --top command. A line containing a statistic should include the flat,
-flat%, sum%, cum, cum%, function name and file name, separated by a space.
-
-The CWP hot functions should be specified in a CSV file that should contain the
-fields for the function name, the file and the object where that function is
-declared and the inclusive count value.
-
-For each pprof output file, the tool will output a file that contains the hot
-functions present also in the CWP hot functions file. Afterwards, it extracts
-the functions that are present in the CWP functions file and not in the
-pprof output files.
-
-Optionally, it will organize the extra CWP functions in groups that have to
-represent a ChromeOS component. A function belongs to a group that is defined
-by a given file path if it is declared in a file that shares that path.
+The pprof --top and pprof --tree outputs should be extracted from the benchmark
+profiles. The outputs contain the hot functions and the call chains.
+
+For each pair of pprof --top and --tree output files, the tool will create a
+file that contains the hot functions present also in the extracted CWP data.
+The common functions are organized in groups that represent a Chrome OS
+component. A function belongs to a group that is defined by a given file path
+if it is declared in a file that shares that path.
+
+A set of metrics is computed for each function, benchmark and Chrome OS group
+covered by a benchmark.
+
+Afterwards, this script extracts the functions that are present in the CWP
+data and not in the benchmark profiles. The extra functions are also grouped
+in Chrome OS components.
 """
 
+from collections import defaultdict
+
 import argparse
-import csv
 import os
-import re
+import shutil
 import sys
 
+import benchmark_metrics
+import utils
+
 
 class HotFunctionsProcessor(object):
   """Does the pprof and CWP output processing.
 
-  Extracts the common and extra functions from the pprof output files, based on
-  the provided CWP functions.
+  Extracts the common and extra functions from the pprof files and groups them
+  in Chrome OS components. Computes the metrics for the common functions,
+  benchmark and Chrome OS groups covered by a benchmark.
   """
 
-  # Constants used to identify if a function is common in the pprof and CWP
-  # files.
-  COMMON_FUNCTION = 1
-  NOT_COMMON_FUNCTION = 0
-
-  def __init__(self, pprof_path, cwp_functions_file, common_functions_path,
-               extra_cwp_functions_file, cwp_function_groups_file,
-               cwp_function_groups_statistics_file,
-               cwp_function_groups_file_prefix):
+  def __init__(self, pprof_top_path, pprof_tree_path, cwp_inclusive_count_file,
+               cwp_pairwise_inclusive_count_file, cwp_function_groups_file,
+               common_functions_path, common_functions_groups_path,
+               benchmark_set_metrics_file, extra_cwp_functions_file,
+               extra_cwp_functions_groups_file,
+               extra_cwp_functions_groups_path):
     """Initializes the HotFunctionsProcessor.
 
     Args:
-      pprof_path: The directory containing the pprof output files.
-      cwp_functions_file: The file containing the CWP data.
-      common_functions_path: The directory where the files with the CWP and
-        pprof common functions should be stored.
-      extra_cwp_functions_file: The file where should be stored the CWP
-        functions that are not in the given pprof output files.
-      cwp_function_groups_file: The name of the file containing the groups of
-        functions.
-      cwp_function_groups_statistics_file: The name of the file containing the
-        statistics for the function groups.
-      cwp_function_groups_file_prefix: The prefix of the files that will store
-        the function statistics for each function group.
+      pprof_top_path: The directory containing the files with the pprof --top
+        output.
+      pprof_tree_path: The directory containing the files with the pprof --tree
+        output.
+      cwp_inclusive_count_file: The CSV file containing the CWP functions with
+        the inclusive count values.
+      cwp_pairwise_inclusive_count_file: The CSV file containing the CWP pairs
+        of parent and child functions with their inclusive count values.
+      cwp_function_groups_file: The file that contains the CWP function groups.
+      common_functions_path: The directory containing the CSV output files
+        with the common functions of the benchmark profiles and CWP data.
+      common_functions_groups_path: The directory containing the CSV output
+        files with the CWP groups and their metrics that match the common
+        functions of the benchmark profiles and CWP.
+      benchmark_set_metrics_file: The CSV output file containing the metrics for
+        each benchmark.
+      extra_cwp_functions_file: The CSV output file containing the functions
+        that are in the CWP data, but are not in any of the benchmark profiles.
+      extra_cwp_functions_groups_file: The CSV output file containing the groups
+        that match the extra CWP functions and their statistics.
+      extra_cwp_functions_groups_path: The directory containing the CSV output
+        files with the extra CWP functions that match a particular group.
     """
-    self._pprof_path = pprof_path
-    self._cwp_functions_file = cwp_functions_file
+    self._pprof_top_path = pprof_top_path
+    self._pprof_tree_path = pprof_tree_path
+    self._cwp_inclusive_count_file = cwp_inclusive_count_file
+    self._cwp_pairwise_inclusive_count_file = cwp_pairwise_inclusive_count_file
+    self._cwp_function_groups_file = cwp_function_groups_file
     self._common_functions_path = common_functions_path
+    self._common_functions_groups_path = common_functions_groups_path
+    self._benchmark_set_metrics_file = benchmark_set_metrics_file
     self._extra_cwp_functions_file = extra_cwp_functions_file
-    self._cwp_function_groups_file = cwp_function_groups_file
-    self._cwp_function_groups_statistics_file = \
-        cwp_function_groups_statistics_file
-    self._cwp_function_groups_file_prefix = cwp_function_groups_file_prefix
+    self._extra_cwp_functions_groups_file = extra_cwp_functions_groups_file
+    self._extra_cwp_functions_groups_path = extra_cwp_functions_groups_path
 
   def ProcessHotFunctions(self):
     """Does the processing of the hot functions."""
+    with open(self._cwp_function_groups_file) as input_file:
+      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())
     cwp_statistics = \
-      self.ExtractCommonFunctions(self._pprof_path,
-                                   self._common_functions_path,
-                                   self._cwp_functions_file)
-
+      self.ExtractCommonFunctions(self._pprof_top_path,
+                                  self._pprof_tree_path,
+                                  self._cwp_inclusive_count_file,
+                                  self._cwp_pairwise_inclusive_count_file,
+                                  cwp_function_groups,
+                                  self._common_functions_path,
+                                  self._common_functions_groups_path,
+                                  self._benchmark_set_metrics_file)
     self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file)
-    if all([self._cwp_function_groups_file,
-            self._cwp_function_groups_statistics_file,
-            self._cwp_function_groups_file_prefix]):
-      self.GroupExtraFunctions(cwp_statistics,
-                               self._cwp_function_groups_file_prefix,
-                               self._cwp_function_groups_file,
-                               self._cwp_function_groups_statistics_file)
+    self.GroupExtraFunctions(cwp_statistics, cwp_function_groups,
+                             self._extra_cwp_functions_groups_path,
+                             self._extra_cwp_functions_groups_file)
 
-  def ParseCWPStatistics(self, cwp_statistics_file_name):
-    """Parses the contents of the file containing the CWP data.
+  @staticmethod
+  def ComputeCWPCummulativeInclusiveStatistics(cwp_inclusive_count_statistics):
+    """Computes the cumulative inclusive count value of a function.
 
-    A line contains the name of the function, the corresponding filenames, the
-    object files and their inclusive count values in CSV format.
+    A function might appear declared in multiple files or objects. When
+    computing the fraction of the inclusive count value from a child function to
+    the parent function, we take into consideration the sum of the
+    inclusive count values from all the occurrences of that function across
+    the files and objects in which it is declared.
 
     Args:
-      cwp_statistics_file_name: The name of the file containing the CWP data
-      in CSV format.
+      cwp_inclusive_count_statistics: A dict containing the inclusive count
+        statistics extracted by utils.ParseCWPInclusiveCountFile.
 
     Returns:
-      A dict containing the CWP statistics. The key contains the name of the
-      functions with the file name comma separated. The value represents a
-      tuple with the statistics and a marker to identify if the function is
-      present in one of the pprof files.
+      A dict having as a key the name of the function and as a value the sum of
+      the inclusive count values of the occurrences of the function from all
+      the files and objects.
     """
-    cwp_statistics = {}
+    cwp_inclusive_count_statistics_cumulative = defaultdict(int)
 
-    with open(cwp_statistics_file_name) as cwp_statistics_file:
-      statistics_reader = csv.DictReader(cwp_statistics_file, delimiter=',')
+    for function_key, function_statistics \
+        in cwp_inclusive_count_statistics.iteritems():
+      function_name, _ = function_key.split(',')
+      cwp_inclusive_count_statistics_cumulative[function_name] += \
+          function_statistics[1]
 
-      for statistic in statistics_reader:
-        function_name = statistic['function']
-        file_name = os.path.normpath(statistic['file'])
-        dso_name = statistic['dso']
-        inclusive_count = statistic['inclusive_count']
+    return cwp_inclusive_count_statistics_cumulative
 
-        # We ignore the lines that have empty fields(i.e they specify only the
-        # addresses of the functions and the inclusive counts values).
-        if all([function_name, file_name, dso_name, inclusive_count]):
-          key = '%s,%s' % (function_name, file_name)
-          value = \
-            ('%s,%s' % (dso_name, inclusive_count), self.NOT_COMMON_FUNCTION)
-          # All the functions are marked as NOT_COMMON_FUNCTION.
-          cwp_statistics[key] = value
-
-    return cwp_statistics
+  @staticmethod
+  def ComputeCWPChildFunctionsFractions(
+      cwp_inclusive_count_statistics_cumulative,
+      cwp_pairwise_inclusive_count_statistics):
+    """Computes the fractions of the inclusive count values for child functions.
 
-  def ExtractCommonFunctions(self, pprof_path, common_functions_path,
-                             cwp_functions_file):
-    """Extracts the common functions of the pprof files and the CWP file.
+    The fraction represents the inclusive count value of a child function over
+    the one of the parent function.
 
-    For each pprof file, it creates a separate file with the same name
-    containing the common functions, that will be placed in the
-    common_functions_path directory.
+    Args:
+      cwp_inclusive_count_statistics_cumulative: A dict containing the
+        cumulative inclusive count values of the CWP functions.
+      cwp_pairwise_inclusive_count_statistics: A dict containing the inclusive
+        count statistics for pairs of parent and child functions. The key is the
+        parent function. The value is a dict with the key the name of the child
+        function and the file name, comma separated, and the value is the
+        inclusive count value of the pair of parent and child functions.
 
-    The resulting file is CSV format, containing the following fields:
-    function name, file name, object, inclusive count, flat, flat%, sum%, cum,
-    cum%.
+    Returns:
+        A dict containing the inclusive count statistics for pairs of parent
+        and child functions. The key is the parent function. The value is a
+        dict with the key the name of the child function and the file name,
+        comma separated, and the value is the inclusive count fraction of the
+        child function out of the parent function.
+    """
 
-    It builds a dict of the CWP statistics and if a function is common, it is
+    pairwise_inclusive_count_fractions = {}
+
+    for parent_function_key, child_functions_metrics in \
+        cwp_pairwise_inclusive_count_statistics.iteritems():
+      child_functions_fractions = {}
+      parent_function_inclusive_count = \
+      cwp_inclusive_count_statistics_cumulative.get(parent_function_key, 0.0)
+
+      if parent_function_key in cwp_inclusive_count_statistics_cumulative:
+        for child_function_key, child_function_inclusive_count \
+            in child_functions_metrics.iteritems():
+          child_functions_fractions[child_function_key] = \
+             child_function_inclusive_count / parent_function_inclusive_count
+      else:
+        for child_function_key, child_function_inclusive_count \
+            in child_functions_metrics.iteritems():
+          child_functions_fractions[child_function_key] = 0.0
+      pairwise_inclusive_count_fractions[parent_function_key] = \
+          child_functions_fractions
+
+    return pairwise_inclusive_count_fractions
+
+  def ExtractCommonFunctions(self, pprof_top_path, pprof_tree_path,
+                             cwp_inclusive_count_file,
+                             cwp_pairwise_inclusive_count_file,
+                             cwp_function_groups, common_functions_path,
+                             common_functions_groups_path,
+                             benchmark_set_metrics_file):
+    """Extracts the common functions of the benchmark profiles and the CWP data.
+
+    For each pair of pprof --top and --tree output files, it creates a separate
+    file with the same name containing the common functions specifications and
+    metrics, that will be placed in the common_functions_path directory.
+
+    The resulting file is in CSV format, containing the following fields:
+    function name, file name, object, inclusive count, inclusive_count_fraction,
+    flat, flat%, sum%, cum, cum%, distance and score.
+
+    For each pair of pprof files, an additional file is created with the
+    Chrome OS groups that match the common functions.
+
+    The file is in CSV format containing the fields: group name, group path,
+    the number of functions that match the group, the average and cumulative
+    distance, the average and cumulative score.
+    The file has the same name with the pprof file and it is placed in the
+    common_functions_groups_path directory.
+
+    For all the analyzed benchmarks, the method creates a CSV output file
+    containing the metrics for each benchmark. The CSV fields include the
+    benchmark name, the number of common functions, the average and
+    cumulative distance and score.
+
+    It builds a dict of the CWP statistics by calling the
+    utils.ParseCWPInclusiveCountFile method and if a function is common, it is
     marked as a COMMON_FUNCTION.
 
     Args:
-      pprof_path: The directory with the pprof files.
-      common_functions_path: The directory with the common functions files.
-      cwp_functions_file: The file with the CWP data.
+      pprof_top_path: The name of the directory with the files with the
+        pprof --top output.
+      pprof_tree_path: The name of the directory with the files with the
+        pprof --tree output.
+      cwp_inclusive_count_file: The CSV file with the CWP inclusive count values.
+      cwp_pairwise_inclusive_count_file: The CSV file with the CWP pairwise
+        inclusive count values.
+      cwp_function_groups: A list of tuples containing the name of the group
+        and the corresponding file path.
+      common_functions_path: The path containing the output files with the
+        common functions and their metrics.
+      common_functions_groups_path: The path containing the output files with
+        the Chrome OS groups that match the common functions and their metrics.
+      benchmark_set_metrics_file: The CSV output file containing the metrics for
+        all the analyzed benchmarks.
 
     Returns:
       A dict containing the CWP statistics with the common functions marked as
       COMMON_FUNCTION.
     """
-    # Get the list of pprof files from the given path.
-    pprof_files = os.listdir(pprof_path)
-    cwp_statistics = self.ParseCWPStatistics(cwp_functions_file)
-    function_statistic_regex = re.compile(r'\S+\s+\S+%\s+\S+%\s+\S+\s+\S+%')
-    function_regex = re.compile(r'[a-zA-Z0-9-_:.~\[\]]+')
-    # TODO(evelinad): Consider the case where the file name can have other
-    # characters.
-    file_regex = re.compile(r'[a-zA-Z0-9-/_.]+')
+    cwp_inclusive_count_statistics = \
+        utils.ParseCWPInclusiveCountFile(cwp_inclusive_count_file)
+    cwp_pairwise_inclusive_count_statistics = \
+        utils.ParseCWPPairwiseInclusiveCountFile(
+            cwp_pairwise_inclusive_count_file)
+    cwp_inclusive_count_statistics_cumulative = \
+        self.ComputeCWPCummulativeInclusiveStatistics(
+            cwp_inclusive_count_statistics)
+    cwp_pairwise_inclusive_count_fractions = \
+        self.ComputeCWPChildFunctionsFractions(
+            cwp_inclusive_count_statistics_cumulative,
+            cwp_pairwise_inclusive_count_statistics)
+    benchmark_set_metrics = {}
+    pprof_files = os.listdir(pprof_top_path)
 
     for pprof_file in pprof_files:
-      # In the pprof output, the statistics of the functions start from the
-      # 8th line.
-      with open(os.path.join(pprof_path, pprof_file), 'r') as input_file:
-        pprof_statistics = input_file.readlines()[6:]
-      output_lines = \
-        ['function,file,dso,inclusive_count,flat,flat%,sum%,cum,cum%']
-
-      for pprof_statistic in pprof_statistics:
-        function_statistic_match = \
-          function_statistic_regex.search(pprof_statistic)
-        function_statistic = \
-          ','.join(function_statistic_match.group(0).split())
-        lookup_index = function_statistic_match.end()
-        function_match = function_regex.search(pprof_statistic[lookup_index:])
-        function_name = function_match.group(0)
-        lookup_index += function_match.end()
-        file_match = file_regex.search(pprof_statistic[lookup_index:])
-        if file_match:
-          key = ",".join([function_name, os.path.normpath(file_match.group(0))])
-        else:
-          key = function_name
-
-        if key in cwp_statistics:
-          cwp_statistic = cwp_statistics[key]
-          output_lines.append(','.join([key, cwp_statistic[0],
-                                        function_statistic]))
-          cwp_statistics[key] = (cwp_statistic[0], self.COMMON_FUNCTION)
+      pprof_top_statistics = \
+          utils.ParsePprofTopOutput(os.path.join(pprof_top_path, pprof_file))
+      pprof_tree_statistics = \
+          utils.ParsePprofTreeOutput(os.path.join(pprof_tree_path, pprof_file))
+      common_functions_lines = []
+      benchmark_function_metrics = {}
+
+      for function_key, function_statistic in pprof_top_statistics.iteritems():
+        if function_key not in cwp_inclusive_count_statistics:
+          continue
+
+        cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction, _ = \
+            cwp_inclusive_count_statistics[function_key]
+        cwp_inclusive_count_statistics[function_key] = \
+            (cwp_dso_name, cwp_inclusive_count, cwp_inclusive_count_fraction,
+             utils.COMMON_FUNCTION)
+
+        function_name, _ = function_key.split(',')
+        distance = benchmark_metrics.ComputeDistanceForFunction(
+            pprof_tree_statistics[function_key],
+            cwp_pairwise_inclusive_count_fractions.get(function_name, {}))
+        benchmark_cum_p = float(function_statistic[4])
+        score = benchmark_metrics.ComputeScoreForFunction(
+            distance, cwp_inclusive_count_fraction, benchmark_cum_p)
+        benchmark_function_metrics[function_key] = (distance, score)
+
+        common_functions_lines.append(','.join([function_key, cwp_dso_name, str(
+            cwp_inclusive_count), str(cwp_inclusive_count_fraction), ','.join(
+                function_statistic), str(distance), str(score)]))
+      benchmark_function_groups_statistics = \
+          benchmark_metrics.ComputeMetricsForComponents(
+              cwp_function_groups, benchmark_function_metrics)
+      benchmark_set_metrics[pprof_file] = \
+          benchmark_metrics.ComputeMetricsForBenchmark(
+              benchmark_function_metrics)
 
       with open(os.path.join(common_functions_path, pprof_file), 'w') \
-        as output_file:
-        output_file.write('\n'.join(output_lines))
+          as output_file:
+        common_functions_lines.sort(
+            key=lambda x: float(x.split(',')[11]), reverse=True)
+        common_functions_lines.insert(0, 'function,file,dso,inclusive_count,'
+                                      'inclusive_count_fraction,flat,flat%,'
+                                      'sum%,cum,cum%,distance,score')
+        output_file.write('\n'.join(common_functions_lines))
+
+      with open(os.path.join(common_functions_groups_path, pprof_file), 'w') \
+          as output_file:
+        common_functions_groups_lines = \
+            [','.join([group_name, ','.join(
+                [str(statistic) for statistic in group_statistic])])
+             for group_name, group_statistic in
+             benchmark_function_groups_statistics.iteritems()]
+        common_functions_groups_lines.sort(
+            key=lambda x: float(x.split(',')[5]), reverse=True)
+        common_functions_groups_lines.insert(
+            0, 'group_name,file_path,number_of_functions,distance_cum,'
+            'distance_avg,score_cum,score_avg')
+        output_file.write('\n'.join(common_functions_groups_lines))
+
+    with open(benchmark_set_metrics_file, 'w') as output_file:
+      benchmark_set_metrics_lines = []
+
+      for benchmark_name, metrics in benchmark_set_metrics.iteritems():
+        benchmark_set_metrics_lines.append(','.join([benchmark_name, ','.join(
+            [str(metric) for metric in metrics])]))
+      benchmark_set_metrics_lines.sort(
+          key=lambda x: float(x.split(',')[4]), reverse=True)
+      benchmark_set_metrics_lines.insert(
+          0, 'benchmark_name,number_of_functions,distance_cum,distance_avg,'
+          'score_cum,score_avg')
+      output_file.write('\n'.join(benchmark_set_metrics_lines))
+
+    return cwp_inclusive_count_statistics
+
+  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups,
+                          extra_cwp_functions_groups_path,
+                          extra_cwp_functions_groups_file):
+    """Groups the extra functions.
+
+    Writes the data of the functions that belong to each group in a separate
+    file, sorted by their inclusive count value, in descending order. The file
+    name is the same as the group name.
 
-    return cwp_statistics
-
-  @staticmethod
-  def ParseFunctionGroups(cwp_function_groups_lines):
-    """Parses the contents of the function groups file.
-
-    Args:
-      cwp_function_groups_lines: A list of the lines contained in the CWP
-        function groups file.
-    Returns:
-      A list of tuples containing the group name, the file path, the total
-      number of inclusive count values for that group, a list that will contain
-      the CWP statistics of the functions declared in files that share the file
-      path.
-    """
-    cwp_function_groups = []
-
-    for line in cwp_function_groups_lines:
-      group_name, file_path = line.split()
-      cwp_function_groups.append((group_name, file_path, 0, []))
-
-    return cwp_function_groups
-
-  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups_file_prefix,
-                          cwp_function_groups_file,
-                          cwp_function_groups_statistics_file):
-    """Groups the functions that are in the CWP statistics and not in the pprof
-    output. A function belongs to a group that is defined by a given file path
-    if it is declared in a file that shares that path.
-
-    Writes the data of the functions that belong to a group in a file, sorted
-    by their inclusive count value, in descendant order. The file name is
-    composed by the cwp_function_groups_file_prefix and the name of the group.
     The file is in CSV format, containing the fields: function name, file name,
-    object name, inclusive count.
+    object name, inclusive count, inclusive count fraction.
 
-    It creates a CSV file containing the name of the groups, their
-    common path, the total inclusive count value of all the functions declared
-    in files that share the common path, sorted in descendant order by the
-    inclusive count value.
+    It creates a CSV file containing the names of the groups, their
+    common path, the total inclusive count and inclusive count fraction values
+    of all the functions declared in files that share the common path, sorted
+    in descending order by the inclusive count value.
 
     Args:
       cwp_statistics: A dict containing the CWP statistics.
-      cwp_function_groups_file_prefix: The prefix used for naming the files that
-        the function data for a specific group.
-      cwp_function_groups_file: The name of the file containing the groups of
-        functions.
-      cwp_function_groups_statistics_file: The name of the file that will
-        contain the statistics for the function groups.
+      cwp_function_groups: A list of tuples with the groups names and the path
+        describing the groups.
+      extra_cwp_functions_groups_path: The name of the directory containing
+        the CSV output files with the extra CWP functions that match a
+        particular group.
+      extra_cwp_functions_groups_file: The CSV output file containing the groups
+        that match the extra functions and their statistics.
     """
-    with open(cwp_function_groups_file, 'r') as input_file:
-      cwp_function_groups = self.ParseFunctionGroups(input_file.readlines())
-
+    cwp_function_groups_statistics = defaultdict(lambda: ([], '', 0, 0.0))
     for function, statistics in cwp_statistics.iteritems():
-      if statistics[1] == self.COMMON_FUNCTION:
+      if statistics[3] == utils.COMMON_FUNCTION:
         continue
+
       file_name = function.split(',')[1]
-      group_inclusive_count = int(statistics[0].split(',')[1])
-      for i, group in enumerate(cwp_function_groups):
+      group_inclusive_count = int(statistics[1])
+      group_inclusive_count_fraction = float(statistics[2])
+
+      for group in cwp_function_groups:
         group_common_path = group[1]
 
-        # The order of the groups mentioned in the cwp_functions_groups
-        # matters. A function declared in a file will belong to the first
-        # mentioned group that matches it's path to the one of the file.
-        # It is possible to have multiple paths that belong to the same group.
-        if group_common_path in file_name:
-          group_name = group[0]
-          group_inclusive_count += group[2]
-          group_lines = group[3]
-
-          group_lines.append(','.join([function, statistics[0]]))
-          cwp_function_groups[i] = (group_name, group_common_path,
-                                    group_inclusive_count, group_lines)
-          break
-
-    group_statistics_lines = []
-
-    for group_name, group_path, group_inclusive_count, group_lines in \
-        cwp_function_groups:
-      group_statistics_lines.append(','.join([group_name, group_path,
-                                              str(group_inclusive_count)]))
-      if group_lines:
-        # Sort the output in descendant order based on the inclusive_count
-        # value.
-        group_lines.sort(key=lambda x: int(x.split(',')[-1]), reverse=True)
-        group_lines.insert(0, 'function,file,dso,inclusive_count')
-        group_file_name = cwp_function_groups_file_prefix + group_name
-
-        with open(group_file_name, 'w') as output_file:
-          output_file.write('\n'.join(group_lines))
-
-    group_statistics_lines.sort(
+        if group_common_path not in file_name:
+          continue
+
+        group_name = group[0]
+        group_statistics = cwp_function_groups_statistics[group_name]
+        group_lines = group_statistics[0]
+        group_inclusive_count += group_statistics[2]
+        group_inclusive_count_fraction += group_statistics[3]
+
+        group_lines.append(','.join([function, statistics[0],
+                                     str(statistics[1]), str(statistics[2])]))
+        cwp_function_groups_statistics[group_name] = \
+            (group_lines, group_common_path, group_inclusive_count,
+             group_inclusive_count_fraction)
+        break
+
+    extra_cwp_functions_groups_lines = []
+    for group_name, group_statistics \
+        in cwp_function_groups_statistics.iteritems():
+      group_output_lines = group_statistics[0]
+      group_output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True)
+      group_output_lines.insert(
+          0, 'function,file,dso,inclusive_count,inclusive_count_fraction')
+      with open(os.path.join(extra_cwp_functions_groups_path, group_name),
+                'w') as output_file:
+        output_file.write('\n'.join(group_output_lines))
+      extra_cwp_functions_groups_lines.append(','.join(
+          [group_name, group_statistics[1], str(group_statistics[2]), str(
+              group_statistics[3])]))
+
+    extra_cwp_functions_groups_lines.sort(
         key=lambda x: int(x.split(',')[2]), reverse=True)
-    group_statistics_lines.insert(0, 'group,shared_path,inclusive_count')
-
-    with open(cwp_function_groups_statistics_file, 'w') as output_file:
-      output_file.write('\n'.join(group_statistics_lines))
+    extra_cwp_functions_groups_lines.insert(
+        0, 'group,shared_path,inclusive_count,inclusive_count_fraction')
+    with open(extra_cwp_functions_groups_file, 'w') as output_file:
+      output_file.write('\n'.join(extra_cwp_functions_groups_lines))
 
   def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file):
-    """Gets the functions that are in the CWP file, but not in the pprof output.
+    """Gets the functions that are in the CWP data, but not in the pprof output.
 
     Writes the functions and their statistics in the extra_cwp_functions_file
     file. The output is sorted based on the inclusive_count value. The file is
     in CSV format, containing the fields: function name, file name, object name,
-    inclusive count.
+    inclusive count and inclusive count fraction.
 
     Args:
-      cwp_statistics: A dict containing the CWP statistics.
-      extra_cwp_functions_file: The file where should be stored the CWP
-        functions and statistics that are marked as NOT_COMMON_FUNCTIONS.
+      cwp_statistics: A dict containing the CWP statistics indexed by the
+        function and the file name, comma separated.
+      extra_cwp_functions_file: The file in which to store the CWP functions
+        and statistics that are marked as EXTRA_FUNCTION.
     """
     output_lines = []
 
     for function, statistics in cwp_statistics.iteritems():
-      if statistics[1] == self.NOT_COMMON_FUNCTION:
-        output_lines.append(','.join([function, statistics[0]]))
+      if statistics[3] == utils.EXTRA_FUNCTION:
+        output_lines.append(','.join([function, statistics[0],
+                                      str(statistics[1]), str(statistics[2])]))
 
     with open(extra_cwp_functions_file, 'w') as output_file:
-      output_lines.sort(key=lambda x: int(x.split(',')[-1]), reverse=True)
-      output_lines.insert(0, 'function,file,dso,inclusive_count')
+      output_lines.sort(key=lambda x: int(x.split(',')[3]), reverse=True)
+      output_lines.insert(0, 'function,file,dso,inclusive_count,'
+                          'inclusive_count_fraction')
       output_file.write('\n'.join(output_lines))
 
 
@@ -318,65 +433,92 @@ def ParseArguments(arguments):
   parser = argparse.ArgumentParser()
 
   parser.add_argument(
-      '-p',
-      '--pprof_path',
-      dest='pprof_path',
+      '--pprof_top_path',
       required=True,
-      help='The directory containing the pprof output files.')
+      help='The directory containing the files with the pprof --top output of '
+      'the benchmark profiles (the hot functions). The name of the files '
+      'should match with the ones from the pprof tree output files.')
   parser.add_argument(
-      '-w',
-      '--cwp_hot_functions_file',
-      dest='cwp_hot_functions_file',
+      '--pprof_tree_path',
       required=True,
-      help='The CSV file containing the CWP hot functions. The '
-      'file should include the name of the functions, the '
-      'file names with the definition, the object file '
-      'and the CWP inclusive count values, comma '
-      'separated.')
+      help='The directory containing the files with the pprof --tree output '
+      'of the benchmark profiles (the call chains). The name of the files '
+      'should match with the ones of the pprof top output files.')
   parser.add_argument(
-      '-c',
-      '--common_functions_path',
-      dest='common_functions_path',
+      '--cwp_inclusive_count_file',
       required=True,
-      help='The directory containing the files with the pprof '
-      'and CWP common functions. A file will contain all '
-      'the hot functions from a pprof output file that '
-      'are also included in the CWP hot functions file. '
-      'The files with the common functions will have the '
-      'same names with the corresponding pprof output '
-      'files.')
+      help='The CSV file containing the CWP hot functions with their '
+      'inclusive_count values. The CSV fields include the name of the '
+      'function, the file and the object with the definition, the inclusive '
+      'count value and the inclusive count fraction out of the total amount of '
+      'inclusive count values.')
   parser.add_argument(
-      '-e',
-      '--extra_cwp_functions_file',
-      dest='extra_cwp_functions_file',
+      '--cwp_pairwise_inclusive_count_file',
       required=True,
-      help='The file that will contain the CWP hot functions '
-      'that are not in any of the pprof output files. '
-      'The file should include the name of the functions, '
-      'the file names with the definition, the object '
-      'file and the CWP inclusive count values, comma '
-      'separated.')
+      help='The CSV file containing the CWP pairs of parent and child '
+      'functions with their inclusive count values. The CSV fields include the '
+      'name of the parent and child functions concatenated by ;;, the file '
+      'and the object with the definition of the child function, and the '
+      'inclusive count value.')
   parser.add_argument(
-      '-g',
       '--cwp_function_groups_file',
-      dest='cwp_function_groups_file',
-      help='The file that will contain the CWP function groups.'
-      'A line consists in the group name and a file path. A group must '
+      required=True,
+      help='The file that contains the CWP function groups. A line consists in '
+      'the group name and a file path describing the group. A group must '
       'represent a ChromeOS component.')
   parser.add_argument(
-      '-s',
-      '--cwp_function_groups_statistics_file',
-      dest='cwp_function_groups_statistics_file',
-      help='The file that will contain the total inclusive count values of CWP '
-      'function groups in CSV format. A line will contain the name of the '
-      'group, the common path, the total inclusive count value of all the'
-      'functions declared in files that share the common path.')
+      '--common_functions_path',
+      required=True,
+      help='The directory containing the CSV output files with the common '
+      'functions of the benchmark profiles and CWP data. A file will contain '
+      'all the hot functions from a pprof top output file that are also '
+      'included in the file containing the cwp inclusive count values. The CSV '
+      'fields are: the function name, the file and the object where the '
+      'function is declared, the CWP inclusive count and inclusive count '
+      'fraction values, the cumulative and average distance, the cumulative '
+      'and average score. The files with the common functions will have the '
+      'same names with the corresponding pprof output files.')
+  parser.add_argument(
+      '--common_functions_groups_path',
+      required=True,
+      help='The directory containing the CSV output files with the Chrome OS '
+      'groups and their metrics that match the common functions of the '
+      'benchmark profiles and CWP. The files with the groups will have the '
+      'same names with the corresponding pprof output files. The CSV fields '
+      'include the group name, group path, the number of functions that match '
+      'the group, the average and cumulative distance, the average and '
+      'cumulative score.')
+  parser.add_argument(
+      '--benchmark_set_metrics_file',
+      required=True,
+      help='The CSV output file containing the metrics for each benchmark. The '
+      'CSV fields include the benchmark name, the number of common functions, '
+      'the average and cumulative distance and score.')
   parser.add_argument(
-      '-x',
-      '--cwp_function_groups_file_prefix',
-      dest='cwp_function_groups_file_prefix',
-      help='The prefix of the files that will store the function statistics '
-      'for each function group.')
+      '--extra_cwp_functions_file',
+      required=True,
+      help='The CSV output file containing the functions that are in the CWP '
+      'data, but are not in any of the benchmark profiles. The CSV fields '
+      'include the name of the function, the file name and the object with the '
+      'definition, and the CWP inclusive count and inclusive count fraction '
+      'values. The entries are sorted in descending order based on the '
+      'inclusive count value.')
+  parser.add_argument(
+      '--extra_cwp_functions_groups_file',
+      required=True,
+      help='The CSV output file containing the groups that match the extra CWP '
+      'functions and their statistics. The CSV fields include the group name, '
+      'the file path, the total inclusive count and inclusive count fraction '
+      'values of the functions matching a particular group.')
+  parser.add_argument(
+      '--extra_cwp_functions_groups_path',
+      required=True,
+      help='The directory containing the CSV output files with the extra CWP '
+      'functions that match a particular group. The name of the file is the '
+      'same as the group name. The CSV fields include the name of the '
+      'function, the file name and the object with the definition, and the CWP '
+      'inclusive count and inclusive count fraction values. The entries are '
+      'sorted in descending order based on the inclusive count value.')
 
   options = parser.parse_args(arguments)
 
@@ -386,11 +528,29 @@ def ParseArguments(arguments):
 def Main(argv):
   options = ParseArguments(argv)
 
-  hot_functions_processor = HotFunctionsProcessor(options.pprof_path,
-    options.cwp_hot_functions_file, options.common_functions_path,
-    options.extra_cwp_functions_file, options.cwp_function_groups_file,
-    options.cwp_function_groups_statistics_file,
-    options.cwp_function_groups_file_prefix)
+  if os.path.exists(options.common_functions_path):
+    shutil.rmtree(options.common_functions_path)
+
+  os.makedirs(options.common_functions_path)
+
+  if os.path.exists(options.common_functions_groups_path):
+    shutil.rmtree(options.common_functions_groups_path)
+
+  os.makedirs(options.common_functions_groups_path)
+
+  if os.path.exists(options.extra_cwp_functions_groups_path):
+    shutil.rmtree(options.extra_cwp_functions_groups_path)
+
+  os.makedirs(options.extra_cwp_functions_groups_path)
+
+  hot_functions_processor = HotFunctionsProcessor(
+      options.pprof_top_path, options.pprof_tree_path,
+      options.cwp_inclusive_count_file,
+      options.cwp_pairwise_inclusive_count_file,
+      options.cwp_function_groups_file, options.common_functions_path,
+      options.common_functions_groups_path, options.benchmark_set_metrics_file,
+      options.extra_cwp_functions_file, options.extra_cwp_functions_groups_file,
+      options.extra_cwp_functions_groups_path)
 
   hot_functions_processor.ProcessHotFunctions()
author	Evelina Dumitrescu <evelinad@google.com>	2016-09-23 15:32:49 -0700
committer	chrome-bot <chrome-bot@chromium.org>	2017-02-01 18:13:44 -0800
commit	0228f92f87bc785c3f8184ce79e2d016c95941fc (patch)
tree	78a7c04a2942e2a944379150ebee59fcee6f4756
parent	c7faa09f456ca5c651ac373ad897aa4be6ad2717 (diff)
download	toolchain-utils-0228f92f87bc785c3f8184ce79e2d016c95941fc.tar.gz