2 files changed, 17 insertions, 153 deletions
diff --git a/user_activity_benchmarks/benchmark_metrics_experiment.py b/user_activity_benchmarks/benchmark_metrics_experiment.py
index 9c04a894..b21dfad8 100755
--- a/user_activity_benchmarks/benchmark_metrics_experiment.py
+++ b/user_activity_benchmarks/benchmark_metrics_experiment.py
@@ -23,12 +23,15 @@ the functions matching the group. The output is stored in the file
 cwp_function_groups_statistics_file.
 """
 
-import argparse
 from collections import defaultdict
+
+import argparse
 import csv
 import os
 import sys
+
 import benchmark_metrics
+import utils
 
 
 class MetricsExperiment(object):
@@ -65,93 +68,6 @@ class MetricsExperiment(object):
         cwp_function_groups_statistics_file
     self._cwp_function_statistics_file = cwp_function_statistics_file
 
-  @staticmethod
-  def ParsePairwiseInclusiveStatisticsFile(file_name):
-    """Parses the pairwise inclusive statistics files.
-
-    A line of the file should contain a pair of a parent and a child function,
-    concatenated by a ;;, the name of the file where the child function is
-    defined and the inclusive count fractions of the pair of functions out of
-    the total amount of inclusive count values.
-
-    Args:
-      file_name: The file containing the pairwise inclusive statistics of the
-      CWP functions.
-
-    Returns:
-      A dict containing the statistics of the parent functions and each of
-      their child functions. The key of the dict is the name of the parent
-      function. The value is a dict having as a key the name of the child
-      function with its file name separated by a ',' and as a value the
-      inclusive count fraction of the child function.
-    """
-    pairwise_inclusive_statistics = defaultdict(lambda: defaultdict(float))
-
-    with open(file_name) as \
-        pairwise_inclusive_statistics_file:
-      statistics_reader = csv.DictReader(
-          pairwise_inclusive_statistics_file, delimiter=',')
-      for statistic in statistics_reader:
-        parent_function_name, child_function_name = \
-            statistic['parent_child_functions'].split(';;')
-        child_function_file_name = \
-            os.path.normpath(statistic['child_function_file'])
-        inclusive_count_fraction = \
-            float(statistic['inclusive_count_fraction'])
-
-        if all([parent_function_name, child_function_name, \
-                inclusive_count_fraction]):
-
-          # There might be situations where a child function appears in
-          # multiple files or objects. Such situations can occur when in the
-          # Dremel queries there are not specified the Chrome OS version and the
-          # name of the board (i.e the files can belong to different kernel or
-          # library versions), when the child function is a template function
-          # that is declared in a header file or there are name collisions
-          # between multiple executable objects.
-          # If a pair of child and parent functions appears multiple times, we
-          # add their inclusive count values.
-          child_function_key = ','.join([child_function_name,
-                                         child_function_file_name])
-          pairwise_inclusive_statistics[parent_function_name]\
-              [child_function_key] += inclusive_count_fraction
-
-    return pairwise_inclusive_statistics
-
-  @staticmethod
-  def ParseInclusiveStatisticsFile(inclusive_statistics_file_name):
-    """Parses the inclusive statistics files.
-
-    Args:
-      inclusive_statistics_file_name: The file containing the inclusive
-        statistics of the CWP functions.
-
-    Returns:
-      A dict having as a key the function name and file where the function is
-      defined separated by a ',' and as a value the inclusive count fraction.
-    """
-    inclusive_statistics = defaultdict(float)
-
-    with open(inclusive_statistics_file_name) as inclusive_statistics_file:
-      statistics_reader = \
-          csv.DictReader(inclusive_statistics_file, delimiter=',')
-
-      for statistic in statistics_reader:
-        function_name = statistic['function']
-        file_name = os.path.normpath(statistic['file'])
-        inclusive_count_fraction = \
-            float(statistic['inclusive_count_fraction'])
-
-        # There might be situations where a function appears in multiple files
-        # or objects. Such situations can occur when in the Dremel queries there
-        # are not specified the Chrome OS version and the name of the board (i.e
-        # the files can belong to different kernel or library versions).
-        if all([function_name, file_name, inclusive_count_fraction]):
-          parent_function_key = ','.join([function_name, file_name])
-          inclusive_statistics[parent_function_key] += inclusive_count_fraction
-
-    return inclusive_statistics
-
   def PerformComputation(self):
     """Does the benchmark metrics experimental computation.
 
@@ -168,27 +84,28 @@ class MetricsExperiment(object):
     """
 
     inclusive_statistics_reference = \
-        self.ParseInclusiveStatisticsFile(self._cwp_inclusive_reference)
+        utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_reference)
     inclusive_statistics_test = \
-        self.ParseInclusiveStatisticsFile(self._cwp_inclusive_test)
+        utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_test)
     pairwise_inclusive_statistics_reference = \
-        self.ParsePairwiseInclusiveStatisticsFile(
+        utils.ParseCWPPairwiseInclusiveCountFile(
             self._cwp_pairwise_inclusive_reference)
     pairwise_inclusive_statistics_test = \
-        self.ParsePairwiseInclusiveStatisticsFile(
+        utils.ParseCWPPairwiseInclusiveCountFile(
             self._cwp_pairwise_inclusive_test)
     parent_function_statistics = {}
 
-    with open(self._cwp_function_groups_file, 'r') as input_file:
-      cwp_function_groups = [line.split() for line in input_file]
+    with open(self._cwp_function_groups_file) as input_file:
+      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())
 
-    for parent_function_key, parent_function_fraction_test \
+    for parent_function_key, parent_function_statistics_test \
         in inclusive_statistics_test.iteritems():
       parent_function_name, parent_function_file_name = \
           parent_function_key.split(',')
+      parent_function_fraction_test = parent_function_statistics_test[2]
 
       parent_function_fraction_reference = \
-          inclusive_statistics_reference.get(parent_function_key, 0.0)
+          inclusive_statistics_reference.get(parent_function_key, (0.0,) * 3)[2]
 
       child_functions_statistics_test = \
           pairwise_inclusive_statistics_test.get(parent_function_name, {})
diff --git a/user_activity_benchmarks/benchmark_metrics_experiment_unittest.py b/user_activity_benchmarks/benchmark_metrics_experiment_unittest.py
index 81d2d0e4..dc5cefd5 100755
--- a/user_activity_benchmarks/benchmark_metrics_experiment_unittest.py
+++ b/user_activity_benchmarks/benchmark_metrics_experiment_unittest.py
@@ -5,13 +5,13 @@
 # found in the LICENSE file.
 """Unit tests for the benchmark_metrics_experiment module."""
 
-from benchmark_metrics_experiment import MetricsExperiment
-
 import mock
 import os
 import tempfile
 import unittest
 
+from benchmark_metrics_experiment import MetricsExperiment
+
 
 class MetricsExperimentTest(unittest.TestCase):
   """Test class for MetricsExperiment class."""
@@ -27,63 +27,10 @@ class MetricsExperimentTest(unittest.TestCase):
     self._inclusive_count_reference_file = \
         'testdata/input/inclusive_count_reference.csv'
     self._cwp_function_groups_file = \
-        'testdata/input/cwp_function_groups'
-
-  def testParseInclusiveStatisticsFile(self):
-    expected_inclusive_statistics_test = {
-        'func_f,/a/b/file_f': 2.3,
-        'func_g,/a/b/file_g': 2.2,
-        'func_h,/c/d/file_h': 3.3,
-        'func_i,/c/d/file_i': 4.4,
-        'func_j,/e/file_j': 5.5,
-        'func_k,/e/file_k': 6.6
-    }
-    expected_inclusive_statistics_reference = {
-        'func_f,/a/b/file_f': 1.0,
-        'func_g,/a/b/file_g': 4.4,
-        'func_h,/c/d/file_h': 3.0,
-        'func_i,/c/d/file_i': 4.0,
-        'func_j,/e/file_j': 5.0,
-        'func_l,/e/file_l': 6.0
-    }
-    result_inclusive_statistics_test = \
-        MetricsExperiment.ParseInclusiveStatisticsFile(
-            self._inclusive_count_test_file)
-    result_inclusive_statistics_reference = \
-        MetricsExperiment.ParseInclusiveStatisticsFile(
-            self._inclusive_count_reference_file)
-    self.assertEqual(result_inclusive_statistics_test,
-                     expected_inclusive_statistics_test)
-    self.assertEqual(result_inclusive_statistics_reference,
-                     expected_inclusive_statistics_reference)
-
-  def testParsePairwiseInclusiveStatisticsFile(self):
-    expected_pairwise_inclusive_statistics_test = {
-        'func_f': {'func_g,/a/b/file_g2': 0.01,
-                   'func_h,/c/d/file_h': 0.02,
-                   'func_i,/c/d/file_i': 0.03},
-        'func_g': {'func_j,/e/file_j': 0.4,
-                   'func_m,/e/file_m': 0.6}
-    }
-    expected_pairwise_inclusive_statistics_reference = {
-        'func_f': {'func_g,/a/b/file_g': 0.1,
-                   'func_h,/c/d/file_h': 0.2,
-                   'func_i,/c/d/file_i': 0.3},
-        'func_g': {'func_j,/e/file_j': 0.4}
-    }
-    result_pairwise_inclusive_statistics_test = \
-        MetricsExperiment.ParsePairwiseInclusiveStatisticsFile(
-            self._pairwise_inclusive_count_test_file)
-    result_pairwise_inclusive_statistics_reference = \
-        MetricsExperiment.ParsePairwiseInclusiveStatisticsFile(
-            self._pairwise_inclusive_count_reference_file)
-    self.assertEqual(result_pairwise_inclusive_statistics_test,
-                     expected_pairwise_inclusive_statistics_test)
-    self.assertEqual(result_pairwise_inclusive_statistics_reference,
-                     expected_pairwise_inclusive_statistics_reference)
+        'testdata/input/cwp_function_groups.txt'
 
   def _CheckFileContents(self, file_name, expected_content_lines):
-    with open(file_name, 'r') as input_file:
+    with open(file_name) as input_file:
       result_content_lines = input_file.readlines()
       self.assertListEqual(expected_content_lines, result_content_lines)