user_activity: Created utility module for grouping parsing functions.

The module contains the functions that do the parsing for the pprof top and tree, CWP data and Chrome OS groups files. BUG=None TEST=utils_unittest passes Change-Id: Ida94a2d456250afe4630a146349937eb0bab0767 Reviewed-on: https://chrome-internal-review.googlesource.com/291201 Reviewed-by: Luis Lozano <llozano@chromium.org> Reviewed-by: Evelina Dumitrescu <evelinad@google.com> Tested-by: Evelina Dumitrescu <evelinad@google.com> Reviewed-on: https://chromium-review.googlesource.com/435915 Commit-Ready: Luis Lozano <llozano@chromium.org> Tested-by: Luis Lozano <llozano@chromium.org>
author: Evelina Dumitrescu <evelinad@google.com> 2016-09-28 15:13:29 -0700
committer: chrome-bot <chrome-bot@chromium.org> 2017-02-01 18:13:43 -0800
commit: c7faa09f456ca5c651ac373ad897aa4be6ad2717 (patch)
tree: d589cb479f102ba95248d93c783009c85ee50371
parent: edc6eb87218afbde3c3df6e0faa99958f709c825 (diff)
download: toolchain-utils-c7faa09f456ca5c651ac373ad897aa4be6ad2717.tar.gz
13 files changed, 531 insertions, 18 deletions
diff --git a/user_activity_benchmarks/testdata/input/cwp_function_groups.txt b/user_activity_benchmarks/testdata/input/cwp_function_groups.txt
new file mode 100644
index 00000000..4233d035
--- /dev/null
+++ b/user_activity_benchmarks/testdata/input/cwp_function_groups.txt
@@ -0,0 +1,3 @@
+ab /a/b
+cd /c/d
+e /e
diff --git a/user_activity_benchmarks/testdata/input/inclusive_count_reference.csv b/user_activity_benchmarks/testdata/input/inclusive_count_reference.csv
index 9930abfd..bc0cca6c 100644
--- a/user_activity_benchmarks/testdata/input/inclusive_count_reference.csv
+++ b/user_activity_benchmarks/testdata/input/inclusive_count_reference.csv
@@ -1,8 +1,8 @@
-function,file,inclusive_count_fraction
-func_f,/a/b/file_f,1
-func_g,/a/b/file_g,2
-func_g,/a/b/../../a/b/file_g,2.4
-func_h,/c/d/file_h,3
-func_i,/c/d/file_i,4
-func_j,/e/file_j,5
-func_l,/e/file_l,6
+function,file,dso,inclusive_count,inclusive_count_fraction
+func_f,/a/b/file_f,f,1,1
+func_g,/a/b/file_g,g,2,2
+func_g,/a/b/../../a/b/file_g,g,3,2.4
+func_h,/c/d/file_h,h,4,3
+func_i,/c/d/file_i,i,5,4
+func_j,/e/file_j,j,6,5
+func_l,/e/file_l,l,7,6
diff --git a/user_activity_benchmarks/testdata/input/inclusive_count_test.csv b/user_activity_benchmarks/testdata/input/inclusive_count_test.csv
index 4a9b47db..c9938276 100644
--- a/user_activity_benchmarks/testdata/input/inclusive_count_test.csv
+++ b/user_activity_benchmarks/testdata/input/inclusive_count_test.csv
@@ -1,8 +1,8 @@
-function,file,inclusive_count_fraction
-func_f,/a/b/file_f,1.1
-func_g,/a/b/file_g,2.2
-func_f,/a/b/file_f,1.2
-func_h,/c/d/../../c/d/file_h,3.3
-func_i,/c/d/file_i,4.4
-func_j,/e/file_j,5.5
-func_k,/e/file_k,6.6
+function,file,dso,inclusive_count,inclusive_count_fraction
+func_f,/a/b/file_f,f,1,1.1
+func_g,/a/b/file_g,g,2,2.2
+func_f,/a/b/file_f,f,3,1.2
+func_h,/c/d/../../c/d/file_h,h,1,3.3
+func_i,/c/d/file_i,i,5,4.4
+func_j,/e/file_j,j,6,5.5
+func_k,/e/file_k,k,7,6.6
diff --git a/user_activity_benchmarks/testdata/input/pairwise_inclusive_count_reference.csv b/user_activity_benchmarks/testdata/input/pairwise_inclusive_count_reference.csv
index 46002357..7d7a49a1 100644
--- a/user_activity_benchmarks/testdata/input/pairwise_inclusive_count_reference.csv
+++ b/user_activity_benchmarks/testdata/input/pairwise_inclusive_count_reference.csv
@@ -1,4 +1,4 @@
-parent_child_functions,child_function_file,inclusive_count_fraction
+parent_child_functions,child_function_file,inclusive_count
 func_f;;func_g,/a/../a/b/file_g,0.1
 func_f;;func_h,/c/d/../d/file_h,0.2
 func_f;;func_i,/c/d/file_i,0.3
diff --git a/user_activity_benchmarks/testdata/input/pairwise_inclusive_count_test.csv b/user_activity_benchmarks/testdata/input/pairwise_inclusive_count_test.csv
index 68cf5774..a3fb72f5 100644
--- a/user_activity_benchmarks/testdata/input/pairwise_inclusive_count_test.csv
+++ b/user_activity_benchmarks/testdata/input/pairwise_inclusive_count_test.csv
@@ -1,4 +1,4 @@
-parent_child_functions,child_function_file,inclusive_count_fraction
+parent_child_functions,child_function_file,inclusive_count
 func_f;;func_g,/a/b/file_g2,0.01
 func_f;;func_h,/c/../c/d/file_h,0.02
 func_f;;func_i,/c/../c/d/file_i,0.03
diff --git a/user_activity_benchmarks/testdata/input/pprof/file1.pprof b/user_activity_benchmarks/testdata/input/pprof_top/file1.pprof
index 62e327b8..62e327b8 100644
--- a/user_activity_benchmarks/testdata/input/pprof/file1.pprof
+++ b/user_activity_benchmarks/testdata/input/pprof_top/file1.pprof
diff --git a/user_activity_benchmarks/testdata/input/pprof/file2.pprof b/user_activity_benchmarks/testdata/input/pprof_top/file2.pprof
index 6d22bff3..6d22bff3 100644
--- a/user_activity_benchmarks/testdata/input/pprof/file2.pprof
+++ b/user_activity_benchmarks/testdata/input/pprof_top/file2.pprof
diff --git a/user_activity_benchmarks/testdata/input/pprof/file3.pprof b/user_activity_benchmarks/testdata/input/pprof_top/file3.pprof
index 6cbf1247..6cbf1247 100644
--- a/user_activity_benchmarks/testdata/input/pprof/file3.pprof
+++ b/user_activity_benchmarks/testdata/input/pprof_top/file3.pprof
diff --git a/user_activity_benchmarks/testdata/input/pprof_top_csv/file1.csv b/user_activity_benchmarks/testdata/input/pprof_top_csv/file1.csv
new file mode 100644
index 00000000..67af7248
--- /dev/null
+++ b/user_activity_benchmarks/testdata/input/pprof_top_csv/file1.csv
@@ -0,0 +1,15 @@
+function,file,flat,flat_p,sum_p,cum,cum_p
+v8::internal::Bitmap::MarkBitFromIndex,/home/chrome-bot/chrome_root/src/v8/src/heap/spaces.h,3139339048,0.0084,0.4135,3144663928,0.0085
+v8::base::NoBarrier_Load,/home/chrome-bot/chrome_root/src/v8/src/base/atomicops_internals_x86_gcc.h,2907238921,0.0078,0.4294,2930031660,0.0079
+v8::Object::GetAlignedPointerFromInternalField,/home/chrome-bot/chrome_root/src/v8/include/v8.h,6054608957,0.0163,0.3538,8069380147,0.0217
+[anon],,115734836217,0.3111,0.3111,329503350629,0.8856
+base::RunLoop::Run,/home/chrome-bot/chrome_root/src/base/run_loop.cc,2725201614,0.0073,0.4517,3511333688,0.0094
+WTF::hashInt,/home/chrome-bot/chrome_root/src/third_party/WebKit/Source/wtf/HashFunctions.h,2786321388,0.0075,0.4444,2794002850,0.0075
+blink::ElementV8Internal::idAttributeGetterCallback,/var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Element.cpp,4651723038,0.0125,0.3663,8205985387,0.0221
+v8::internal::Internals::ReadField,/home/chrome-bot/chrome_root/src/v8/include/v8.h,3354819815,0.009,0.3876,3361796139,0.009
+blink::v8StringToWebCoreString,/home/chrome-bot/chrome_root/src/third_party/WebKit/Source/bindings/core/v8/V8StringResource.cpp,9839378797,0.0264,0.3375,14384869492,0.0387
+blink::NodeV8Internal::firstChildAttributeGetterCallbackForMainWorld,/var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Node.cpp,4569044106,0.0123,0.3786,6408862507,0.0172
+blink::DocumentV8Internal::getElementByIdMethodCallbackForMainWorld,/var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Document.cpp,3277220829,0.0088,0.3964,14077115947,0.0378
+v8::internal::MarkCompactMarkingVisitor::VisitUnmarkedObjects,/home/chrome-bot/chrome_root/src/v8/src/heap/mark-compact.cc,2791274646,0.0075,0.4369,11058283504,0.0297
+blink::ElementV8Internal::getAttributeMethodCallback,/var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Element.cpp,3007599556,0.0081,0.4216,13057167098,0.0351
+v8::internal::Internals::HasHeapObjectTag,/home/chrome-bot/chrome_root/src/v8/include/v8.h,3225711531,0.0087,0.4051,3228415743,0.0087
diff --git a/user_activity_benchmarks/testdata/input/pprof_tree/file1.pprof b/user_activity_benchmarks/testdata/input/pprof_tree/file1.pprof
new file mode 100644
index 00000000..69b5606d
--- /dev/null
+++ b/user_activity_benchmarks/testdata/input/pprof_tree/file1.pprof
@@ -0,0 +1,29 @@
+File: perf
+Build ID: 37750b32016528ac896fc238e0d00513e218fd9e
+Type: instructions_event
+Showing nodes accounting for 234768811461, 63.10% of 372058624378 total
+Dropped 33979 nodes (cum <= 1860293121)
+Showing top 80 nodes out of 271
+----------------------------------------------------------+-------------
+      flat  flat%   sum%        cum   cum%   calls calls% + context 	 	 
+----------------------------------------------------------+-------------
+                                       13412390629 93.24% |   blink::V8StringResource::toString /home/chrome-bot/chrome_root/src/third_party/WebKit/Source/bindings/core/v8/V8StringResource.h
+                                         437497332  3.04% |   [anon]
+                                         378465996  2.63% |   blink::V8StringResource::operator WTF::AtomicString /home/chrome-bot/chrome_root/src/third_party/WebKit/Source/bindings/core/v8/V8StringResource.h
+9839378797  2.64% 33.75% 14384869492  3.87%                | blink::v8StringToWebCoreString /home/chrome-bot/chrome_root/src/third_party/WebKit/Source/bindings/core/v8/V8StringResource.cpp
+                                        3180428647 22.11% |   v8::String::GetExternalStringResourceBase /home/chrome-bot/chrome_root/src/v8/include/v8.h (inline)
+                                         514301458  3.58% |   WTF::RefPtr::RefPtr /home/chrome-bot/chrome_root/src/third_party/WebKit/Source/wtf/RefPtr.h (inline)
+----------------------------------------------------------+-------------
+                                        8205985387   100% |   [anon]
+4651723038  1.25% 36.63% 8205985387  2.21%                | blink::ElementV8Internal::idAttributeGetterCallback /var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Element.cpp
+                                         717786059  8.75% |   v8::Object::GetAlignedPointerFromInternalField /home/chrome-bot/chrome_root/src/v8/include/v8.h (inline)
+----------------------------------------------------------+-------------
+                                        6408862507   100% |   [anon]
+4569044106  1.23% 37.86% 6408862507  1.72%                | blink::NodeV8Internal::firstChildAttributeGetterCallbackForMainWorld /var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Node.cpp
+                                         773479621 12.07% |   v8::Object::GetAlignedPointerFromInternalField /home/chrome-bot/chrome_root/src/v8/include/v8.h (inline)
+                                         690710254 10.78% |   blink::v8SetReturnValueForMainWorld /home/chrome-bot/chrome_root/src/third_party/WebKit/Source/bindings/core/v8/V8Binding.h (inline)
+----------------------------------------------------------+-------------
+                                        2005371070 59.65% |   v8::Object::GetAlignedPointerFromInternalField /home/chrome-bot/chrome_root/src/v8/include/v8.h (inline)
+                                         954968101 28.41% |   v8::String::GetExternalStringResourceBase /home/chrome-bot/chrome_root/src/v8/include/v8.h (inline)
+3354819815   0.9% 38.76% 3361796139   0.9%                | v8::internal::Internals::ReadField /home/chrome-bot/chrome_root/src/v8/include/v8.h
+----------------------------------------------------------+-------------
diff --git a/user_activity_benchmarks/testdata/input/pprof_tree_csv/file1.csv b/user_activity_benchmarks/testdata/input/pprof_tree_csv/file1.csv
new file mode 100644
index 00000000..9b155614
--- /dev/null
+++ b/user_activity_benchmarks/testdata/input/pprof_tree_csv/file1.csv
@@ -0,0 +1,6 @@
+parent_function,parent_function_file,child_function,child_function_file,inclusive_count_fraction
+blink::ElementV8Internal::idAttributeGetterCallback,/var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Element.cpp,v8::Object::GetAlignedPointerFromInternalField,/home/chrome-bot/chrome_root/src/v8/include/v8.h,0.0875
+blink::v8StringToWebCoreString,/home/chrome-bot/chrome_root/src/third_party/WebKit/Source/bindings/core/v8/V8StringResource.cpp,WTF::RefPtr::RefPtr,/home/chrome-bot/chrome_root/src/third_party/WebKit/Source/wtf/RefPtr.h,0.0358
+blink::v8StringToWebCoreString,/home/chrome-bot/chrome_root/src/third_party/WebKit/Source/bindings/core/v8/V8StringResource.cpp,v8::String::GetExternalStringResourceBase,/home/chrome-bot/chrome_root/src/v8/include/v8.h,0.2211
+blink::NodeV8Internal::firstChildAttributeGetterCallbackForMainWorld,/var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Node.cpp,blink::v8SetReturnValueForMainWorld,/home/chrome-bot/chrome_root/src/third_party/WebKit/Source/bindings/core/v8/V8Binding.h,0.10779999999999999
+blink::NodeV8Internal::firstChildAttributeGetterCallbackForMainWorld,/var/cache/chromeos-chrome/chrome-src-internal/src/out_gnawty/Release/gen/blink/bindings/core/v8/V8Node.cpp,v8::Object::GetAlignedPointerFromInternalField,/home/chrome-bot/chrome_root/src/v8/include/v8.h,0.1207
diff --git a/user_activity_benchmarks/utils.py b/user_activity_benchmarks/utils.py
new file mode 100644
index 00000000..ead56df6
--- /dev/null
+++ b/user_activity_benchmarks/utils.py
@@ -0,0 +1,327 @@
+# Copyright 2016 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Utility functions for parsing pprof, CWP data and Chrome OS groups files."""
+
+from collections import defaultdict
+
+import csv
+import os
+import re
+
+SEPARATOR_REGEX = re.compile(r'-+\+-+')
+FUNCTION_STATISTIC_REGEX = \
+    re.compile(r'(\S+)\s+(\S+)%\s+(\S+)%\s+(\S+)\s+(\S+)%')
+CHILD_FUNCTION_PERCENTAGE_REGEX = re.compile(r'([0-9.]+)%')
+FUNCTION_KEY_SEPARATOR_REGEX = re.compile(r'\|\s+')
+# Constants used to identify if a function is common in the pprof and CWP
+# files.
+COMMON_FUNCTION = 'common'
+EXTRA_FUNCTION = 'extra'
+PARENT_CHILD_FUNCTIONS_SEPARATOR = ';;'
+# List of pairs of strings used to make substitutions in file names so that
+# the CWP and pprof data are consistent.
+FILE_NAME_REPLACING_PAIR_STRINGS = [('/build/gnawty', '/build/BOARD'),
+                                    ('/build/amd64-generic', '/build/BOARD'),
+                                    (' ../sysdeps', ',sysdeps'),
+                                    (' ../nptl', ',nptl'),
+                                    ('  aes-x86_64.s', ',aes-x86_64.s'),
+                                    (' (inline)', ''),
+                                    (' (partial-inline)', ''),
+                                    (' ../', ','),
+                                    ('../', '')]
+# Separator used to delimit the function from the file name.
+FUNCTION_FILE_SEPARATOR = ' /'
+
+
+def MakeCWPAndPprofFileNamesConsistent(file_name):
+  """Makes the CWP and pprof file names consistent.
+
+  For the same function, it may happen for some file paths to differ slightly
+  in the CWP data compared to the pprof output. In a file name, for each tuple
+  element of the list, we substitute the first element with the second one.
+
+  Args:
+    file_name: A string representing the name of the file.
+
+  Returns:
+    A string representing the modified name of the file.
+  """
+  file_name = file_name.replace(', ', '; ')
+  for replacing_pair_string in FILE_NAME_REPLACING_PAIR_STRINGS:
+    file_name = file_name.replace(replacing_pair_string[0],
+                                  replacing_pair_string[1])
+
+  return file_name
+
+def MakePprofFunctionKey(function_and_file_name):
+  """Creates the function key from the function and file name.
+
+  Parsing the pprof --top and --tree outputs is difficult due to the fact
+  that it is hard to extract the function and file name (i.e. the function
+  names can have a lot of unexpected characters such as spaces, operators etc).
+  For the moment, we use FUNCTION_FILE_SEPARATOR as the delimiter between the
+  function and the file name. However, there are some cases where the file name
+  does not start with / and we treat these cases separately (i.e. ../sysdeps,
+  ../nptl, aes-x86_64.s).
+
+  Args:
+    function_and_file_name: A string representing the function and the file name
+      as it appears in the pprof output.
+
+  Returns:
+    A string representing the function key, composed from the function and file
+    name, comma separated.
+  """
+  # TODO(evelinad): Use pprof --topproto instead of pprof --top to parse
+  # protobuffers instead of text output. Investigate if there is an equivalent
+  # for pprof --tree that gives protobuffer output.
+  #
+  # In the CWP output, we replace the , with ; as a workaround for parsing
+  # csv files. We do the same for the pprof output.
+  #
+  # TODO(evelinad): Use dremel --csv_dialect=excel-tab in the queries for
+  # replacing the , delimiter with tab.
+  function_and_file_name = function_and_file_name.replace(', ', '; ')
+  # If the function and file name sequence contains the FUNCTION_FILE_SEPARATOR,
+  # we normalize the path name of the file and make the string substitutions
+  # to make the CWP and pprof data consistent. The returned key is composed
+  # from the function name and normalized file path name, separated by a comma.
+  # If the function and file name does not contain the FUNCTION_FILE_SEPARATOR,
+  # we just do the string substitutions.
+  if FUNCTION_FILE_SEPARATOR in function_and_file_name:
+    function_name, file_name = \
+        function_and_file_name.split(FUNCTION_FILE_SEPARATOR)
+    file_name = \
+        MakeCWPAndPprofFileNamesConsistent(os.path.normpath("/" + file_name))
+    return ','.join([function_name, file_name])
+
+  return MakeCWPAndPprofFileNamesConsistent(function_and_file_name)
+
+def ParseFunctionGroups(cwp_function_groups_lines):
+  """Parses the contents of the function groups file.
+
+  Args:
+    cwp_function_groups_lines: A list of the lines contained in the CWP
+      function groups file. A line contains the group name and the file path
+      that describes the group, separated by a space.
+
+  Returns:
+    A list of tuples containing the group name and the file path.
+  """
+  # The order of the groups mentioned in the cwp_function_groups file
+  # matters. A function declared in a file will belong to the first
+  # mentioned group that matches its path to the one of the file.
+  # It is possible to have multiple paths that belong to the same group.
+  return [tuple(line.split()) for line in cwp_function_groups_lines]
+
+
+def ParsePprofTopOutput(file_name):
+  """Parses a file that contains the output of the pprof --top command.
+
+  Args:
+    file_name: The name of the file containing the pprof --top output.
+
+  Returns:
+    A dict having as a key the name of the function and the file containing
+    the declaration of the function, separated by a comma, and as a value
+    a tuple containing the flat, flat percentage, sum percentage, cumulative
+    and cumulative percentage values.
+  """
+
+  pprof_top_statistics = {}
+
+  # In the pprof top output, the first 6 lines are skipped; the statistics of
+  # the functions start on the line after them.
+  with open(file_name) as input_file:
+    pprof_top_content = input_file.readlines()[6:]
+
+  for line in pprof_top_content:
+    function_statistic_match = FUNCTION_STATISTIC_REGEX.search(line)
+    flat, flat_p, sum_p, cum, cum_p = function_statistic_match.groups()
+    flat_p = str(float(flat_p) / 100.0)
+    sum_p = str(float(sum_p) / 100.0)
+    cum_p = str(float(cum_p) / 100.0)
+    lookup_index = function_statistic_match.end()
+    function_and_file_name = line[lookup_index + 2 : -1]
+    key = MakePprofFunctionKey(function_and_file_name)
+    pprof_top_statistics[key] = (flat, flat_p, sum_p, cum, cum_p)
+  return pprof_top_statistics
+
+
+def ParsePprofTreeOutput(file_name):
+  """Parses a file that contains the output of the pprof --tree command.
+
+  Args:
+    file_name: The name of the file containing the pprof --tree output.
+
+  Returns:
+    A dict including the statistics for pairs of parent and child functions.
+    The key is the name of the parent function and the file where the
+    function is declared, separated by a comma. The value is a dict having as
+    a key the name of the child function and the file where the function is
+    declared, comma separated, and as a value the fraction of time (the pprof
+    percentage divided by 100) the parent function spends in the child function.
+  """
+
+  # In the pprof tree output, the first 9 lines are a header; the statistics
+  # of the functions start on the line after them.
+  with open(file_name) as input_file:
+    pprof_tree_content = input_file.readlines()[9:]
+
+  pprof_tree_statistics = defaultdict(lambda: defaultdict(float))
+  track_child_functions = False
+
+  # The statistics of a given function, its parent and child functions are
+  # included between two separator marks.
+  # All the parent function statistics are above the line containing the
+  # statistics of the given function.
+  # All the statistics of a child function are below the statistics of the
+  # given function.
+  # The statistics of a parent or a child function contain the calls, calls
+  # percentage, the function name and the file where the function is declared.
+  # The statistics of the given function contain the flat, flat percentage,
+  # sum percentage, cumulative, cumulative percentage, function name and the
+  # name of the file containing the declaration of the function.
+  for line in pprof_tree_content:
+    separator_match = SEPARATOR_REGEX.search(line)
+
+    if separator_match:
+      track_child_functions = False
+      continue
+
+    parent_function_statistic_match = FUNCTION_STATISTIC_REGEX.search(line)
+
+    if parent_function_statistic_match:
+      track_child_functions = True
+      lookup_index = parent_function_statistic_match.end()
+      parent_function_key_match = \
+          FUNCTION_KEY_SEPARATOR_REGEX.search(line, pos=lookup_index)
+      lookup_index = parent_function_key_match.end()
+      parent_function_key = MakePprofFunctionKey(line[lookup_index:-1])
+      continue
+
+    if not track_child_functions:
+      continue
+
+    child_function_statistic_match = \
+        CHILD_FUNCTION_PERCENTAGE_REGEX.search(line)
+    child_function_percentage = \
+        float(child_function_statistic_match.group(1))
+    lookup_index = child_function_statistic_match.end()
+    child_function_key_match = \
+        FUNCTION_KEY_SEPARATOR_REGEX.search(line, pos=lookup_index)
+    lookup_index = child_function_key_match.end()
+    child_function_key = MakePprofFunctionKey(line[lookup_index:-1])
+
+    pprof_tree_statistics[parent_function_key][child_function_key] += \
+        child_function_percentage / 100.0
+
+  return pprof_tree_statistics
+
+
+def ParseCWPInclusiveCountFile(file_name):
+  """Parses the CWP inclusive count files.
+
+  A line should contain the name of the function, the file name with the
+  declaration, the inclusive count and inclusive count fraction out of the
+  total extracted inclusive count values.
+
+  Args:
+    file_name: The file containing the inclusive count values of the CWP
+      functions.
+
+  Returns:
+    A dict containing the inclusive count statistics. The key is the name of
+    the function and the file name, comma separated. The value represents a
+    tuple with the object name containing the function declaration, the
+    inclusive count and inclusive count fraction values, and a marker to
+    identify if the function is present in one of the benchmark profiles.
+  """
+  cwp_inclusive_count_statistics = defaultdict(lambda: ('', 0, 0.0, 0))
+
+  with open(file_name) as input_file:
+    statistics_reader = csv.DictReader(input_file, delimiter=',')
+    for statistic in statistics_reader:
+      function_name = statistic['function']
+      file_name = MakeCWPAndPprofFileNamesConsistent(
+          os.path.normpath(statistic['file']))
+      dso_name = statistic['dso']
+      inclusive_count = statistic['inclusive_count']
+      inclusive_count_fraction = statistic['inclusive_count_fraction']
+
+      # We ignore the lines that have empty fields (i.e. they specify only the
+      # addresses of the functions and the inclusive count values).
+      if all([
+          function_name, file_name, dso_name, inclusive_count,
+          inclusive_count_fraction
+      ]):
+        key = '%s,%s' % (function_name, file_name)
+
+        # There might be situations where a function appears in multiple files
+        # or objects. Such situations can occur when the Chrome OS version and
+        # the name of the board are not specified in the Dremel queries (i.e.
+        # the files can belong to different kernel or library versions).
+        inclusive_count_sum = \
+            cwp_inclusive_count_statistics[key][1] + int(inclusive_count)
+        inclusive_count_fraction_sum = \
+            cwp_inclusive_count_statistics[key][2] + \
+            float(inclusive_count_fraction)
+
+        # All the functions are initially marked as EXTRA_FUNCTION.
+        value = \
+            (dso_name, inclusive_count_sum, inclusive_count_fraction_sum,
+             EXTRA_FUNCTION)
+        cwp_inclusive_count_statistics[key] = value
+
+  return cwp_inclusive_count_statistics
+
+
+def ParseCWPPairwiseInclusiveCountFile(file_name):
+  """Parses the CWP pairwise inclusive count files.
+
+  A line of the file should contain a pair of a parent and a child function,
+  concatenated by the PARENT_CHILD_FUNCTIONS_SEPARATOR, the name of the file
+  where the child function is declared and the inclusive count fractions of
+  the pair of functions out of the total amount of inclusive count values.
+
+  Args:
+    file_name: The file containing the pairwise inclusive_count statistics of
+      the
+    CWP functions.
+
+  Returns:
+    A dict containing the statistics of the parent functions and each of
+    their child functions. The key of the dict is the name of the parent
+    function. The value is a dict having as a key the name of the child
+    function with its file name separated by a ',' and as a value the
+    inclusive count value of the parent-child function pair.
+  """
+  pairwise_inclusive_count_statistics = defaultdict(lambda: defaultdict(float))
+
+  with open(file_name) as input_file:
+    statistics_reader = csv.DictReader(input_file, delimiter=',')
+
+    for statistic in statistics_reader:
+      parent_function_name, child_function_name = \
+          statistic['parent_child_functions'].split(
+              PARENT_CHILD_FUNCTIONS_SEPARATOR)
+      child_function_file_name = MakeCWPAndPprofFileNamesConsistent(
+          os.path.normpath(statistic['child_function_file']))
+      inclusive_count = statistic['inclusive_count']
+
+      # There might be situations where a child function appears in
+      # multiple files or objects. Such situations can occur when the
+      # Chrome OS version and the name of the board are not specified in
+      # the Dremel queries (i.e. the files can belong to different kernel or
+      # library versions), when the child function is a template function
+      # that is declared in a header file or there are name collisions
+      # between multiple executable objects.
+      # If a pair of child and parent functions appears multiple times, we
+      # add their inclusive count values.
+      child_function_key = ','.join(
+          [child_function_name, child_function_file_name])
+      pairwise_inclusive_count_statistics[parent_function_name] \
+          [child_function_key] += float(inclusive_count)
+
+  return pairwise_inclusive_count_statistics
diff --git a/user_activity_benchmarks/utils_unittest.py b/user_activity_benchmarks/utils_unittest.py
new file mode 100755
index 00000000..31bf83d3
--- /dev/null
+++ b/user_activity_benchmarks/utils_unittest.py
@@ -0,0 +1,133 @@
+#!/usr/bin/python2
+
+# Copyright 2016 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Unit tests for the utility module."""
+
+import collections
+import csv
+import unittest
+
+import utils
+
+
+class UtilsTest(unittest.TestCase):
+  """Test class for utility module."""
+
+  def __init__(self, *args, **kwargs):
+    super(UtilsTest, self).__init__(*args, **kwargs)
+    self._pprof_top_csv_file = 'testdata/input/pprof_top_csv/file1.csv'
+    self._pprof_top_file = 'testdata/input/pprof_top/file1.pprof'
+    self._pprof_tree_csv_file = 'testdata/input/pprof_tree_csv/file1.csv'
+    self._pprof_tree_file = 'testdata/input/pprof_tree/file1.pprof'
+    self._pairwise_inclusive_count_test_file = \
+        'testdata/input/pairwise_inclusive_count_test.csv'
+    self._pairwise_inclusive_count_reference_file = \
+        'testdata/input/pairwise_inclusive_count_reference.csv'
+    self._inclusive_count_test_file = \
+       'testdata/input/inclusive_count_test.csv'
+    self._inclusive_count_reference_file = \
+        'testdata/input/inclusive_count_reference.csv'
+
+  def testParseFunctionGroups(self):
+    cwp_function_groups_lines = \
+        ['group1 /a\n', 'group2 /b\n', 'group3 /c\n', 'group4 /d\n']
+    expected_output = [('group1', '/a'), ('group2', '/b'), ('group3', '/c'),
+                       ('group4', '/d')]
+    result = utils.ParseFunctionGroups(cwp_function_groups_lines)
+
+    self.assertListEqual(expected_output, result)
+
+  def testParsePProfTopOutput(self):
+    result_pprof_top_output = utils.ParsePprofTopOutput(self._pprof_top_file)
+    expected_pprof_top_output = {}
+
+    with open(self._pprof_top_csv_file) as input_file:
+      statistics_reader = csv.DictReader(input_file, delimiter=',')
+
+      for statistic in statistics_reader:
+        if statistic['file']:
+          function_key = ','.join([statistic['function'], statistic['file']])
+        else:
+          function_key = statistic['function']
+        expected_pprof_top_output[function_key] = \
+            (statistic['flat'], statistic['flat_p'], statistic['sum_p'],
+             statistic['cum'], statistic['cum_p'])
+
+    self.assertDictEqual(result_pprof_top_output, expected_pprof_top_output)
+
+  def testParsePProfTreeOutput(self):
+    result_pprof_tree_output = utils.ParsePprofTreeOutput(self._pprof_tree_file)
+    expected_pprof_tree_output = collections.defaultdict(dict)
+
+    with open(self._pprof_tree_csv_file) as input_file:
+      statistics_reader = csv.DictReader(input_file, delimiter=',')
+
+      for statistic in statistics_reader:
+        parent_function_key = \
+            ','.join([statistic['parent_function'],
+                      statistic['parent_function_file']])
+        child_function_key = \
+            ','.join([statistic['child_function'],
+                      statistic['child_function_file']])
+
+        expected_pprof_tree_output[parent_function_key][child_function_key] = \
+            float(statistic['inclusive_count_fraction'])
+
+    self.assertDictEqual(result_pprof_tree_output, expected_pprof_tree_output)
+
+  def testParseCWPInclusiveCountFile(self):
+    expected_inclusive_statistics_test = \
+        {'func_i,/c/d/file_i': ('i', 5, 4.4, utils.EXTRA_FUNCTION),
+         'func_j,/e/file_j': ('j', 6, 5.5, utils.EXTRA_FUNCTION),
+         'func_f,/a/b/file_f': ('f', 4, 2.3, utils.EXTRA_FUNCTION),
+         'func_h,/c/d/file_h': ('h', 1, 3.3, utils.EXTRA_FUNCTION),
+         'func_k,/e/file_k': ('k', 7, 6.6, utils.EXTRA_FUNCTION),
+         'func_g,/a/b/file_g': ('g', 2, 2.2, utils.EXTRA_FUNCTION)}
+    expected_inclusive_statistics_reference = \
+        {'func_i,/c/d/file_i': ('i', 5, 4.0, utils.EXTRA_FUNCTION),
+         'func_j,/e/file_j': ('j', 6, 5.0, utils.EXTRA_FUNCTION),
+         'func_f,/a/b/file_f': ('f', 1, 1.0, utils.EXTRA_FUNCTION),
+         'func_l,/e/file_l': ('l', 7, 6.0, utils.EXTRA_FUNCTION),
+         'func_h,/c/d/file_h': ('h', 4, 3.0, utils.EXTRA_FUNCTION),
+         'func_g,/a/b/file_g': ('g', 5, 4.4, utils.EXTRA_FUNCTION)}
+    result_inclusive_statistics_test = \
+        utils.ParseCWPInclusiveCountFile(self._inclusive_count_test_file)
+    result_inclusive_statistics_reference = \
+        utils.ParseCWPInclusiveCountFile(self._inclusive_count_reference_file)
+
+    self.assertDictEqual(result_inclusive_statistics_test,
+                         expected_inclusive_statistics_test)
+    self.assertDictEqual(result_inclusive_statistics_reference,
+                         expected_inclusive_statistics_reference)
+
+  def testParseCWPPairwiseInclusiveCountFile(self):
+    expected_pairwise_inclusive_statistics_test = {
+        'func_f': {'func_g,/a/b/file_g2': 0.01,
+                   'func_h,/c/d/file_h': 0.02,
+                   'func_i,/c/d/file_i': 0.03},
+        'func_g': {'func_j,/e/file_j': 0.4,
+                   'func_m,/e/file_m': 0.6}
+    }
+    expected_pairwise_inclusive_statistics_reference = {
+        'func_f': {'func_g,/a/b/file_g': 0.1,
+                   'func_h,/c/d/file_h': 0.2,
+                   'func_i,/c/d/file_i': 0.3},
+        'func_g': {'func_j,/e/file_j': 0.4}
+    }
+    result_pairwise_inclusive_statistics_test = \
+        utils.ParseCWPPairwiseInclusiveCountFile(
+            self._pairwise_inclusive_count_test_file)
+    result_pairwise_inclusive_statistics_reference = \
+        utils.ParseCWPPairwiseInclusiveCountFile(
+            self._pairwise_inclusive_count_reference_file)
+
+    self.assertDictEqual(result_pairwise_inclusive_statistics_test,
+                         expected_pairwise_inclusive_statistics_test)
+    self.assertDictEqual(result_pairwise_inclusive_statistics_reference,
+                         expected_pairwise_inclusive_statistics_reference)
+
+
+if __name__ == '__main__':
+  unittest.main()
author	Evelina Dumitrescu <evelinad@google.com>	2016-09-28 15:13:29 -0700
committer	chrome-bot <chrome-bot@chromium.org>	2017-02-01 18:13:43 -0800
commit	c7faa09f456ca5c651ac373ad897aa4be6ad2717 (patch)
tree	d589cb479f102ba95248d93c783009c85ee50371
parent	edc6eb87218afbde3c3df6e0faa99958f709c825 (diff)
download	toolchain-utils-c7faa09f456ca5c651ac373ad897aa4be6ad2717.tar.gz