1 files changed, 332 insertions, 0 deletions
diff --git a/cros_utils/perf_diff.py b/cros_utils/perf_diff.py
new file mode 100755
index 00000000..31cde994
--- /dev/null
+++ b/cros_utils/perf_diff.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python2
+# Copyright 2012 Google Inc. All Rights Reserved.
+"""One-line documentation for perf_diff module.
+
+A detailed description of perf_diff.
+"""
+
+from __future__ import print_function
+
+__author__ = 'asharif@google.com (Ahmad Sharif)'
+
+import argparse
+import re
+import sys
+
+import misc
+import tabulator
+
+ROWS_TO_SHOW = 'Rows_to_show_in_the_perf_table'
+TOTAL_EVENTS = 'Total_events_of_this_profile'
+
+
+def GetPerfDictFromReport(report_file):
+  output = {}
+  perf_report = PerfReport(report_file)
+  for k, v in perf_report.sections.items():
+    if k not in output:
+      output[k] = {}
+    output[k][ROWS_TO_SHOW] = 0
+    output[k][TOTAL_EVENTS] = 0
+    for function in v.functions:
+      out_key = '%s' % (function.name)
+      output[k][out_key] = function.count
+      output[k][TOTAL_EVENTS] += function.count
+      if function.percent > 1:
+        output[k][ROWS_TO_SHOW] += 1
+  return output
+
+
+def _SortDictionaryByValue(d):
+  l = [(k, v) for (k, v) in d.iteritems()]
+
+  def GetFloat(x):
+    if misc.IsFloat(x):
+      return float(x)
+    else:
+      return x
+
+  sorted_l = sorted(l, key=lambda x: GetFloat(x[1]))
+  sorted_l.reverse()
+  return [f[0] for f in sorted_l]
+
+
+class Tabulator(object):
+  """Make tables."""
+
+  def __init__(self, all_dicts):
+    self._all_dicts = all_dicts
+
+  def PrintTable(self):
+    for dicts in self._all_dicts:
+      self.PrintTableHelper(dicts)
+
+  def PrintTableHelper(self, dicts):
+    """Transfrom dicts to tables."""
+    fields = {}
+    for d in dicts:
+      for f in d.keys():
+        if f not in fields:
+          fields[f] = d[f]
+        else:
+          fields[f] = max(fields[f], d[f])
+    table = []
+    header = ['name']
+    for i in range(len(dicts)):
+      header.append(i)
+
+    table.append(header)
+
+    sorted_fields = _SortDictionaryByValue(fields)
+
+    for f in sorted_fields:
+      row = [f]
+      for d in dicts:
+        if f in d:
+          row.append(d[f])
+        else:
+          row.append('0')
+      table.append(row)
+
+    print(tabulator.GetSimpleTable(table))
+
+
+class Function(object):
+  """Function for formatting."""
+
+  def __init__(self):
+    self.count = 0
+    self.name = ''
+    self.percent = 0
+
+
+class Section(object):
+  """Section formatting."""
+
+  def __init__(self, contents):
+    self.name = ''
+    self.raw_contents = contents
+    self._ParseSection()
+
+  def _ParseSection(self):
+    matches = re.findall(r'Events: (\w+)\s+(.*)', self.raw_contents)
+    assert len(matches) <= 1, 'More than one event found in 1 section'
+    if not matches:
+      return
+    match = matches[0]
+    self.name = match[1]
+    self.count = misc.UnitToNumber(match[0])
+
+    self.functions = []
+    for line in self.raw_contents.splitlines():
+      if not line.strip():
+        continue
+      if '%' not in line:
+        continue
+      if not line.startswith('#'):
+        fields = [f for f in line.split(' ') if f]
+        function = Function()
+        function.percent = float(fields[0].strip('%'))
+        function.count = int(fields[1])
+        function.name = ' '.join(fields[2:])
+        self.functions.append(function)
+
+
+class PerfReport(object):
+  """Get report from raw report."""
+
+  def __init__(self, perf_file):
+    self.perf_file = perf_file
+    self._ReadFile()
+    self.sections = {}
+    self.metadata = {}
+    self._section_contents = []
+    self._section_header = ''
+    self._SplitSections()
+    self._ParseSections()
+    self._ParseSectionHeader()
+
+  def _ParseSectionHeader(self):
+    """Parse a header of a perf report file."""
+    # The "captured on" field is inaccurate - this actually refers to when the
+    # report was generated, not when the data was captured.
+    for line in self._section_header.splitlines():
+      line = line[2:]
+      if ':' in line:
+        key, val = line.strip().split(':', 1)
+        key = key.strip()
+        val = val.strip()
+        self.metadata[key] = val
+
+  def _ReadFile(self):
+    self._perf_contents = open(self.perf_file).read()
+
+  def _ParseSections(self):
+    self.event_counts = {}
+    self.sections = {}
+    for section_content in self._section_contents:
+      section = Section(section_content)
+      section.name = self._GetHumanReadableName(section.name)
+      self.sections[section.name] = section
+
+  # TODO(asharif): Do this better.
+  def _GetHumanReadableName(self, section_name):
+    if not 'raw' in section_name:
+      return section_name
+    raw_number = section_name.strip().split(' ')[-1]
+    for line in self._section_header.splitlines():
+      if raw_number in line:
+        name = line.strip().split(' ')[5]
+        return name
+
+  def _SplitSections(self):
+    self._section_contents = []
+    indices = [m.start() for m in re.finditer('# Events:', self._perf_contents)]
+    indices.append(len(self._perf_contents))
+    for i in range(len(indices) - 1):
+      section_content = self._perf_contents[indices[i]:indices[i + 1]]
+      self._section_contents.append(section_content)
+    self._section_header = ''
+    if indices:
+      self._section_header = self._perf_contents[0:indices[0]]
+
+
+class PerfDiffer(object):
+  """Perf differ class."""
+
+  def __init__(self, reports, num_symbols, common_only):
+    self._reports = reports
+    self._num_symbols = num_symbols
+    self._common_only = common_only
+    self._common_function_names = {}
+
+  def DoDiff(self):
+    """The function that does the diff."""
+    section_names = self._FindAllSections()
+
+    filename_dicts = []
+    summary_dicts = []
+    for report in self._reports:
+      d = {}
+      filename_dicts.append({'file': report.perf_file})
+      for section_name in section_names:
+        if section_name in report.sections:
+          d[section_name] = report.sections[section_name].count
+      summary_dicts.append(d)
+
+    all_dicts = [filename_dicts, summary_dicts]
+
+    for section_name in section_names:
+      function_names = self._GetTopFunctions(section_name, self._num_symbols)
+      self._FindCommonFunctions(section_name)
+      dicts = []
+      for report in self._reports:
+        d = {}
+        if section_name in report.sections:
+          section = report.sections[section_name]
+
+          # Get a common scaling factor for this report.
+          common_scaling_factor = self._GetCommonScalingFactor(section)
+
+          for function in section.functions:
+            if function.name in function_names:
+              key = '%s %s' % (section.name, function.name)
+              d[key] = function.count
+              # Compute a factor to scale the function count by in common_only
+              # mode.
+              if self._common_only and (
+                  function.name in self._common_function_names[section.name]):
+                d[key + ' scaled'] = common_scaling_factor * function.count
+        dicts.append(d)
+
+      all_dicts.append(dicts)
+
+    mytabulator = Tabulator(all_dicts)
+    mytabulator.PrintTable()
+
+  def _FindAllSections(self):
+    sections = {}
+    for report in self._reports:
+      for section in report.sections.values():
+        if section.name not in sections:
+          sections[section.name] = section.count
+        else:
+          sections[section.name] = max(sections[section.name], section.count)
+    return _SortDictionaryByValue(sections)
+
+  def _GetCommonScalingFactor(self, section):
+    unique_count = self._GetCount(
+        section, lambda x: x in self._common_function_names[section.name])
+    return 100.0 / unique_count
+
+  def _GetCount(self, section, filter_fun=None):
+    total_count = 0
+    for function in section.functions:
+      if not filter_fun or filter_fun(function.name):
+        total_count += int(function.count)
+    return total_count
+
+  def _FindCommonFunctions(self, section_name):
+    function_names_list = []
+    for report in self._reports:
+      if section_name in report.sections:
+        section = report.sections[section_name]
+        function_names = [f.name for f in section.functions]
+        function_names_list.append(function_names)
+
+    self._common_function_names[section_name] = (
+        reduce(set.intersection, map(set, function_names_list)))
+
+  def _GetTopFunctions(self, section_name, num_functions):
+    all_functions = {}
+    for report in self._reports:
+      if section_name in report.sections:
+        section = report.sections[section_name]
+        for f in section.functions[:num_functions]:
+          if f.name in all_functions:
+            all_functions[f.name] = max(all_functions[f.name], f.count)
+          else:
+            all_functions[f.name] = f.count
+    # FIXME(asharif): Don't really need to sort these...
+    return _SortDictionaryByValue(all_functions)
+
+  def _GetFunctionsDict(self, section, function_names):
+    d = {}
+    for function in section.functions:
+      if function.name in function_names:
+        d[function.name] = function.count
+    return d
+
+
+def Main(argv):
+  """The entry of the main."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument('-n',
+                      '--num_symbols',
+                      dest='num_symbols',
+                      default='5',
+                      help='The number of symbols to show.')
+  parser.add_argument('-c',
+                      '--common_only',
+                      dest='common_only',
+                      action='store_true',
+                      default=False,
+                      help='Diff common symbols only.')
+
+  options, args = parser.parse_known_args(argv)
+
+  try:
+    reports = []
+    for report in args[1:]:
+      report = PerfReport(report)
+      reports.append(report)
+    pd = PerfDiffer(reports, int(options.num_symbols), options.common_only)
+    pd.DoDiff()
+  finally:
+    pass
+
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(Main(sys.argv))