1 files changed, 45 insertions, 203 deletions
diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py
index c5c85942..98062194 100644
--- a/crosperf/results_cache.py
+++ b/crosperf/results_cache.py
@@ -30,19 +30,10 @@ SCRATCH_DIR = os.path.expanduser('~/cros_scratch')
 RESULTS_FILE = 'results.txt'
 MACHINE_FILE = 'machine.txt'
 AUTOTEST_TARBALL = 'autotest.tbz2'
-RESULTS_TARBALL = 'results.tbz2'
 PERF_RESULTS_FILE = 'perf-results.txt'
 CACHE_KEYS_FILE = 'cache_keys.txt'
 
 
-class PidVerificationError(Exception):
-  """Error of perf PID verification in per-process mode."""
-
-
-class PerfDataReadError(Exception):
-  """Error of reading a perf.data header."""
-
-
 class Result(object):
   """Class for holding the results of a single test run.
 
@@ -66,7 +57,6 @@ class Result(object):
     self.results_file = []
     self.turbostat_log_file = ''
     self.cpustats_log_file = ''
-    self.cpuinfo_file = ''
     self.top_log_file = ''
     self.wait_time_log_file = ''
     self.chrome_version = ''
@@ -85,34 +75,22 @@ class Result(object):
     """Get the list of top commands consuming CPU on the machine."""
     return self.top_cmds
 
-  def FormatStringTopCommands(self):
-    """Get formatted string of top commands.
+  def FormatStringTop5(self):
+    """Get formatted top5 string.
 
-    Get the formatted string with top commands consuming CPU on DUT machine.
-    Number of "non-chrome" processes in the list is limited to 5.
+    Get the formatted string with top5 commands consuming CPU on DUT machine.
     """
     format_list = [
-        'Top commands with highest CPU usage:',
+        'Top 5 commands with highest CPU usage:',
         # Header.
         '%20s %9s %6s   %s' % ('COMMAND', 'AVG CPU%', 'COUNT', 'HIGHEST 5'),
         '-' * 50,
     ]
     if self.top_cmds:
-      # After switching to top processes we have to expand the list since there
-      # will be a lot of 'chrome' processes (up to 10, sometimes more) in the
-      # top.
-      # Let's limit the list size by the number of non-chrome processes.
-      limit_of_non_chrome_procs = 5
-      num_of_non_chrome_procs = 0
-      for topcmd in self.top_cmds:
-        print_line = '%20s %9.2f %6s   %s' % (
-            topcmd['cmd'], topcmd['cpu_use_avg'], topcmd['count'],
-            topcmd['top5_cpu_use'])
+      for topcmd in self.top_cmds[:5]:
+        print_line = '%20s %9.2f %6s   %s' % (topcmd['cmd'], topcmd['cpu_avg'],
+                                              topcmd['count'], topcmd['top5'])
         format_list.append(print_line)
-        if not topcmd['cmd'].startswith('chrome'):
-          num_of_non_chrome_procs += 1
-          if num_of_non_chrome_procs >= limit_of_non_chrome_procs:
-            break
     else:
       format_list.append('[NO DATA FROM THE TOP LOG]')
     format_list.append('-' * 50)
@@ -131,37 +109,10 @@ class Result(object):
         raise IOError('Could not copy results file: %s' % file_to_copy)
 
   def CopyResultsTo(self, dest_dir):
-    self.CopyFilesTo(dest_dir, self.results_file)
     self.CopyFilesTo(dest_dir, self.perf_data_files)
     self.CopyFilesTo(dest_dir, self.perf_report_files)
-    extra_files = []
-    if self.top_log_file:
-      extra_files.append(self.top_log_file)
-    if self.cpuinfo_file:
-      extra_files.append(self.cpuinfo_file)
-    if extra_files:
-      self.CopyFilesTo(dest_dir, extra_files)
-    if self.results_file or self.perf_data_files or self.perf_report_files:
-      self._logger.LogOutput('Results files stored in %s.' % dest_dir)
-
-  def CompressResultsTo(self, dest_dir):
-    tarball = os.path.join(self.results_dir, RESULTS_TARBALL)
-    # Test_that runs hold all output under TEST_NAME_HASHTAG/results/,
-    # while tast runs hold output under TEST_NAME/.
-    # Both ensure to be unique.
-    result_dir_name = self.test_name if self.suite == 'tast' else 'results'
-    results_dir = self.FindFilesInResultsDir('-name %s' %
-                                             result_dir_name).split('\n')[0]
-
-    if not results_dir:
-      self._logger.LogOutput('WARNING: No results dir matching %r found' %
-                             result_dir_name)
-      return
-
-    self.CreateTarball(results_dir, tarball)
-    self.CopyFilesTo(dest_dir, [tarball])
-    if results_dir:
-      self._logger.LogOutput('Results files compressed into %s.' % dest_dir)
+    if self.perf_data_files or self.perf_report_files:
+      self._logger.LogOutput('Perf results files stored in %s.' % dest_dir)
 
   def GetNewKeyvals(self, keyvals_dict):
     # Initialize 'units' dictionary.
@@ -247,8 +198,8 @@ class Result(object):
       command = 'cp -r {0}/* {1}'.format(self.results_dir, self.temp_dir)
       self.ce.RunCommand(command, print_to_console=False)
 
-    command = ('./generate_test_report --no-color --csv %s' %
-               (os.path.join('/tmp', os.path.basename(self.temp_dir))))
+    command = ('./generate_test_report --no-color --csv %s' % (os.path.join(
+        '/tmp', os.path.basename(self.temp_dir))))
     _, out, _ = self.ce.ChrootRunCommandWOutput(
         self.chromeos_root, command, print_to_console=False)
     keyvals_dict = {}
@@ -316,10 +267,7 @@ class Result(object):
     return [samples, u'samples']
 
   def GetResultsDir(self):
-    if self.suite == 'tast':
-      mo = re.search(r'Writing results to (\S+)', self.out)
-    else:
-      mo = re.search(r'Results placed in (\S+)', self.out)
+    mo = re.search(r'Results placed in (\S+)', self.out)
     if mo:
       result = mo.group(1)
       return result
@@ -365,10 +313,6 @@ class Result(object):
     """Get cpustats log path string."""
     return self.FindFilesInResultsDir('-name cpustats.log').split('\n')[0]
 
-  def GetCpuinfoFile(self):
-    """Get cpustats log path string."""
-    return self.FindFilesInResultsDir('-name cpuinfo.log').split('\n')[0]
-
   def GetTopFile(self):
     """Get cpustats log path string."""
     return self.FindFilesInResultsDir('-name top.log').split('\n')[0]
@@ -398,8 +342,8 @@ class Result(object):
                                                        perf_data_file)
       perf_report_file = '%s.report' % perf_data_file
       if os.path.exists(perf_report_file):
-        raise RuntimeError('Perf report file already exists: %s' %
-                           perf_report_file)
+        raise RuntimeError(
+            'Perf report file already exists: %s' % perf_report_file)
       chroot_perf_report_file = misc.GetInsideChrootPath(
           self.chromeos_root, perf_report_file)
       perf_path = os.path.join(self.chromeos_root, 'chroot', 'usr/bin/perf')
@@ -437,8 +381,8 @@ class Result(object):
         if self.log_level != 'verbose':
           self._logger.LogOutput('Perf report generated successfully.')
       else:
-        raise RuntimeError('Perf report not generated correctly. CMD: %s' %
-                           command)
+        raise RuntimeError(
+            'Perf report not generated correctly. CMD: %s' % command)
 
       # Add a keyval to the dictionary for the events captured.
       perf_report_files.append(
@@ -475,7 +419,6 @@ class Result(object):
     self.perf_report_files = self.GeneratePerfReportFiles()
     self.turbostat_log_file = self.GetTurbostatFile()
     self.cpustats_log_file = self.GetCpustatsFile()
-    self.cpuinfo_file = self.GetCpuinfoFile()
     self.top_log_file = self.GetTopFile()
     self.wait_time_log_file = self.GetWaitTimeFile()
     # TODO(asharif): Do something similar with perf stat.
@@ -592,9 +535,9 @@ class Result(object):
     Returns:
       List of dictionaries with the following keyvals:
        'cmd': command name (string),
-       'cpu_use_avg': average cpu usage (float),
+       'cpu_avg': average cpu usage (float),
        'count': number of occurrences (int),
-       'top5_cpu_use': up to 5 highest cpu usages (descending list of floats)
+       'top5': up to 5 highest cpu usages (descending list of floats)
 
     Example of the top log:
       PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND
@@ -645,7 +588,7 @@ class Result(object):
           process = {
               # NOTE: One command may be represented by multiple processes.
               'cmd': match.group('cmd'),
-              'pid': match.group('pid'),
+              'pid': int(match.group('pid')),
               'cpu_use': float(match.group('cpu_use')),
           }
 
@@ -667,10 +610,7 @@ class Result(object):
     # not running.
     # On 1-core DUT 90% chrome cpu load occurs in 55%, 95% in 33% and 100% in 2%
     # of snapshots accordingly.
-    # Threshold of "high load" is reduced to 70% (from 90) when we switched to
-    # topstats per process. From experiment data the rest 20% are distributed
-    # among other chrome processes.
-    CHROME_HIGH_CPU_LOAD = 70
+    CHROME_HIGH_CPU_LOAD = 90
     # Number of snapshots where chrome is heavily used.
     high_load_snapshots = 0
     # Total CPU use per process in ALL active snapshots.
@@ -681,41 +621,35 @@ class Result(object):
     topcmds = []
 
     for snapshot_processes in snapshots:
-      # CPU usage per command, per PID in one snapshot.
-      cmd_cpu_use_per_snapshot = collections.defaultdict(dict)
+      # CPU usage per command in one snapshot.
+      cmd_cpu_use_per_snapshot = collections.defaultdict(float)
       for process in snapshot_processes:
         cmd = process['cmd']
         cpu_use = process['cpu_use']
-        pid = process['pid']
-        cmd_cpu_use_per_snapshot[cmd][pid] = cpu_use
 
-      # Chrome processes, pid: cpu_usage.
-      chrome_processes = cmd_cpu_use_per_snapshot.get('chrome', {})
-      chrome_cpu_use_list = chrome_processes.values()
+        # Collect CPU usage per command.
+        cmd_cpu_use_per_snapshot[cmd] += cpu_use
 
-      if chrome_cpu_use_list and max(
-          chrome_cpu_use_list) > CHROME_HIGH_CPU_LOAD:
-        # CPU usage of any of the "chrome" processes exceeds "High load"
-        # threshold which means DUT is busy running a benchmark.
+      if cmd_cpu_use_per_snapshot.setdefault('chrome',
+                                             0.0) > CHROME_HIGH_CPU_LOAD:
+        # Combined CPU usage of "chrome" command exceeds "High load" threshold
+        # which means DUT is busy running a benchmark.
         high_load_snapshots += 1
-        for cmd, cpu_use_per_pid in cmd_cpu_use_per_snapshot.items():
-          for pid, cpu_use in cpu_use_per_pid.items():
-            # Append PID to the name of the command.
-            cmd_with_pid = cmd + '-' + pid
-            cmd_total_cpu_use[cmd_with_pid] += cpu_use
+        for cmd, cpu_use in cmd_cpu_use_per_snapshot.items():
+          # Update total CPU usage.
+          cmd_total_cpu_use[cmd] += cpu_use
 
-            # Add cpu_use into command top cpu usages, sorted in descending
-            # order.
-            heapq.heappush(cmd_top5_cpu_use[cmd_with_pid], round(cpu_use, 1))
+          # Add cpu_use into command top cpu usages, sorted in descending order.
+          heapq.heappush(cmd_top5_cpu_use[cmd], round(cpu_use, 1))
 
     for consumer, usage in sorted(
         cmd_total_cpu_use.items(), key=lambda x: x[1], reverse=True):
       # Iterate through commands by descending order of total CPU usage.
       topcmd = {
           'cmd': consumer,
-          'cpu_use_avg': usage / high_load_snapshots,
+          'cpu_avg': usage / high_load_snapshots,
           'count': len(cmd_top5_cpu_use[consumer]),
-          'top5_cpu_use': heapq.nlargest(5, cmd_top5_cpu_use[consumer]),
+          'top5': heapq.nlargest(5, cmd_top5_cpu_use[consumer]),
       }
       topcmds.append(topcmd)
 
@@ -852,88 +786,6 @@ class Result(object):
       keyvals[key] = [result, unit]
     return keyvals
 
-  def ReadPidFromPerfData(self):
-    """Read PIDs from perf.data files.
-
-    Extract PID from perf.data if "perf record" was running per process,
-    i.e. with "-p <PID>" and no "-a".
-
-    Returns:
-      pids: list of PIDs.
-
-    Raises:
-      PerfDataReadError when perf.data header reading fails.
-    """
-    cmd = ['/usr/bin/perf', 'report', '--header-only', '-i']
-    pids = []
-
-    for perf_data_path in self.perf_data_files:
-      perf_data_path_in_chroot = misc.GetInsideChrootPath(
-          self.chromeos_root, perf_data_path)
-      path_str = ' '.join(cmd + [perf_data_path_in_chroot])
-      status, output, _ = self.ce.ChrootRunCommandWOutput(
-          self.chromeos_root, path_str)
-      if status:
-        # Error of reading a perf.data profile is fatal.
-        raise PerfDataReadError(f'Failed to read perf.data profile: {path_str}')
-
-      # Pattern to search a line with "perf record" command line:
-      # # cmdline : /usr/bin/perf record -e instructions -p 123"
-      cmdline_regex = re.compile(
-          r'^\#\scmdline\s:\s+(?P<cmd>.*perf\s+record\s+.*)$')
-      # Pattern to search PID in a command line.
-      pid_regex = re.compile(r'^.*\s-p\s(?P<pid>\d+)\s*.*$')
-      for line in output.splitlines():
-        cmd_match = cmdline_regex.match(line)
-        if cmd_match:
-          # Found a perf command line.
-          cmdline = cmd_match.group('cmd')
-          # '-a' is a system-wide mode argument.
-          if '-a' not in cmdline.split():
-            # It can be that perf was attached to PID and was still running in
-            # system-wide mode.
-            # We filter out this case here since it's not per-process.
-            pid_match = pid_regex.match(cmdline)
-            if pid_match:
-              pids.append(pid_match.group('pid'))
-          # Stop the search and move to the next perf.data file.
-          break
-      else:
-        # cmdline wasn't found in the header. It's a fatal error.
-        raise PerfDataReadError(f'Perf command line is not found in {path_str}')
-    return pids
-
-  def VerifyPerfDataPID(self):
-    """Verify PIDs in per-process perf.data profiles.
-
-    Check that at list one top process is profiled if perf was running in
-    per-process mode.
-
-    Raises:
-      PidVerificationError if PID verification of per-process perf.data profiles
-      fail.
-    """
-    perf_data_pids = self.ReadPidFromPerfData()
-    if not perf_data_pids:
-      # In system-wide mode there are no PIDs.
-      self._logger.LogOutput('System-wide perf mode. Skip verification.')
-      return
-
-    # PIDs will be present only in per-process profiles.
-    # In this case we need to verify that profiles are collected on the
-    # hottest processes.
-    top_processes = [top_cmd['cmd'] for top_cmd in self.top_cmds]
-    # top_process structure: <cmd>-<pid>
-    top_pids = [top_process.split('-')[-1] for top_process in top_processes]
-    for top_pid in top_pids:
-      if top_pid in perf_data_pids:
-        self._logger.LogOutput('PID verification passed! '
-                               f'Top process {top_pid} is profiled.')
-        return
-    raise PidVerificationError(
-        f'top processes {top_processes} are missing in perf.data traces with'
-        f' PID: {perf_data_pids}.')
-
   def ProcessResults(self, use_cache=False):
     # Note that this function doesn't know anything about whether there is a
     # cache hit or miss. It should process results agnostic of the cache hit
@@ -972,9 +824,6 @@ class Result(object):
       cpustats = self.ProcessCpustatsResults()
     if self.top_log_file:
       self.top_cmds = self.ProcessTopResults()
-    # Verify that PID in non system-wide perf.data and top_cmds are matching.
-    if self.perf_data_files and self.top_cmds:
-      self.VerifyPerfDataPID()
     if self.wait_time_log_file:
       with open(self.wait_time_log_file) as f:
         wait_time = f.readline().strip()
@@ -1053,19 +902,6 @@ class Result(object):
       command = 'rm -rf %s' % self.temp_dir
       self.ce.RunCommand(command)
 
-  def CreateTarball(self, results_dir, tarball):
-    if not results_dir.strip():
-      raise ValueError('Refusing to `tar` an empty results_dir: %r' %
-                       results_dir)
-
-    ret = self.ce.RunCommand('cd %s && '
-                             'tar '
-                             '--exclude=var/spool '
-                             '--exclude=var/log '
-                             '-cjf %s .' % (results_dir, tarball))
-    if ret:
-      raise RuntimeError("Couldn't compress test output directory.")
-
   def StoreToCacheDir(self, cache_dir, machine_manager, key_list):
     # Create the dir if it doesn't exist.
     temp_dir = tempfile.mkdtemp()
@@ -1087,8 +923,14 @@ class Result(object):
 
     if self.results_dir:
       tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
-      self.CreateTarball(self.results_dir, tarball)
-
+      command = ('cd %s && '
+                 'tar '
+                 '--exclude=var/spool '
+                 '--exclude=var/log '
+                 '-cjf %s .' % (self.results_dir, tarball))
+      ret = self.ce.RunCommand(command)
+      if ret:
+        raise RuntimeError("Couldn't store autotest output directory.")
     # Store machine info.
     # TODO(asharif): Make machine_manager a singleton, and don't pass it into
     # this function.
@@ -1106,8 +948,8 @@ class Result(object):
     if ret:
       command = 'rm -rf {0}'.format(temp_dir)
       self.ce.RunCommand(command)
-      raise RuntimeError('Could not move dir %s to dir %s' %
-                         (temp_dir, cache_dir))
+      raise RuntimeError(
+          'Could not move dir %s to dir %s' % (temp_dir, cache_dir))
 
   @classmethod
   def CreateFromRun(cls,