1 files changed, 203 insertions, 45 deletions
diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py
index 98062194..c5c85942 100644
--- a/crosperf/results_cache.py
+++ b/crosperf/results_cache.py
@@ -30,10 +30,19 @@ SCRATCH_DIR = os.path.expanduser('~/cros_scratch')
 RESULTS_FILE = 'results.txt'
 MACHINE_FILE = 'machine.txt'
 AUTOTEST_TARBALL = 'autotest.tbz2'
+RESULTS_TARBALL = 'results.tbz2'
 PERF_RESULTS_FILE = 'perf-results.txt'
 CACHE_KEYS_FILE = 'cache_keys.txt'
 
 
+class PidVerificationError(Exception):
+  """Error of perf PID verification in per-process mode."""
+
+
+class PerfDataReadError(Exception):
+  """Error of reading a perf.data header."""
+
+
 class Result(object):
   """Class for holding the results of a single test run.
 
@@ -57,6 +66,7 @@ class Result(object):
     self.results_file = []
     self.turbostat_log_file = ''
     self.cpustats_log_file = ''
+    self.cpuinfo_file = ''
     self.top_log_file = ''
     self.wait_time_log_file = ''
     self.chrome_version = ''
@@ -75,22 +85,34 @@ class Result(object):
     """Get the list of top commands consuming CPU on the machine."""
     return self.top_cmds
 
-  def FormatStringTop5(self):
-    """Get formatted top5 string.
+  def FormatStringTopCommands(self):
+    """Get formatted string of top commands.
 
-    Get the formatted string with top5 commands consuming CPU on DUT machine.
+    Get the formatted string with top commands consuming CPU on DUT machine.
+    Number of "non-chrome" processes in the list is limited to 5.
     """
     format_list = [
-        'Top 5 commands with highest CPU usage:',
+        'Top commands with highest CPU usage:',
         # Header.
         '%20s %9s %6s   %s' % ('COMMAND', 'AVG CPU%', 'COUNT', 'HIGHEST 5'),
         '-' * 50,
     ]
     if self.top_cmds:
-      for topcmd in self.top_cmds[:5]:
-        print_line = '%20s %9.2f %6s   %s' % (topcmd['cmd'], topcmd['cpu_avg'],
-                                              topcmd['count'], topcmd['top5'])
+      # After switching to top processes we have to expand the list since there
+      # will be a lot of 'chrome' processes (up to 10, sometimes more) in the
+      # top.
+      # Let's limit the list size by the number of non-chrome processes.
+      limit_of_non_chrome_procs = 5
+      num_of_non_chrome_procs = 0
+      for topcmd in self.top_cmds:
+        print_line = '%20s %9.2f %6s   %s' % (
+            topcmd['cmd'], topcmd['cpu_use_avg'], topcmd['count'],
+            topcmd['top5_cpu_use'])
         format_list.append(print_line)
+        if not topcmd['cmd'].startswith('chrome'):
+          num_of_non_chrome_procs += 1
+          if num_of_non_chrome_procs >= limit_of_non_chrome_procs:
+            break
     else:
       format_list.append('[NO DATA FROM THE TOP LOG]')
     format_list.append('-' * 50)
@@ -109,10 +131,37 @@ class Result(object):
         raise IOError('Could not copy results file: %s' % file_to_copy)
 
   def CopyResultsTo(self, dest_dir):
+    self.CopyFilesTo(dest_dir, self.results_file)
     self.CopyFilesTo(dest_dir, self.perf_data_files)
     self.CopyFilesTo(dest_dir, self.perf_report_files)
-    if self.perf_data_files or self.perf_report_files:
-      self._logger.LogOutput('Perf results files stored in %s.' % dest_dir)
+    extra_files = []
+    if self.top_log_file:
+      extra_files.append(self.top_log_file)
+    if self.cpuinfo_file:
+      extra_files.append(self.cpuinfo_file)
+    if extra_files:
+      self.CopyFilesTo(dest_dir, extra_files)
+    if self.results_file or self.perf_data_files or self.perf_report_files:
+      self._logger.LogOutput('Results files stored in %s.' % dest_dir)
+
+  def CompressResultsTo(self, dest_dir):
+    tarball = os.path.join(self.results_dir, RESULTS_TARBALL)
+    # Test_that runs hold all output under TEST_NAME_HASHTAG/results/,
+    # while tast runs hold output under TEST_NAME/.
+    # Both ensure to be unique.
+    result_dir_name = self.test_name if self.suite == 'tast' else 'results'
+    results_dir = self.FindFilesInResultsDir('-name %s' %
+                                             result_dir_name).split('\n')[0]
+
+    if not results_dir:
+      self._logger.LogOutput('WARNING: No results dir matching %r found' %
+                             result_dir_name)
+      return
+
+    self.CreateTarball(results_dir, tarball)
+    self.CopyFilesTo(dest_dir, [tarball])
+    if results_dir:
+      self._logger.LogOutput('Results files compressed into %s.' % dest_dir)
 
   def GetNewKeyvals(self, keyvals_dict):
     # Initialize 'units' dictionary.
@@ -198,8 +247,8 @@ class Result(object):
       command = 'cp -r {0}/* {1}'.format(self.results_dir, self.temp_dir)
       self.ce.RunCommand(command, print_to_console=False)
 
-    command = ('./generate_test_report --no-color --csv %s' % (os.path.join(
-        '/tmp', os.path.basename(self.temp_dir))))
+    command = ('./generate_test_report --no-color --csv %s' %
+               (os.path.join('/tmp', os.path.basename(self.temp_dir))))
     _, out, _ = self.ce.ChrootRunCommandWOutput(
         self.chromeos_root, command, print_to_console=False)
     keyvals_dict = {}
@@ -267,7 +316,10 @@ class Result(object):
     return [samples, u'samples']
 
   def GetResultsDir(self):
-    mo = re.search(r'Results placed in (\S+)', self.out)
+    if self.suite == 'tast':
+      mo = re.search(r'Writing results to (\S+)', self.out)
+    else:
+      mo = re.search(r'Results placed in (\S+)', self.out)
     if mo:
       result = mo.group(1)
       return result
@@ -313,6 +365,10 @@ class Result(object):
     """Get cpustats log path string."""
     return self.FindFilesInResultsDir('-name cpustats.log').split('\n')[0]
 
+  def GetCpuinfoFile(self):
+    """Get cpustats log path string."""
+    return self.FindFilesInResultsDir('-name cpuinfo.log').split('\n')[0]
+
   def GetTopFile(self):
     """Get cpustats log path string."""
     return self.FindFilesInResultsDir('-name top.log').split('\n')[0]
@@ -342,8 +398,8 @@ class Result(object):
                                                        perf_data_file)
       perf_report_file = '%s.report' % perf_data_file
       if os.path.exists(perf_report_file):
-        raise RuntimeError(
-            'Perf report file already exists: %s' % perf_report_file)
+        raise RuntimeError('Perf report file already exists: %s' %
+                           perf_report_file)
       chroot_perf_report_file = misc.GetInsideChrootPath(
           self.chromeos_root, perf_report_file)
       perf_path = os.path.join(self.chromeos_root, 'chroot', 'usr/bin/perf')
@@ -381,8 +437,8 @@ class Result(object):
         if self.log_level != 'verbose':
           self._logger.LogOutput('Perf report generated successfully.')
       else:
-        raise RuntimeError(
-            'Perf report not generated correctly. CMD: %s' % command)
+        raise RuntimeError('Perf report not generated correctly. CMD: %s' %
+                           command)
 
       # Add a keyval to the dictionary for the events captured.
       perf_report_files.append(
@@ -419,6 +475,7 @@ class Result(object):
     self.perf_report_files = self.GeneratePerfReportFiles()
     self.turbostat_log_file = self.GetTurbostatFile()
     self.cpustats_log_file = self.GetCpustatsFile()
+    self.cpuinfo_file = self.GetCpuinfoFile()
     self.top_log_file = self.GetTopFile()
     self.wait_time_log_file = self.GetWaitTimeFile()
     # TODO(asharif): Do something similar with perf stat.
@@ -535,9 +592,9 @@ class Result(object):
     Returns:
       List of dictionaries with the following keyvals:
        'cmd': command name (string),
-       'cpu_avg': average cpu usage (float),
+       'cpu_use_avg': average cpu usage (float),
        'count': number of occurrences (int),
-       'top5': up to 5 highest cpu usages (descending list of floats)
+       'top5_cpu_use': up to 5 highest cpu usages (descending list of floats)
 
     Example of the top log:
       PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND
@@ -588,7 +645,7 @@ class Result(object):
           process = {
               # NOTE: One command may be represented by multiple processes.
               'cmd': match.group('cmd'),
-              'pid': int(match.group('pid')),
+              'pid': match.group('pid'),
               'cpu_use': float(match.group('cpu_use')),
           }
 
@@ -610,7 +667,10 @@ class Result(object):
     # not running.
     # On 1-core DUT 90% chrome cpu load occurs in 55%, 95% in 33% and 100% in 2%
     # of snapshots accordingly.
-    CHROME_HIGH_CPU_LOAD = 90
+    # Threshold of "high load" is reduced to 70% (from 90) when we switched to
+    # topstats per process. From experiment data the rest 20% are distributed
+    # among other chrome processes.
+    CHROME_HIGH_CPU_LOAD = 70
     # Number of snapshots where chrome is heavily used.
     high_load_snapshots = 0
     # Total CPU use per process in ALL active snapshots.
@@ -621,35 +681,41 @@ class Result(object):
     topcmds = []
 
     for snapshot_processes in snapshots:
-      # CPU usage per command in one snapshot.
-      cmd_cpu_use_per_snapshot = collections.defaultdict(float)
+      # CPU usage per command, per PID in one snapshot.
+      cmd_cpu_use_per_snapshot = collections.defaultdict(dict)
       for process in snapshot_processes:
         cmd = process['cmd']
         cpu_use = process['cpu_use']
+        pid = process['pid']
+        cmd_cpu_use_per_snapshot[cmd][pid] = cpu_use
 
-        # Collect CPU usage per command.
-        cmd_cpu_use_per_snapshot[cmd] += cpu_use
+      # Chrome processes, pid: cpu_usage.
+      chrome_processes = cmd_cpu_use_per_snapshot.get('chrome', {})
+      chrome_cpu_use_list = chrome_processes.values()
 
-      if cmd_cpu_use_per_snapshot.setdefault('chrome',
-                                             0.0) > CHROME_HIGH_CPU_LOAD:
-        # Combined CPU usage of "chrome" command exceeds "High load" threshold
-        # which means DUT is busy running a benchmark.
+      if chrome_cpu_use_list and max(
+          chrome_cpu_use_list) > CHROME_HIGH_CPU_LOAD:
+        # CPU usage of any of the "chrome" processes exceeds "High load"
+        # threshold which means DUT is busy running a benchmark.
         high_load_snapshots += 1
-        for cmd, cpu_use in cmd_cpu_use_per_snapshot.items():
-          # Update total CPU usage.
-          cmd_total_cpu_use[cmd] += cpu_use
+        for cmd, cpu_use_per_pid in cmd_cpu_use_per_snapshot.items():
+          for pid, cpu_use in cpu_use_per_pid.items():
+            # Append PID to the name of the command.
+            cmd_with_pid = cmd + '-' + pid
+            cmd_total_cpu_use[cmd_with_pid] += cpu_use
 
-          # Add cpu_use into command top cpu usages, sorted in descending order.
-          heapq.heappush(cmd_top5_cpu_use[cmd], round(cpu_use, 1))
+            # Add cpu_use into command top cpu usages, sorted in descending
+            # order.
+            heapq.heappush(cmd_top5_cpu_use[cmd_with_pid], round(cpu_use, 1))
 
     for consumer, usage in sorted(
         cmd_total_cpu_use.items(), key=lambda x: x[1], reverse=True):
       # Iterate through commands by descending order of total CPU usage.
       topcmd = {
           'cmd': consumer,
-          'cpu_avg': usage / high_load_snapshots,
+          'cpu_use_avg': usage / high_load_snapshots,
           'count': len(cmd_top5_cpu_use[consumer]),
-          'top5': heapq.nlargest(5, cmd_top5_cpu_use[consumer]),
+          'top5_cpu_use': heapq.nlargest(5, cmd_top5_cpu_use[consumer]),
       }
       topcmds.append(topcmd)
 
@@ -786,6 +852,88 @@ class Result(object):
       keyvals[key] = [result, unit]
     return keyvals
 
+  def ReadPidFromPerfData(self):
+    """Read PIDs from perf.data files.
+
+    Extract PID from perf.data if "perf record" was running per process,
+    i.e. with "-p <PID>" and no "-a".
+
+    Returns:
+      pids: list of PIDs.
+
+    Raises:
+      PerfDataReadError when perf.data header reading fails.
+    """
+    cmd = ['/usr/bin/perf', 'report', '--header-only', '-i']
+    pids = []
+
+    for perf_data_path in self.perf_data_files:
+      perf_data_path_in_chroot = misc.GetInsideChrootPath(
+          self.chromeos_root, perf_data_path)
+      path_str = ' '.join(cmd + [perf_data_path_in_chroot])
+      status, output, _ = self.ce.ChrootRunCommandWOutput(
+          self.chromeos_root, path_str)
+      if status:
+        # Error of reading a perf.data profile is fatal.
+        raise PerfDataReadError(f'Failed to read perf.data profile: {path_str}')
+
+      # Pattern to search a line with "perf record" command line:
+      # # cmdline : /usr/bin/perf record -e instructions -p 123"
+      cmdline_regex = re.compile(
+          r'^\#\scmdline\s:\s+(?P<cmd>.*perf\s+record\s+.*)$')
+      # Pattern to search PID in a command line.
+      pid_regex = re.compile(r'^.*\s-p\s(?P<pid>\d+)\s*.*$')
+      for line in output.splitlines():
+        cmd_match = cmdline_regex.match(line)
+        if cmd_match:
+          # Found a perf command line.
+          cmdline = cmd_match.group('cmd')
+          # '-a' is a system-wide mode argument.
+          if '-a' not in cmdline.split():
+            # It can be that perf was attached to PID and was still running in
+            # system-wide mode.
+            # We filter out this case here since it's not per-process.
+            pid_match = pid_regex.match(cmdline)
+            if pid_match:
+              pids.append(pid_match.group('pid'))
+          # Stop the search and move to the next perf.data file.
+          break
+      else:
+        # cmdline wasn't found in the header. It's a fatal error.
+        raise PerfDataReadError(f'Perf command line is not found in {path_str}')
+    return pids
+
+  def VerifyPerfDataPID(self):
+    """Verify PIDs in per-process perf.data profiles.
+
+    Check that at list one top process is profiled if perf was running in
+    per-process mode.
+
+    Raises:
+      PidVerificationError if PID verification of per-process perf.data profiles
+      fail.
+    """
+    perf_data_pids = self.ReadPidFromPerfData()
+    if not perf_data_pids:
+      # In system-wide mode there are no PIDs.
+      self._logger.LogOutput('System-wide perf mode. Skip verification.')
+      return
+
+    # PIDs will be present only in per-process profiles.
+    # In this case we need to verify that profiles are collected on the
+    # hottest processes.
+    top_processes = [top_cmd['cmd'] for top_cmd in self.top_cmds]
+    # top_process structure: <cmd>-<pid>
+    top_pids = [top_process.split('-')[-1] for top_process in top_processes]
+    for top_pid in top_pids:
+      if top_pid in perf_data_pids:
+        self._logger.LogOutput('PID verification passed! '
+                               f'Top process {top_pid} is profiled.')
+        return
+    raise PidVerificationError(
+        f'top processes {top_processes} are missing in perf.data traces with'
+        f' PID: {perf_data_pids}.')
+
   def ProcessResults(self, use_cache=False):
     # Note that this function doesn't know anything about whether there is a
     # cache hit or miss. It should process results agnostic of the cache hit
@@ -824,6 +972,9 @@ class Result(object):
       cpustats = self.ProcessCpustatsResults()
     if self.top_log_file:
       self.top_cmds = self.ProcessTopResults()
+    # Verify that PID in non system-wide perf.data and top_cmds are matching.
+    if self.perf_data_files and self.top_cmds:
+      self.VerifyPerfDataPID()
     if self.wait_time_log_file:
       with open(self.wait_time_log_file) as f:
         wait_time = f.readline().strip()
@@ -902,6 +1053,19 @@ class Result(object):
       command = 'rm -rf %s' % self.temp_dir
       self.ce.RunCommand(command)
 
+  def CreateTarball(self, results_dir, tarball):
+    if not results_dir.strip():
+      raise ValueError('Refusing to `tar` an empty results_dir: %r' %
+                       results_dir)
+
+    ret = self.ce.RunCommand('cd %s && '
+                             'tar '
+                             '--exclude=var/spool '
+                             '--exclude=var/log '
+                             '-cjf %s .' % (results_dir, tarball))
+    if ret:
+      raise RuntimeError("Couldn't compress test output directory.")
+
   def StoreToCacheDir(self, cache_dir, machine_manager, key_list):
     # Create the dir if it doesn't exist.
     temp_dir = tempfile.mkdtemp()
@@ -923,14 +1087,8 @@ class Result(object):
 
     if self.results_dir:
       tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
-      command = ('cd %s && '
-                 'tar '
-                 '--exclude=var/spool '
-                 '--exclude=var/log '
-                 '-cjf %s .' % (self.results_dir, tarball))
-      ret = self.ce.RunCommand(command)
-      if ret:
-        raise RuntimeError("Couldn't store autotest output directory.")
+      self.CreateTarball(self.results_dir, tarball)
+
     # Store machine info.
     # TODO(asharif): Make machine_manager a singleton, and don't pass it into
     # this function.
@@ -948,8 +1106,8 @@ class Result(object):
     if ret:
       command = 'rm -rf {0}'.format(temp_dir)
       self.ce.RunCommand(command)
-      raise RuntimeError(
-          'Could not move dir %s to dir %s' % (temp_dir, cache_dir))
+      raise RuntimeError('Could not move dir %s to dir %s' %
+                         (temp_dir, cache_dir))
 
   @classmethod
   def CreateFromRun(cls,