diff options
Diffstat (limited to 'crosperf/results_cache.py')
-rw-r--r-- | crosperf/results_cache.py | 248 |
1 files changed, 203 insertions, 45 deletions
diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py index 98062194..c5c85942 100644 --- a/crosperf/results_cache.py +++ b/crosperf/results_cache.py @@ -30,10 +30,19 @@ SCRATCH_DIR = os.path.expanduser('~/cros_scratch') RESULTS_FILE = 'results.txt' MACHINE_FILE = 'machine.txt' AUTOTEST_TARBALL = 'autotest.tbz2' +RESULTS_TARBALL = 'results.tbz2' PERF_RESULTS_FILE = 'perf-results.txt' CACHE_KEYS_FILE = 'cache_keys.txt' +class PidVerificationError(Exception): + """Error of perf PID verification in per-process mode.""" + + +class PerfDataReadError(Exception): + """Error of reading a perf.data header.""" + + class Result(object): """Class for holding the results of a single test run. @@ -57,6 +66,7 @@ class Result(object): self.results_file = [] self.turbostat_log_file = '' self.cpustats_log_file = '' + self.cpuinfo_file = '' self.top_log_file = '' self.wait_time_log_file = '' self.chrome_version = '' @@ -75,22 +85,34 @@ class Result(object): """Get the list of top commands consuming CPU on the machine.""" return self.top_cmds - def FormatStringTop5(self): - """Get formatted top5 string. + def FormatStringTopCommands(self): + """Get formatted string of top commands. - Get the formatted string with top5 commands consuming CPU on DUT machine. + Get the formatted string with top commands consuming CPU on DUT machine. + Number of "non-chrome" processes in the list is limited to 5. """ format_list = [ - 'Top 5 commands with highest CPU usage:', + 'Top commands with highest CPU usage:', # Header. '%20s %9s %6s %s' % ('COMMAND', 'AVG CPU%', 'COUNT', 'HIGHEST 5'), '-' * 50, ] if self.top_cmds: - for topcmd in self.top_cmds[:5]: - print_line = '%20s %9.2f %6s %s' % (topcmd['cmd'], topcmd['cpu_avg'], - topcmd['count'], topcmd['top5']) + # After switching to top processes we have to expand the list since there + # will be a lot of 'chrome' processes (up to 10, sometimes more) in the + # top. + # Let's limit the list size by the number of non-chrome processes. + limit_of_non_chrome_procs = 5 + num_of_non_chrome_procs = 0 + for topcmd in self.top_cmds: + print_line = '%20s %9.2f %6s %s' % ( + topcmd['cmd'], topcmd['cpu_use_avg'], topcmd['count'], + topcmd['top5_cpu_use']) format_list.append(print_line) + if not topcmd['cmd'].startswith('chrome'): + num_of_non_chrome_procs += 1 + if num_of_non_chrome_procs >= limit_of_non_chrome_procs: + break else: format_list.append('[NO DATA FROM THE TOP LOG]') format_list.append('-' * 50) @@ -109,10 +131,37 @@ class Result(object): raise IOError('Could not copy results file: %s' % file_to_copy) def CopyResultsTo(self, dest_dir): + self.CopyFilesTo(dest_dir, self.results_file) self.CopyFilesTo(dest_dir, self.perf_data_files) self.CopyFilesTo(dest_dir, self.perf_report_files) - if self.perf_data_files or self.perf_report_files: - self._logger.LogOutput('Perf results files stored in %s.' % dest_dir) + extra_files = [] + if self.top_log_file: + extra_files.append(self.top_log_file) + if self.cpuinfo_file: + extra_files.append(self.cpuinfo_file) + if extra_files: + self.CopyFilesTo(dest_dir, extra_files) + if self.results_file or self.perf_data_files or self.perf_report_files: + self._logger.LogOutput('Results files stored in %s.' % dest_dir) + + def CompressResultsTo(self, dest_dir): + tarball = os.path.join(self.results_dir, RESULTS_TARBALL) + # Test_that runs hold all output under TEST_NAME_HASHTAG/results/, + # while tast runs hold output under TEST_NAME/. + # Both ensure to be unique. + result_dir_name = self.test_name if self.suite == 'tast' else 'results' + results_dir = self.FindFilesInResultsDir('-name %s' % + result_dir_name).split('\n')[0] + + if not results_dir: + self._logger.LogOutput('WARNING: No results dir matching %r found' % + result_dir_name) + return + + self.CreateTarball(results_dir, tarball) + self.CopyFilesTo(dest_dir, [tarball]) + if results_dir: + self._logger.LogOutput('Results files compressed into %s.' % dest_dir) def GetNewKeyvals(self, keyvals_dict): # Initialize 'units' dictionary. @@ -198,8 +247,8 @@ class Result(object): command = 'cp -r {0}/* {1}'.format(self.results_dir, self.temp_dir) self.ce.RunCommand(command, print_to_console=False) - command = ('./generate_test_report --no-color --csv %s' % (os.path.join( - '/tmp', os.path.basename(self.temp_dir)))) + command = ('./generate_test_report --no-color --csv %s' % + (os.path.join('/tmp', os.path.basename(self.temp_dir)))) _, out, _ = self.ce.ChrootRunCommandWOutput( self.chromeos_root, command, print_to_console=False) keyvals_dict = {} @@ -267,7 +316,10 @@ class Result(object): return [samples, u'samples'] def GetResultsDir(self): - mo = re.search(r'Results placed in (\S+)', self.out) + if self.suite == 'tast': + mo = re.search(r'Writing results to (\S+)', self.out) + else: + mo = re.search(r'Results placed in (\S+)', self.out) if mo: result = mo.group(1) return result @@ -313,6 +365,10 @@ class Result(object): """Get cpustats log path string.""" return self.FindFilesInResultsDir('-name cpustats.log').split('\n')[0] + def GetCpuinfoFile(self): + """Get cpustats log path string.""" + return self.FindFilesInResultsDir('-name cpuinfo.log').split('\n')[0] + def GetTopFile(self): """Get cpustats log path string.""" return self.FindFilesInResultsDir('-name top.log').split('\n')[0] @@ -342,8 +398,8 @@ class Result(object): perf_data_file) perf_report_file = '%s.report' % perf_data_file if os.path.exists(perf_report_file): - raise RuntimeError( - 'Perf report file already exists: %s' % perf_report_file) + raise RuntimeError('Perf report file already exists: %s' % + perf_report_file) chroot_perf_report_file = misc.GetInsideChrootPath( self.chromeos_root, perf_report_file) perf_path = os.path.join(self.chromeos_root, 'chroot', 'usr/bin/perf') @@ -381,8 +437,8 @@ class Result(object): if self.log_level != 'verbose': self._logger.LogOutput('Perf report generated successfully.') else: - raise RuntimeError( - 'Perf report not generated correctly. CMD: %s' % command) + raise RuntimeError('Perf report not generated correctly. CMD: %s' % + command) # Add a keyval to the dictionary for the events captured. perf_report_files.append( @@ -419,6 +475,7 @@ class Result(object): self.perf_report_files = self.GeneratePerfReportFiles() self.turbostat_log_file = self.GetTurbostatFile() self.cpustats_log_file = self.GetCpustatsFile() + self.cpuinfo_file = self.GetCpuinfoFile() self.top_log_file = self.GetTopFile() self.wait_time_log_file = self.GetWaitTimeFile() # TODO(asharif): Do something similar with perf stat. @@ -535,9 +592,9 @@ class Result(object): Returns: List of dictionaries with the following keyvals: 'cmd': command name (string), - 'cpu_avg': average cpu usage (float), + 'cpu_use_avg': average cpu usage (float), 'count': number of occurrences (int), - 'top5': up to 5 highest cpu usages (descending list of floats) + 'top5_cpu_use': up to 5 highest cpu usages (descending list of floats) Example of the top log: PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND @@ -588,7 +645,7 @@ class Result(object): process = { # NOTE: One command may be represented by multiple processes. 'cmd': match.group('cmd'), - 'pid': int(match.group('pid')), + 'pid': match.group('pid'), 'cpu_use': float(match.group('cpu_use')), } @@ -610,7 +667,10 @@ class Result(object): # not running. # On 1-core DUT 90% chrome cpu load occurs in 55%, 95% in 33% and 100% in 2% # of snapshots accordingly. - CHROME_HIGH_CPU_LOAD = 90 + # Threshold of "high load" is reduced to 70% (from 90) when we switched to + # topstats per process. From experiment data the rest 20% are distributed + # among other chrome processes. + CHROME_HIGH_CPU_LOAD = 70 # Number of snapshots where chrome is heavily used. high_load_snapshots = 0 # Total CPU use per process in ALL active snapshots. @@ -621,35 +681,41 @@ class Result(object): topcmds = [] for snapshot_processes in snapshots: - # CPU usage per command in one snapshot. - cmd_cpu_use_per_snapshot = collections.defaultdict(float) + # CPU usage per command, per PID in one snapshot. + cmd_cpu_use_per_snapshot = collections.defaultdict(dict) for process in snapshot_processes: cmd = process['cmd'] cpu_use = process['cpu_use'] + pid = process['pid'] + cmd_cpu_use_per_snapshot[cmd][pid] = cpu_use - # Collect CPU usage per command. - cmd_cpu_use_per_snapshot[cmd] += cpu_use + # Chrome processes, pid: cpu_usage. + chrome_processes = cmd_cpu_use_per_snapshot.get('chrome', {}) + chrome_cpu_use_list = chrome_processes.values() - if cmd_cpu_use_per_snapshot.setdefault('chrome', - 0.0) > CHROME_HIGH_CPU_LOAD: - # Combined CPU usage of "chrome" command exceeds "High load" threshold - # which means DUT is busy running a benchmark. + if chrome_cpu_use_list and max( + chrome_cpu_use_list) > CHROME_HIGH_CPU_LOAD: + # CPU usage of any of the "chrome" processes exceeds "High load" + # threshold which means DUT is busy running a benchmark. high_load_snapshots += 1 - for cmd, cpu_use in cmd_cpu_use_per_snapshot.items(): - # Update total CPU usage. - cmd_total_cpu_use[cmd] += cpu_use + for cmd, cpu_use_per_pid in cmd_cpu_use_per_snapshot.items(): + for pid, cpu_use in cpu_use_per_pid.items(): + # Append PID to the name of the command. + cmd_with_pid = cmd + '-' + pid + cmd_total_cpu_use[cmd_with_pid] += cpu_use - # Add cpu_use into command top cpu usages, sorted in descending order. - heapq.heappush(cmd_top5_cpu_use[cmd], round(cpu_use, 1)) + # Add cpu_use into command top cpu usages, sorted in descending + # order. + heapq.heappush(cmd_top5_cpu_use[cmd_with_pid], round(cpu_use, 1)) for consumer, usage in sorted( cmd_total_cpu_use.items(), key=lambda x: x[1], reverse=True): # Iterate through commands by descending order of total CPU usage. topcmd = { 'cmd': consumer, - 'cpu_avg': usage / high_load_snapshots, + 'cpu_use_avg': usage / high_load_snapshots, 'count': len(cmd_top5_cpu_use[consumer]), - 'top5': heapq.nlargest(5, cmd_top5_cpu_use[consumer]), + 'top5_cpu_use': heapq.nlargest(5, cmd_top5_cpu_use[consumer]), } topcmds.append(topcmd) @@ -786,6 +852,88 @@ class Result(object): keyvals[key] = [result, unit] return keyvals + def ReadPidFromPerfData(self): + """Read PIDs from perf.data files. + + Extract PID from perf.data if "perf record" was running per process, + i.e. with "-p <PID>" and no "-a". + + Returns: + pids: list of PIDs. + + Raises: + PerfDataReadError when perf.data header reading fails. + """ + cmd = ['/usr/bin/perf', 'report', '--header-only', '-i'] + pids = [] + + for perf_data_path in self.perf_data_files: + perf_data_path_in_chroot = misc.GetInsideChrootPath( + self.chromeos_root, perf_data_path) + path_str = ' '.join(cmd + [perf_data_path_in_chroot]) + status, output, _ = self.ce.ChrootRunCommandWOutput( + self.chromeos_root, path_str) + if status: + # Error of reading a perf.data profile is fatal. + raise PerfDataReadError(f'Failed to read perf.data profile: {path_str}') + + # Pattern to search a line with "perf record" command line: + # # cmdline : /usr/bin/perf record -e instructions -p 123" + cmdline_regex = re.compile( + r'^\#\scmdline\s:\s+(?P<cmd>.*perf\s+record\s+.*)$') + # Pattern to search PID in a command line. + pid_regex = re.compile(r'^.*\s-p\s(?P<pid>\d+)\s*.*$') + for line in output.splitlines(): + cmd_match = cmdline_regex.match(line) + if cmd_match: + # Found a perf command line. + cmdline = cmd_match.group('cmd') + # '-a' is a system-wide mode argument. + if '-a' not in cmdline.split(): + # It can be that perf was attached to PID and was still running in + # system-wide mode. + # We filter out this case here since it's not per-process. + pid_match = pid_regex.match(cmdline) + if pid_match: + pids.append(pid_match.group('pid')) + # Stop the search and move to the next perf.data file. + break + else: + # cmdline wasn't found in the header. It's a fatal error. + raise PerfDataReadError(f'Perf command line is not found in {path_str}') + return pids + + def VerifyPerfDataPID(self): + """Verify PIDs in per-process perf.data profiles. + + Check that at list one top process is profiled if perf was running in + per-process mode. + + Raises: + PidVerificationError if PID verification of per-process perf.data profiles + fail. + """ + perf_data_pids = self.ReadPidFromPerfData() + if not perf_data_pids: + # In system-wide mode there are no PIDs. + self._logger.LogOutput('System-wide perf mode. Skip verification.') + return + + # PIDs will be present only in per-process profiles. + # In this case we need to verify that profiles are collected on the + # hottest processes. + top_processes = [top_cmd['cmd'] for top_cmd in self.top_cmds] + # top_process structure: <cmd>-<pid> + top_pids = [top_process.split('-')[-1] for top_process in top_processes] + for top_pid in top_pids: + if top_pid in perf_data_pids: + self._logger.LogOutput('PID verification passed! ' + f'Top process {top_pid} is profiled.') + return + raise PidVerificationError( + f'top processes {top_processes} are missing in perf.data traces with' + f' PID: {perf_data_pids}.') + def ProcessResults(self, use_cache=False): # Note that this function doesn't know anything about whether there is a # cache hit or miss. It should process results agnostic of the cache hit @@ -824,6 +972,9 @@ class Result(object): cpustats = self.ProcessCpustatsResults() if self.top_log_file: self.top_cmds = self.ProcessTopResults() + # Verify that PID in non system-wide perf.data and top_cmds are matching. + if self.perf_data_files and self.top_cmds: + self.VerifyPerfDataPID() if self.wait_time_log_file: with open(self.wait_time_log_file) as f: wait_time = f.readline().strip() @@ -902,6 +1053,19 @@ class Result(object): command = 'rm -rf %s' % self.temp_dir self.ce.RunCommand(command) + def CreateTarball(self, results_dir, tarball): + if not results_dir.strip(): + raise ValueError('Refusing to `tar` an empty results_dir: %r' % + results_dir) + + ret = self.ce.RunCommand('cd %s && ' + 'tar ' + '--exclude=var/spool ' + '--exclude=var/log ' + '-cjf %s .' % (results_dir, tarball)) + if ret: + raise RuntimeError("Couldn't compress test output directory.") + def StoreToCacheDir(self, cache_dir, machine_manager, key_list): # Create the dir if it doesn't exist. temp_dir = tempfile.mkdtemp() @@ -923,14 +1087,8 @@ class Result(object): if self.results_dir: tarball = os.path.join(temp_dir, AUTOTEST_TARBALL) - command = ('cd %s && ' - 'tar ' - '--exclude=var/spool ' - '--exclude=var/log ' - '-cjf %s .' % (self.results_dir, tarball)) - ret = self.ce.RunCommand(command) - if ret: - raise RuntimeError("Couldn't store autotest output directory.") + self.CreateTarball(self.results_dir, tarball) + # Store machine info. # TODO(asharif): Make machine_manager a singleton, and don't pass it into # this function. @@ -948,8 +1106,8 @@ class Result(object): if ret: command = 'rm -rf {0}'.format(temp_dir) self.ce.RunCommand(command) - raise RuntimeError( - 'Could not move dir %s to dir %s' % (temp_dir, cache_dir)) + raise RuntimeError('Could not move dir %s to dir %s' % + (temp_dir, cache_dir)) @classmethod def CreateFromRun(cls, |