diff options
author | Denis Nikitin <denik@google.com> | 2020-09-12 17:21:48 -0700 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2020-09-18 23:08:07 +0000 |
commit | 7b73dcd8392259b7c67316d09c7bb799429f9a69 (patch) | |
tree | f9ab985f98c84f79c55ea0387423979ca4f8612a | |
parent | 18422a411b81cebc930b7227dc49226bbd140e8d (diff) | |
download | toolchain-utils-7b73dcd8392259b7c67316d09c7bb799429f9a69.tar.gz |
crosperf: Add verification of per-process profiles
Verification is very helpful when we record per-process perf profile
and collect top statistics.
Using verification we can make sure that perf profiles are collected
from the hottest functions in top statistics.
BUG=chromium:1098098
TEST=unittests and crosperf with per-process profiling are passing.
Change-Id: Ib79b070babcdea38932ba709ac3e14fa3311f842
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/2407223
Reviewed-by: Tiancong Wang <tcwang@google.com>
Reviewed-by: Caroline Tice <cmtice@chromium.org>
Commit-Queue: Denis Nikitin <denik@chromium.org>
Tested-by: Denis Nikitin <denik@chromium.org>
-rw-r--r-- | crosperf/results_cache.py | 121 | ||||
-rwxr-xr-x | crosperf/results_cache_unittest.py | 177 |
2 files changed, 261 insertions, 37 deletions
diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py index b435eee3..c5c85942 100644 --- a/crosperf/results_cache.py +++ b/crosperf/results_cache.py @@ -35,6 +35,14 @@ PERF_RESULTS_FILE = 'perf-results.txt' CACHE_KEYS_FILE = 'cache_keys.txt' +class PidVerificationError(Exception): + """Error of perf PID verification in per-process mode.""" + + +class PerfDataReadError(Exception): + """Error of reading a perf.data header.""" + + class Result(object): """Class for holding the results of a single test run. @@ -142,12 +150,12 @@ class Result(object): # while tast runs hold output under TEST_NAME/. # Both ensure to be unique. result_dir_name = self.test_name if self.suite == 'tast' else 'results' - results_dir = self.FindFilesInResultsDir( - '-name %s' % result_dir_name).split('\n')[0] + results_dir = self.FindFilesInResultsDir('-name %s' % + result_dir_name).split('\n')[0] if not results_dir: - self._logger.LogOutput( - 'WARNING: No results dir matching %r found' % result_dir_name) + self._logger.LogOutput('WARNING: No results dir matching %r found' % + result_dir_name) return self.CreateTarball(results_dir, tarball) @@ -239,8 +247,8 @@ class Result(object): command = 'cp -r {0}/* {1}'.format(self.results_dir, self.temp_dir) self.ce.RunCommand(command, print_to_console=False) - command = ('./generate_test_report --no-color --csv %s' % (os.path.join( - '/tmp', os.path.basename(self.temp_dir)))) + command = ('./generate_test_report --no-color --csv %s' % + (os.path.join('/tmp', os.path.basename(self.temp_dir)))) _, out, _ = self.ce.ChrootRunCommandWOutput( self.chromeos_root, command, print_to_console=False) keyvals_dict = {} @@ -390,8 +398,8 @@ class Result(object): perf_data_file) perf_report_file = '%s.report' % perf_data_file if os.path.exists(perf_report_file): - raise RuntimeError( - 'Perf report file already exists: %s' % perf_report_file) + raise RuntimeError('Perf report file already exists: %s' % + perf_report_file) chroot_perf_report_file = misc.GetInsideChrootPath( self.chromeos_root, perf_report_file) perf_path = os.path.join(self.chromeos_root, 'chroot', 'usr/bin/perf') @@ -429,8 +437,8 @@ class Result(object): if self.log_level != 'verbose': self._logger.LogOutput('Perf report generated successfully.') else: - raise RuntimeError( - 'Perf report not generated correctly. CMD: %s' % command) + raise RuntimeError('Perf report not generated correctly. CMD: %s' % + command) # Add a keyval to the dictionary for the events captured. perf_report_files.append( @@ -844,6 +852,88 @@ class Result(object): keyvals[key] = [result, unit] return keyvals + def ReadPidFromPerfData(self): + """Read PIDs from perf.data files. + + Extract PID from perf.data if "perf record" was running per process, + i.e. with "-p <PID>" and no "-a". + + Returns: + pids: list of PIDs. + + Raises: + PerfDataReadError when perf.data header reading fails. + """ + cmd = ['/usr/bin/perf', 'report', '--header-only', '-i'] + pids = [] + + for perf_data_path in self.perf_data_files: + perf_data_path_in_chroot = misc.GetInsideChrootPath( + self.chromeos_root, perf_data_path) + path_str = ' '.join(cmd + [perf_data_path_in_chroot]) + status, output, _ = self.ce.ChrootRunCommandWOutput( + self.chromeos_root, path_str) + if status: + # Error of reading a perf.data profile is fatal. + raise PerfDataReadError(f'Failed to read perf.data profile: {path_str}') + + # Pattern to search a line with "perf record" command line: + # # cmdline : /usr/bin/perf record -e instructions -p 123" + cmdline_regex = re.compile( + r'^\#\scmdline\s:\s+(?P<cmd>.*perf\s+record\s+.*)$') + # Pattern to search PID in a command line. + pid_regex = re.compile(r'^.*\s-p\s(?P<pid>\d+)\s*.*$') + for line in output.splitlines(): + cmd_match = cmdline_regex.match(line) + if cmd_match: + # Found a perf command line. + cmdline = cmd_match.group('cmd') + # '-a' is a system-wide mode argument. + if '-a' not in cmdline.split(): + # It can be that perf was attached to PID and was still running in + # system-wide mode. + # We filter out this case here since it's not per-process. + pid_match = pid_regex.match(cmdline) + if pid_match: + pids.append(pid_match.group('pid')) + # Stop the search and move to the next perf.data file. + break + else: + # cmdline wasn't found in the header. It's a fatal error. + raise PerfDataReadError(f'Perf command line is not found in {path_str}') + return pids + + def VerifyPerfDataPID(self): + """Verify PIDs in per-process perf.data profiles. + + Check that at list one top process is profiled if perf was running in + per-process mode. + + Raises: + PidVerificationError if PID verification of per-process perf.data profiles + fail. + """ + perf_data_pids = self.ReadPidFromPerfData() + if not perf_data_pids: + # In system-wide mode there are no PIDs. + self._logger.LogOutput('System-wide perf mode. Skip verification.') + return + + # PIDs will be present only in per-process profiles. + # In this case we need to verify that profiles are collected on the + # hottest processes. + top_processes = [top_cmd['cmd'] for top_cmd in self.top_cmds] + # top_process structure: <cmd>-<pid> + top_pids = [top_process.split('-')[-1] for top_process in top_processes] + for top_pid in top_pids: + if top_pid in perf_data_pids: + self._logger.LogOutput('PID verification passed! ' + f'Top process {top_pid} is profiled.') + return + raise PidVerificationError( + f'top processes {top_processes} are missing in perf.data traces with' + f' PID: {perf_data_pids}.') + def ProcessResults(self, use_cache=False): # Note that this function doesn't know anything about whether there is a # cache hit or miss. It should process results agnostic of the cache hit @@ -882,6 +972,9 @@ class Result(object): cpustats = self.ProcessCpustatsResults() if self.top_log_file: self.top_cmds = self.ProcessTopResults() + # Verify that PID in non system-wide perf.data and top_cmds are matching. + if self.perf_data_files and self.top_cmds: + self.VerifyPerfDataPID() if self.wait_time_log_file: with open(self.wait_time_log_file) as f: wait_time = f.readline().strip() @@ -962,8 +1055,8 @@ class Result(object): def CreateTarball(self, results_dir, tarball): if not results_dir.strip(): - raise ValueError( - 'Refusing to `tar` an empty results_dir: %r' % results_dir) + raise ValueError('Refusing to `tar` an empty results_dir: %r' % + results_dir) ret = self.ce.RunCommand('cd %s && ' 'tar ' @@ -1013,8 +1106,8 @@ class Result(object): if ret: command = 'rm -rf {0}'.format(temp_dir) self.ce.RunCommand(command) - raise RuntimeError( - 'Could not move dir %s to dir %s' % (temp_dir, cache_dir)) + raise RuntimeError('Could not move dir %s to dir %s' % + (temp_dir, cache_dir)) @classmethod def CreateFromRun(cls, diff --git a/crosperf/results_cache_unittest.py b/crosperf/results_cache_unittest.py index 812d5ab3..91ceed22 100755 --- a/crosperf/results_cache_unittest.py +++ b/crosperf/results_cache_unittest.py @@ -21,6 +21,8 @@ import test_flag from label import MockLabel from results_cache import CacheConditions +from results_cache import PerfDataReadError +from results_cache import PidVerificationError from results_cache import Result from results_cache import ResultsCache from results_cache import TelemetryResult @@ -158,6 +160,34 @@ keyvals = { 'b_string_strstr___abcdefghijklmnopqrstuvwxyz__': '0.0134553343333' } +PERF_DATA_HEADER = """ +# ======== +# captured on : Thu Jan 01 00:00:00 1980 +# header version : 1 +# data offset : 536 +# data size : 737678672 +# feat offset : 737679208 +# hostname : localhost +# os release : 5.4.61 +# perf version : +# arch : aarch64 +# nrcpus online : 8 +# nrcpus avail : 8 +# total memory : 5911496 kB +# cmdline : /usr/bin/perf record -e instructions -p {pid} +# event : name = instructions, , id = ( 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193 ), type = 8, size = 112 +# event : name = dummy:u, , id = ( 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204 ), type = 1, size = 112, config = 0x9 +# CPU_TOPOLOGY info available, use -I to display +# pmu mappings: software = 1, uprobe = 6, cs_etm = 8, breakpoint = 5, tracepoint = 2, armv8_pmuv3 = 7 +# contains AUX area data (e.g. instruction trace) +# time of first sample : 0.000000 +# time of last sample : 0.000000 +# sample duration : 0.000 ms +# missing features: TRACING_DATA CPUDESC CPUID NUMA_TOPOLOGY BRANCH_STACK GROUP_DESC STAT CACHE MEM_TOPOLOGY CLOCKID DIR_FORMAT +# ======== +# +""" + TURBOSTAT_LOG_OUTPUT = \ """CPU Avg_MHz Busy% Bzy_MHz TSC_MHz IRQ CoreTmp - 329 12.13 2723 2393 10975 77 @@ -470,6 +500,7 @@ class ResultTest(unittest.TestCase): def setUp(self): self.result = Result(self.mock_logger, self.mock_label, 'average', self.mock_cmd_exec) + self.result.chromeos_root = '/tmp/chromeos' @mock.patch.object(os.path, 'isdir') @mock.patch.object(command_executer.CommandExecuter, 'RunCommand') @@ -707,7 +738,8 @@ class ResultTest(unittest.TestCase): self.assertEqual(mock_chrootruncmd.call_count, 1) self.assertEqual( mock_chrootruncmd.call_args_list[0][0], - ('/tmp', ('./generate_test_report --no-color --csv %s') % TMP_DIR1)) + (self.result.chromeos_root, + ('./generate_test_report --no-color --csv %s') % TMP_DIR1)) self.assertEqual(mock_getpath.call_count, 1) self.assertEqual(mock_mkdtemp.call_count, 1) self.assertEqual(res, {'Total': [10, 'score'], 'first_time': [680, 'ms']}) @@ -916,6 +948,101 @@ class ResultTest(unittest.TestCase): found_no_logs = self.result.GetCpustatsFile() self.assertEqual(found_no_logs, '') + def test_verify_perf_data_pid_ok(self): + """Verify perf PID which is present in TOP_DATA.""" + self.result.top_cmds = TOP_DATA + # pid is present in TOP_DATA. + with mock.patch.object( + Result, 'ReadPidFromPerfData', return_value=['5713']): + self.result.VerifyPerfDataPID() + + def test_verify_perf_data_pid_fail(self): + """Test perf PID missing in top raises the error.""" + self.result.top_cmds = TOP_DATA + # pid is not in the list of top processes. + with mock.patch.object( + Result, 'ReadPidFromPerfData', return_value=['9999']): + with self.assertRaises(PidVerificationError): + self.result.VerifyPerfDataPID() + + @mock.patch.object(command_executer.CommandExecuter, + 'ChrootRunCommandWOutput') + def test_read_pid_from_perf_data_ok(self, mock_runcmd): + """Test perf header parser, normal flow.""" + self.result.ce.ChrootRunCommandWOutput = mock_runcmd + self.result.perf_data_files = ['/tmp/chromeos/chroot/tmp/results/perf.data'] + exp_pid = '12345' + mock_runcmd.return_value = (0, PERF_DATA_HEADER.format(pid=exp_pid), '') + pids = self.result.ReadPidFromPerfData() + self.assertEqual(pids, [exp_pid]) + + @mock.patch.object(command_executer.CommandExecuter, + 'ChrootRunCommandWOutput') + def test_read_pid_from_perf_data_mult_profiles(self, mock_runcmd): + """Test multiple perf.data files with PID.""" + self.result.ce.ChrootRunCommandWOutput = mock_runcmd + # self.result.chromeos_root = '/tmp/chromeos' + self.result.perf_data_files = [ + '/tmp/chromeos/chroot/tmp/results/perf.data.0', + '/tmp/chromeos/chroot/tmp/results/perf.data.1', + ] + # There is '-p <pid>' in command line but it's still system-wide: '-a'. + cmd_line = '# cmdline : /usr/bin/perf record -e instructions -p {pid}' + exp_perf_pids = ['1111', '2222'] + mock_runcmd.side_effect = [ + (0, cmd_line.format(pid=exp_perf_pids[0]), ''), + (0, cmd_line.format(pid=exp_perf_pids[1]), ''), + ] + pids = self.result.ReadPidFromPerfData() + self.assertEqual(pids, exp_perf_pids) + + @mock.patch.object(command_executer.CommandExecuter, + 'ChrootRunCommandWOutput') + def test_read_pid_from_perf_data_no_pid(self, mock_runcmd): + """Test perf.data without PID.""" + self.result.ce.ChrootRunCommandWOutput = mock_runcmd + self.result.perf_data_files = ['/tmp/chromeos/chroot/tmp/results/perf.data'] + cmd_line = '# cmdline : /usr/bin/perf record -e instructions' + mock_runcmd.return_value = (0, cmd_line, '') + pids = self.result.ReadPidFromPerfData() + # pids is empty. + self.assertEqual(pids, []) + + @mock.patch.object(command_executer.CommandExecuter, + 'ChrootRunCommandWOutput') + def test_read_pid_from_perf_data_system_wide(self, mock_runcmd): + """Test reading from system-wide profile with PID.""" + self.result.ce.ChrootRunCommandWOutput = mock_runcmd + self.result.perf_data_files = ['/tmp/chromeos/chroot/tmp/results/perf.data'] + # There is '-p <pid>' in command line but it's still system-wide: '-a'. + cmd_line = '# cmdline : /usr/bin/perf record -e instructions -a -p 1234' + mock_runcmd.return_value = (0, cmd_line, '') + pids = self.result.ReadPidFromPerfData() + # pids should be empty since it's not a per-process profiling. + self.assertEqual(pids, []) + + @mock.patch.object(command_executer.CommandExecuter, + 'ChrootRunCommandWOutput') + def test_read_pid_from_perf_data_read_fail(self, mock_runcmd): + """Failure to read perf.data raises the error.""" + self.result.ce.ChrootRunCommandWOutput = mock_runcmd + self.result.perf_data_files = ['/tmp/chromeos/chroot/tmp/results/perf.data'] + # Error status of the profile read. + mock_runcmd.return_value = (1, '', '') + with self.assertRaises(PerfDataReadError): + self.result.ReadPidFromPerfData() + + @mock.patch.object(command_executer.CommandExecuter, + 'ChrootRunCommandWOutput') + def test_read_pid_from_perf_data_fail(self, mock_runcmd): + """Failure to find cmdline in perf.data header raises the error.""" + self.result.ce.ChrootRunCommandWOutput = mock_runcmd + self.result.perf_data_files = ['/tmp/chromeos/chroot/tmp/results/perf.data'] + # Empty output. + mock_runcmd.return_value = (0, '', '') + with self.assertRaises(PerfDataReadError): + self.result.ReadPidFromPerfData() + def test_process_turbostat_results_with_valid_data(self): """Normal case when log exists and contains valid data.""" self.result.turbostat_log_file = '/tmp/somelogfile.log' @@ -1112,10 +1239,11 @@ class ResultTest(unittest.TestCase): # Debug path not found self.result.label.debug_path = '' tmp = self.result.GeneratePerfReportFiles() - self.assertEqual(tmp, ['/tmp/chroot%s' % fake_file]) + self.assertEqual(tmp, ['/tmp/chromeos/chroot%s' % fake_file]) self.assertEqual(mock_chrootruncmd.call_args_list[0][0], - ('/tmp', ('/usr/sbin/perf report -n ' - '-i %s --stdio > %s') % (fake_file, fake_file))) + (self.result.chromeos_root, + ('/usr/sbin/perf report -n ' + '-i %s --stdio > %s') % (fake_file, fake_file))) @mock.patch.object(misc, 'GetInsideChrootPath') @mock.patch.object(command_executer.CommandExecuter, 'ChrootRunCommand') @@ -1130,11 +1258,12 @@ class ResultTest(unittest.TestCase): # Debug path found self.result.label.debug_path = '/tmp/debug' tmp = self.result.GeneratePerfReportFiles() - self.assertEqual(tmp, ['/tmp/chroot%s' % fake_file]) + self.assertEqual(tmp, ['/tmp/chromeos/chroot%s' % fake_file]) self.assertEqual(mock_chrootruncmd.call_args_list[0][0], - ('/tmp', ('/usr/sbin/perf report -n --symfs /tmp/debug ' - '--vmlinux /tmp/debug/boot/vmlinux ' - '-i %s --stdio > %s') % (fake_file, fake_file))) + (self.result.chromeos_root, + ('/usr/sbin/perf report -n --symfs /tmp/debug ' + '--vmlinux /tmp/debug/boot/vmlinux ' + '-i %s --stdio > %s') % (fake_file, fake_file))) @mock.patch.object(misc, 'GetOutsideChrootPath') def test_populate_from_run(self, mock_getpath): @@ -1183,7 +1312,6 @@ class ResultTest(unittest.TestCase): if mock_getpath: pass mock.get_path = '/tmp/chromeos/tmp/results_dir' - self.result.chromeos_root = '/tmp/chromeos' self.callGetResultsDir = False self.callGetResultsFile = False @@ -1453,8 +1581,7 @@ class ResultTest(unittest.TestCase): u'crypto-md5__crypto-md5': [10.5, u'ms'], u'string-tagcloud__string-tagcloud': [52.8, u'ms'], u'access-nbody__access-nbody': [8.5, u'ms'], - 'retval': - 0, + 'retval': 0, u'math-spectral-norm__math-spectral-norm': [6.6, u'ms'], u'math-cordic__math-cordic': [8.7, u'ms'], u'access-binary-trees__access-binary-trees': [4.5, u'ms'], @@ -1492,8 +1619,7 @@ class ResultTest(unittest.TestCase): u'crypto-md5__crypto-md5': [10.5, u'ms'], u'string-tagcloud__string-tagcloud': [52.8, u'ms'], u'access-nbody__access-nbody': [8.5, u'ms'], - 'retval': - 0, + 'retval': 0, u'math-spectral-norm__math-spectral-norm': [6.6, u'ms'], u'math-cordic__math-cordic': [8.7, u'ms'], u'access-binary-trees__access-binary-trees': [4.5, u'ms'], @@ -1722,8 +1848,9 @@ class TelemetryResultTest(unittest.TestCase): 'autotest_dir', 'debug_dir', '/tmp', 'lumpy', 'remote', 'image_args', 'cache_dir', 'average', 'gcc', False, None) - self.mock_machine = machine_manager.MockCrosMachine( - 'falco.cros', '/tmp/chromeos', 'average') + self.mock_machine = machine_manager.MockCrosMachine('falco.cros', + '/tmp/chromeos', + 'average') def test_populate_from_run(self): @@ -1803,10 +1930,12 @@ class ResultsCacheTest(unittest.TestCase): def FakeGetMachines(label): if label: pass - m1 = machine_manager.MockCrosMachine( - 'lumpy1.cros', self.results_cache.chromeos_root, 'average') - m2 = machine_manager.MockCrosMachine( - 'lumpy2.cros', self.results_cache.chromeos_root, 'average') + m1 = machine_manager.MockCrosMachine('lumpy1.cros', + self.results_cache.chromeos_root, + 'average') + m2 = machine_manager.MockCrosMachine('lumpy2.cros', + self.results_cache.chromeos_root, + 'average') return [m1, m2] mock_checksum.return_value = 'FakeImageChecksumabc123' @@ -1848,10 +1977,12 @@ class ResultsCacheTest(unittest.TestCase): def FakeGetMachines(label): if label: pass - m1 = machine_manager.MockCrosMachine( - 'lumpy1.cros', self.results_cache.chromeos_root, 'average') - m2 = machine_manager.MockCrosMachine( - 'lumpy2.cros', self.results_cache.chromeos_root, 'average') + m1 = machine_manager.MockCrosMachine('lumpy1.cros', + self.results_cache.chromeos_root, + 'average') + m2 = machine_manager.MockCrosMachine('lumpy2.cros', + self.results_cache.chromeos_root, + 'average') return [m1, m2] mock_checksum.return_value = 'FakeImageChecksumabc123' |