aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenis Nikitin <denik@google.com>2020-11-06 11:37:38 -0800
committerCommit Bot <commit-bot@chromium.org>2020-11-09 04:04:57 +0000
commit8c34e96ac5b608a1f92c6a23124274010ced28be (patch)
treea2e56df1702d357819657caff1e75f73658ae732
parent6da42e5adaca8dbe139ddfb7b37aa10251214308 (diff)
downloadtoolchain-utils-8c34e96ac5b608a1f92c6a23124274010ced28be.tar.gz
crosperf: Fix BadChecksum failure on coral
Ignore "core id", "apicid", "initial apicid" fields from cpuinfo in machine checksum calculation. The values may differ on the same type of machines. Add more descriptive output for the BadChecksum error. Unittest is updated accordingly. BUG=chromium:1145386 TEST=./run_tests.sh in crosperf. Change-Id: Ifcc91fb70f02c41d77787fbb665741bc130152c0 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/2523398 Tested-by: Denis Nikitin <denik@chromium.org> Reviewed-by: Bob Haarman <inglorion@chromium.org> Commit-Queue: Denis Nikitin <denik@chromium.org>
-rw-r--r--crosperf/machine_manager.py58
-rwxr-xr-xcrosperf/machine_manager_unittest.py24
2 files changed, 50 insertions, 32 deletions
diff --git a/crosperf/machine_manager.py b/crosperf/machine_manager.py
index 0b38eef2..aaf09bf5 100644
--- a/crosperf/machine_manager.py
+++ b/crosperf/machine_manager.py
@@ -141,7 +141,12 @@ class CrosMachine(object):
def _ComputeMachineChecksumString(self):
self.checksum_string = ''
- exclude_lines_list = ['MHz', 'BogoMIPS', 'bogomips']
+ # Some lines from cpuinfo have to be excluded because they are not
+ # persistent across DUTs.
+ # MHz, BogoMIPS are dynamically changing values.
+ # core id, apicid are identifiers assigned on startup
+ # and may differ on the same type of machine.
+ exclude_lines_list = ['MHz', 'BogoMIPS', 'bogomips', 'core id', 'apicid']
for line in self.cpuinfo.splitlines():
if not any(e in line for e in exclude_lines_list):
self.checksum_string += line
@@ -220,8 +225,8 @@ class MachineManager(object):
self.logger = lgr or logger.GetLogger()
if self.locks_dir and not os.path.isdir(self.locks_dir):
- raise MissingLocksDirectory(
- 'Cannot access locks directory: %s' % self.locks_dir)
+ raise MissingLocksDirectory('Cannot access locks directory: %s' %
+ self.locks_dir)
self._initialized_machines = []
self.chromeos_root = chromeos_root
@@ -242,8 +247,8 @@ class MachineManager(object):
ret, version, _ = self.ce.CrosRunCommandWOutput(
cmd, machine=machine.name, chromeos_root=self.chromeos_root)
if ret != 0:
- raise CrosCommandError(
- "Couldn't get Chrome version from %s." % machine.name)
+ raise CrosCommandError("Couldn't get Chrome version from %s." %
+ machine.name)
if ret != 0:
version = ''
@@ -312,20 +317,33 @@ class MachineManager(object):
# Since this is used for cache lookups before the machines have been
# compared/verified, check here to make sure they all have the same
# checksum (otherwise the cache lookup may not be valid).
- common_checksum = None
+ base = None
for machine in self.GetMachines(label):
# Make sure the machine's checksums are calculated.
if not machine.machine_checksum:
machine.SetUpChecksumInfo()
- cs = machine.machine_checksum
- # If this is the first machine we've examined, initialize
- # common_checksum.
- if not common_checksum:
- common_checksum = cs
+ # Use the first machine as the basis for comparison.
+ if not base:
+ base = machine
# Make sure this machine's checksum matches our 'common' checksum.
- if cs != common_checksum:
- raise BadChecksum('Machine checksums do not match!')
- self.machine_checksum[label.name] = common_checksum
+ if base.machine_checksum != machine.machine_checksum:
+ # Found a difference. Fatal error.
+ # Extract non-matching part and report it.
+ for mismatch_index in range(len(base.checksum_string)):
+ if (mismatch_index >= len(machine.checksum_string) or
+ base.checksum_string[mismatch_index] !=
+ machine.checksum_string[mismatch_index]):
+ break
+ # We want to show some context after the mismatch.
+ end_ind = mismatch_index + 8
+ # Print a mismatching string.
+ raise BadChecksum(
+ 'Machine checksums do not match!\n'
+ 'Diff:\n'
+ f'{base.name}: {base.checksum_string[:end_ind]}\n'
+ f'{machine.name}: {machine.checksum_string[:end_ind]}\n'
+ '\nCheck for matching /proc/cpuinfo and /proc/meminfo on DUTs.\n')
+ self.machine_checksum[label.name] = base.machine_checksum
def ComputeCommonCheckSumString(self, label):
# The assumption is that this function is only called AFTER
@@ -369,8 +387,8 @@ class MachineManager(object):
if self.log_level != 'verbose':
self.logger.LogOutput('Setting up remote access to %s' % machine_name)
- self.logger.LogOutput(
- 'Checking machine characteristics for %s' % machine_name)
+ self.logger.LogOutput('Checking machine characteristics for %s' %
+ machine_name)
cm = CrosMachine(machine_name, self.chromeos_root, self.log_level)
if cm.machine_checksum:
self._all_machines.append(cm)
@@ -410,8 +428,8 @@ class MachineManager(object):
if self.acquire_timeout < 0:
self.logger.LogFatal('Could not acquire any of the '
- "following machines: '%s'" % ', '.join(
- machine.name for machine in machines))
+ "following machines: '%s'" %
+ ', '.join(machine.name for machine in machines))
### for m in self._machines:
@@ -664,8 +682,8 @@ class MockMachineManager(MachineManager):
for m in self._all_machines:
assert m.name != machine_name, 'Tried to double-add %s' % machine_name
cm = MockCrosMachine(machine_name, self.chromeos_root, self.log_level)
- assert cm.machine_checksum, (
- 'Could not find checksum for machine %s' % machine_name)
+ assert cm.machine_checksum, ('Could not find checksum for machine %s' %
+ machine_name)
# In Original MachineManager, the test is 'if cm.machine_checksum:' - if a
# machine is unreachable, then its machine_checksum is None. Here we
# cannot do this, because machine_checksum is always faked, so we directly
diff --git a/crosperf/machine_manager_unittest.py b/crosperf/machine_manager_unittest.py
index 26eacbd7..f47cc881 100755
--- a/crosperf/machine_manager_unittest.py
+++ b/crosperf/machine_manager_unittest.py
@@ -44,8 +44,8 @@ class MyMachineManager(machine_manager.MachineManager):
assert m.name != machine_name, 'Tried to double-add %s' % machine_name
cm = machine_manager.MockCrosMachine(machine_name, self.chromeos_root,
'average')
- assert cm.machine_checksum, (
- 'Could not find checksum for machine %s' % machine_name)
+ assert cm.machine_checksum, ('Could not find checksum for machine %s' %
+ machine_name)
self._all_machines.append(cm)
@@ -87,9 +87,10 @@ class MachineManagerTest(unittest.TestCase):
def setUp(self, mock_isdir):
mock_isdir.return_value = True
- self.mm = machine_manager.MachineManager(
- '/usr/local/chromeos', 0, 'average', None, self.mock_cmd_exec,
- self.mock_logger)
+ self.mm = machine_manager.MachineManager('/usr/local/chromeos', 0,
+ 'average', None,
+ self.mock_cmd_exec,
+ self.mock_logger)
self.mock_lumpy1.name = 'lumpy1'
self.mock_lumpy2.name = 'lumpy2'
@@ -225,15 +226,14 @@ class MachineManagerTest(unittest.TestCase):
self.assertEqual(mock_sleep.call_count, 0)
def test_compute_common_checksum(self):
-
self.mm.machine_checksum = {}
self.mm.ComputeCommonCheckSum(LABEL_LUMPY)
self.assertEqual(self.mm.machine_checksum['lumpy'], 'lumpy123')
self.assertEqual(len(self.mm.machine_checksum), 1)
self.mm.machine_checksum = {}
- self.assertRaises(machine_manager.BadChecksum,
- self.mm.ComputeCommonCheckSum, LABEL_MIX)
+ self.assertRaisesRegex(machine_manager.BadChecksum, r'daisy.*\n.*lumpy',
+ self.mm.ComputeCommonCheckSum, LABEL_MIX)
def test_compute_common_checksum_string(self):
self.mm.machine_checksum_string = {}
@@ -583,8 +583,8 @@ power management:
CHECKSUM_STRING = ('processor: 0vendor_id: GenuineIntelcpu family: 6model: '
'42model name: Intel(R) Celeron(R) CPU 867 @ '
'1.30GHzstepping: 7microcode: 0x25cache size: 2048 '
- 'KBphysical id: 0siblings: 2core id: 0cpu cores: 2apicid: '
- '0initial apicid: 0fpu: yesfpu_exception: yescpuid level: '
+ 'KBphysical id: 0siblings: 2cpu cores: 2'
+ 'fpu: yesfpu_exception: yescpuid level: '
'13wp: yesflags: fpu vme de pse tsc msr pae mce cx8 apic sep'
' mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse '
'sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc '
@@ -597,8 +597,8 @@ CHECKSUM_STRING = ('processor: 0vendor_id: GenuineIntelcpu family: 6model: '
'bits virtualpower management:processor: 1vendor_id: '
'GenuineIntelcpu family: 6model: 42model name: Intel(R) '
'Celeron(R) CPU 867 @ 1.30GHzstepping: 7microcode: 0x25cache'
- ' size: 2048 KBphysical id: 0siblings: 2core id: 1cpu cores:'
- ' 2apicid: 2initial apicid: 2fpu: yesfpu_exception: yescpuid'
+ ' size: 2048 KBphysical id: 0siblings: 2cpu cores:'
+ ' 2fpu: yesfpu_exception: yescpuid'
' level: 13wp: yesflags: fpu vme de pse tsc msr pae mce cx8 '
'apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx '
'fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm '