diff options
-rw-r--r-- | crosperf/benchmark_run.py | 3 | ||||
-rw-r--r-- | crosperf/experiment_runner.py | 16 | ||||
-rw-r--r-- | crosperf/machine_manager.py | 44 | ||||
-rwxr-xr-x | crosperf/machine_manager_unittest.py | 15 | ||||
-rw-r--r-- | crosperf/results_cache.py | 44 |
5 files changed, 62 insertions, 60 deletions
diff --git a/crosperf/benchmark_run.py b/crosperf/benchmark_run.py index dabbdeb2..c8b22000 100644 --- a/crosperf/benchmark_run.py +++ b/crosperf/benchmark_run.py @@ -78,6 +78,7 @@ class BenchmarkRun(threading.Thread): self.test_args, self.profiler_args, self.machine_manager, + self.machine, self.label.board, self.cache_conditions, self._logger, @@ -118,6 +119,7 @@ class BenchmarkRun(threading.Thread): self.timeline.Record(STATUS_WAITING) # Try to acquire a machine now. self.machine = self.AcquireMachine() + self.cache.machine = self.machine self.result = self.RunTest(self.machine) self.cache.remote = self.machine.name @@ -261,6 +263,7 @@ class MockBenchmarkRun(BenchmarkRun): self.test_args, self.profiler_args, self.machine_manager, + self.machine, self.label.board, self.cache_conditions, self._logger, diff --git a/crosperf/experiment_runner.py b/crosperf/experiment_runner.py index c40878b3..8ced8ccd 100644 --- a/crosperf/experiment_runner.py +++ b/crosperf/experiment_runner.py @@ -455,24 +455,8 @@ class Schedv2(object): # Termination flag. self._terminated = False - def _check_machines_are_same(self): - """Check if all machines are the same.""" - - for l in self._labels: - if not self._experiment.machine_manager.AreAllMachineSame(l): - self._l.LogError('All machines are NOT same for ' - 'label "{}" ...'.format(l.name)) - return False - self._l.LogOutput('Machines are same for ' - 'label "{}" ...'.format(l.name)) - return True - def run_sched(self): """Start all dut worker threads and return immediately.""" - - if not self._check_machines_are_same(): - raise RuntimeError("All machines are not same.") - [w.start() for w in self._active_workers] def get_benchmark_run(self, dut): diff --git a/crosperf/machine_manager.py b/crosperf/machine_manager.py index 91fa1302..ed4f6ce9 100644 --- a/crosperf/machine_manager.py +++ b/crosperf/machine_manager.py @@ -22,7 +22,12 @@ from utils.file_utils import FileUtils CHECKSUM_FILE = "/usr/local/osimage_checksum_file" -class NonMatchingMachines(Exception): +class BadChecksum(Exception): + """Raised if all machines for a label don't have the same checksum.""" + pass + +class BadChecksumString(Exception): + """Raised if all machines for a label don't have the same checksum string.""" pass class MissingLocksDirectory(Exception): @@ -270,12 +275,30 @@ class MachineManager(object): return retval def ComputeCommonCheckSum(self, label): + # Since this is used for cache lookups before the machines have been + # compared/verified, check here to make sure they all have the same + # checksum (otherwise the cache lookup may not be valid). + common_checksum = None for machine in self.GetMachines(label): - if machine.machine_checksum: - self.machine_checksum[label.name] = machine.machine_checksum - break + # Make sure the machine's checksums are calculated. + if not machine.machine_checksum: + machine.SetUpChecksumInfo() + cs = machine.machine_checksum + # If this is the first machine we've examined, initialize + # common_checksum. + if not common_checksum: + common_checksum = cs + # Make sure this machine's checksum matches our 'common' checksum. + if cs != common_checksum: + raise BadChecksum("Machine checksums do not match!") + self.machine_checksum[label.name] = common_checksum def ComputeCommonCheckSumString(self, label): + # The assumption is that this function is only called AFTER + # ComputeCommonCheckSum, so there is no need to verify the machines + # are the same here. If this is ever changed, this function should be + # modified to verify that all the machines for a given label are the + # same. for machine in self.GetMachines(label): if machine.checksum_string: self.machine_checksum_string[label.name] = machine.checksum_string @@ -318,11 +341,6 @@ class MachineManager(object): self._all_machines.append(cm) - def AreAllMachineSame(self, label): - checksums = [m.machine_checksum for m in self.GetMachines(label)] - return len(set(checksums)) == 1 - - def RemoveMachine(self, machine_name): with self._lock: self._machines = [m for m in self._machines @@ -352,14 +370,6 @@ class MachineManager(object): self._TryToLockMachine(m) if new_machine: m.released_time = time.time() - if not self.AreAllMachineSame(label): - if not throw: - # Log fatal message, which calls sys.exit. Default behavior. - self.logger.LogFatal("-- not all the machines are identical") - else: - # Raise an exception, which can be caught and handled by calling - # function. - raise NonMatchingMachines("Not all the machines are identical") if self.GetAvailableMachines(label): break else: diff --git a/crosperf/machine_manager_unittest.py b/crosperf/machine_manager_unittest.py index 376f8b0e..f314cce5 100755 --- a/crosperf/machine_manager_unittest.py +++ b/crosperf/machine_manager_unittest.py @@ -99,13 +99,6 @@ class MachineManagerTest(unittest.TestCase): self.mm._all_machines.append(self.mock_lumpy3) - def testAreAllMachineSame(self): - manager = MyMachineManager(CHROMEOS_ROOT) - for m in MACHINE_NAMES: - manager.AddMachine(m) - self.assertEqual(manager.AreAllMachineSame(LABEL_LUMPY), True) - self.assertEqual(manager.AreAllMachineSame(LABEL_MIX), False) - def testGetMachines(self): manager = MyMachineManager(CHROMEOS_ROOT) for m in MACHINE_NAMES: @@ -292,14 +285,6 @@ class MachineManagerTest(unittest.TestCase): self.assertRaises(Exception, self.mm.AddMachine, 'lumpy1') - def test_are_all_machines_same(self): - result = self.mm.AreAllMachineSame(LABEL_LUMPY) - self.assertTrue(result) - - result = self.mm.AreAllMachineSame(LABEL_MIX) - self.assertFalse(result) - - def test_remove_machine(self): self.mm._machines = self.mm._all_machines self.assertTrue(self.mock_lumpy2 in self.mm._machines) diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py index 232f13bc..5d0f96e3 100644 --- a/crosperf/results_cache.py +++ b/crosperf/results_cache.py @@ -21,11 +21,14 @@ from utils import misc from image_checksummer import ImageChecksummer +import results_report + SCRATCH_DIR = os.path.expanduser("~/cros_scratch") RESULTS_FILE = "results.txt" MACHINE_FILE = "machine.txt" AUTOTEST_TARBALL = "autotest.tbz2" PERF_RESULTS_FILE = "perf-results.txt" +CACHE_KEYS_FILE = "cache_keys.txt" class Result(object): """ This class manages what exactly is stored inside the cache without knowing @@ -317,7 +320,7 @@ class Result(object): command = "rm -rf %s" % self._temp_dir self._ce.RunCommand(command) - def StoreToCacheDir(self, cache_dir, machine_manager): + def StoreToCacheDir(self, cache_dir, machine_manager, key_list): # Create the dir if it doesn't exist. temp_dir = tempfile.mkdtemp() @@ -327,6 +330,11 @@ class Result(object): pickle.dump(self.err, f) pickle.dump(self.retval, f) + with open(os.path.join(temp_dir, CACHE_KEYS_FILE), "w") as f: + for k in key_list: + f.write(k) + f.write("\n") + if self.results_dir: tarball = os.path.join(temp_dir, AUTOTEST_TARBALL) command = ("cd %s && " @@ -469,9 +477,9 @@ class ResultsCache(object): CACHE_VERSION = 6 def Init(self, chromeos_image, chromeos_root, test_name, iteration, - test_args, profiler_args, machine_manager, board, cache_conditions, - logger_to_use, log_level, label, share_cache, suite, - show_all_results, run_local): + test_args, profiler_args, machine_manager, machine, board, + cache_conditions, logger_to_use, log_level, label, share_cache, + suite, show_all_results, run_local): self.chromeos_image = chromeos_image self.chromeos_root = chromeos_root self.test_name = test_name @@ -481,6 +489,7 @@ class ResultsCache(object): self.board = board self.cache_conditions = cache_conditions self.machine_manager = machine_manager + self.machine = machine self._logger = logger_to_use self._ce = command_executer.GetCommandExecuter(self._logger, log_level=log_level) @@ -502,8 +511,16 @@ class ResultsCache(object): else: return None - def _GetCacheDirForWrite(self): - return self._FormCacheDir(self._GetCacheKeyList(False))[0] + def _GetCacheDirForWrite(self, get_keylist=False): + cache_path = self._FormCacheDir(self._GetCacheKeyList(False))[0] + if get_keylist: + args_str = "%s_%s_%s" % (self.test_args, self.profiler_args, self.run_local) + version, image = results_report.ParseChromeosImage(self.label.chromeos_image) + keylist = [ version, image, self.label.board, + self.machine.name, self.test_name, str(self.iteration), + args_str] + return cache_path, keylist + return cache_path def _FormCacheDir(self, list_of_strings): cache_key = " ".join(list_of_strings) @@ -546,10 +563,13 @@ class ResultsCache(object): if read and CacheConditions.SAME_MACHINE_MATCH not in self.cache_conditions: machine_id_checksum = "*" else: - for machine in self.machine_manager.GetMachines(self.label): - if machine.name == self.label.remote[0]: - machine_id_checksum = machine.machine_id_checksum - break + if self.machine and self.machine.name in self.label.remote: + machine_id_checksum = self.machine.machine_id_checksum + else: + for machine in self.machine_manager.GetMachines(self.label): + if machine.name == self.label.remote[0]: + machine_id_checksum = machine.machine_id_checksum + break temp_test_args = "%s %s %s" % (self.test_args, self.profiler_args, self.run_local) test_args_checksum = hashlib.md5( @@ -594,8 +614,8 @@ class ResultsCache(object): return None def StoreResult(self, result): - cache_dir = self._GetCacheDirForWrite() - result.StoreToCacheDir(cache_dir, self.machine_manager) + cache_dir, keylist = self._GetCacheDirForWrite(get_keylist=True) + result.StoreToCacheDir(cache_dir, self.machine_manager, keylist) class MockResultsCache(ResultsCache): |