Various small cache cleanups.

Add the machine used for running the test to the benchmark run; use the actual data from that machine for generating the cache entry for the run.

Also make sure, when calculating the common checksums for looking machines up in the cache, that all the machines being used actually have the same checksum.

Also add a cache_keys.txt file to the cache directory, to allow people to see what was used for generating the cache entry/checksums.

BUG=None
TEST=Tested in my directory.

Change-Id: Ia4cf8316e920a24becf5d12fd4c8f9a3e30a180f
Reviewed-on: https://chrome-internal-review.googlesource.com/228854
Commit-Ready: Caroline Tice <cmtice@google.com>
Tested-by: Caroline Tice <cmtice@google.com>
Reviewed-by: Yunlian Jiang <yunlian@google.com>
author: Caroline Tice <cmtice@google.com> 2015-09-02 12:36:47 -0700
committer: chrome-bot <chrome-bot@chromium.org> 2015-09-03 21:01:40 -0700
commit: 5ea9f006b5de0d882d5b51da243806b7cac69938 (patch)
tree: 388a72fa0792aaa7219672bbe0985c7cf67fb384 /crosperf
parent: 225fc17884a659647ed53ffd305b5e87533c18eb (diff)
download: toolchain-utils-5ea9f006b5de0d882d5b51da243806b7cac69938.tar.gz
5 files changed, 62 insertions, 60 deletions
diff --git a/crosperf/benchmark_run.py b/crosperf/benchmark_run.py
index dabbdeb2..c8b22000 100644
--- a/crosperf/benchmark_run.py
+++ b/crosperf/benchmark_run.py
@@ -78,6 +78,7 @@ class BenchmarkRun(threading.Thread):
                     self.test_args,
                     self.profiler_args,
                     self.machine_manager,
+                    self.machine,
                     self.label.board,
                     self.cache_conditions,
                     self._logger,
@@ -118,6 +119,7 @@ class BenchmarkRun(threading.Thread):
         self.timeline.Record(STATUS_WAITING)
         # Try to acquire a machine now.
         self.machine = self.AcquireMachine()
+        self.cache.machine = self.machine
         self.result = self.RunTest(self.machine)
 
         self.cache.remote = self.machine.name
@@ -261,6 +263,7 @@ class MockBenchmarkRun(BenchmarkRun):
                     self.test_args,
                     self.profiler_args,
                     self.machine_manager,
+                    self.machine,
                     self.label.board,
                     self.cache_conditions,
                     self._logger,
diff --git a/crosperf/experiment_runner.py b/crosperf/experiment_runner.py
index c40878b3..8ced8ccd 100644
--- a/crosperf/experiment_runner.py
+++ b/crosperf/experiment_runner.py
@@ -455,24 +455,8 @@ class Schedv2(object):
         # Termination flag.
         self._terminated = False
 
-    def _check_machines_are_same(self):
-        """Check if all machines are the same."""
-
-        for l in self._labels:
-            if not self._experiment.machine_manager.AreAllMachineSame(l):
-                self._l.LogError('All machines are NOT same for '
-                                'label "{}" ...'.format(l.name))
-                return False
-            self._l.LogOutput('Machines are same for '
-                              'label "{}" ...'.format(l.name))
-        return True
-
     def run_sched(self):
         """Start all dut worker threads and return immediately."""
-
-        if not self._check_machines_are_same():
-            raise RuntimeError("All machines are not same.")
-
         [w.start() for w in self._active_workers]
 
     def get_benchmark_run(self, dut):
diff --git a/crosperf/machine_manager.py b/crosperf/machine_manager.py
index 91fa1302..ed4f6ce9 100644
--- a/crosperf/machine_manager.py
+++ b/crosperf/machine_manager.py
@@ -22,7 +22,12 @@ from utils.file_utils import FileUtils
 
 CHECKSUM_FILE = "/usr/local/osimage_checksum_file"
 
-class NonMatchingMachines(Exception):
+class BadChecksum(Exception):
+  """Raised if all machines for a label don't have the same checksum."""
+  pass
+
+class BadChecksumString(Exception):
+  """Raised if all machines for a label don't have the same checksum string."""
   pass
 
 class MissingLocksDirectory(Exception):
@@ -270,12 +275,30 @@ class MachineManager(object):
     return retval
 
   def ComputeCommonCheckSum(self, label):
+    # Since this is used for cache lookups before the machines have been
+    # compared/verified, check here to make sure they all have the same
+    # checksum (otherwise the cache lookup may not be valid).
+    common_checksum = None
     for machine in self.GetMachines(label):
-      if machine.machine_checksum:
-        self.machine_checksum[label.name] = machine.machine_checksum
-        break
+      # Make sure the machine's checksums are calculated.
+      if not machine.machine_checksum:
+        machine.SetUpChecksumInfo()
+      cs = machine.machine_checksum
+      # If this is the first machine we've examined, initialize
+      # common_checksum.
+      if not common_checksum:
+        common_checksum = cs
+      # Make sure this machine's checksum matches our 'common' checksum.
+      if cs != common_checksum:
+        raise BadChecksum("Machine checksums do not match!")
+    self.machine_checksum[label.name] = common_checksum
 
   def ComputeCommonCheckSumString(self, label):
+    # The assumption is that this function is only called AFTER
+    # ComputeCommonCheckSum, so there is no need to verify the machines
+    # are the same here.  If this is ever changed, this function should be
+    # modified to verify that all the machines for a given label are the
+    # same.
     for machine in self.GetMachines(label):
       if machine.checksum_string:
         self.machine_checksum_string[label.name] = machine.checksum_string
@@ -318,11 +341,6 @@ class MachineManager(object):
         self._all_machines.append(cm)
 
 
-  def AreAllMachineSame(self, label):
-    checksums = [m.machine_checksum for m in self.GetMachines(label)]
-    return len(set(checksums)) == 1
-
-
   def RemoveMachine(self, machine_name):
     with self._lock:
       self._machines = [m for m in self._machines
@@ -352,14 +370,6 @@ class MachineManager(object):
           self._TryToLockMachine(m)
           if new_machine:
             m.released_time = time.time()
-        if not self.AreAllMachineSame(label):
-          if not throw:
-            # Log fatal message, which calls sys.exit.  Default behavior.
-            self.logger.LogFatal("-- not all the machines are identical")
-          else:
-            # Raise an exception, which can be caught and handled by calling
-            # function.
-            raise NonMatchingMachines("Not all the machines are identical")
         if self.GetAvailableMachines(label):
           break
         else:
diff --git a/crosperf/machine_manager_unittest.py b/crosperf/machine_manager_unittest.py
index 376f8b0e..f314cce5 100755
--- a/crosperf/machine_manager_unittest.py
+++ b/crosperf/machine_manager_unittest.py
@@ -99,13 +99,6 @@ class MachineManagerTest(unittest.TestCase):
     self.mm._all_machines.append(self.mock_lumpy3)
 
 
-  def testAreAllMachineSame(self):
-    manager = MyMachineManager(CHROMEOS_ROOT)
-    for m in MACHINE_NAMES:
-      manager.AddMachine(m)
-    self.assertEqual(manager.AreAllMachineSame(LABEL_LUMPY), True)
-    self.assertEqual(manager.AreAllMachineSame(LABEL_MIX), False)
-
   def testGetMachines(self):
     manager = MyMachineManager(CHROMEOS_ROOT)
     for m in MACHINE_NAMES:
@@ -292,14 +285,6 @@ class MachineManagerTest(unittest.TestCase):
     self.assertRaises(Exception, self.mm.AddMachine, 'lumpy1')
 
 
-  def test_are_all_machines_same(self):
-    result = self.mm.AreAllMachineSame(LABEL_LUMPY)
-    self.assertTrue(result)
-
-    result = self.mm.AreAllMachineSame(LABEL_MIX)
-    self.assertFalse(result)
-
-
   def test_remove_machine(self):
     self.mm._machines = self.mm._all_machines
     self.assertTrue(self.mock_lumpy2 in self.mm._machines)
diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py
index 232f13bc..5d0f96e3 100644
--- a/crosperf/results_cache.py
+++ b/crosperf/results_cache.py
@@ -21,11 +21,14 @@ from utils import misc
 
 from image_checksummer import ImageChecksummer
 
+import results_report
+
 SCRATCH_DIR = os.path.expanduser("~/cros_scratch")
 RESULTS_FILE = "results.txt"
 MACHINE_FILE = "machine.txt"
 AUTOTEST_TARBALL = "autotest.tbz2"
 PERF_RESULTS_FILE = "perf-results.txt"
+CACHE_KEYS_FILE = "cache_keys.txt"
 
 class Result(object):
   """ This class manages what exactly is stored inside the cache without knowing
@@ -317,7 +320,7 @@ class Result(object):
       command = "rm -rf %s" % self._temp_dir
       self._ce.RunCommand(command)
 
-  def StoreToCacheDir(self, cache_dir, machine_manager):
+  def StoreToCacheDir(self, cache_dir, machine_manager, key_list):
     # Create the dir if it doesn't exist.
     temp_dir = tempfile.mkdtemp()
 
@@ -327,6 +330,11 @@ class Result(object):
       pickle.dump(self.err, f)
       pickle.dump(self.retval, f)
 
+    with open(os.path.join(temp_dir, CACHE_KEYS_FILE), "w") as f:
+      for k in key_list:
+        f.write(k)
+        f.write("\n")
+
     if self.results_dir:
       tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
       command = ("cd %s && "
@@ -469,9 +477,9 @@ class ResultsCache(object):
   CACHE_VERSION = 6
 
   def Init(self, chromeos_image, chromeos_root, test_name, iteration,
-           test_args, profiler_args, machine_manager, board, cache_conditions,
-           logger_to_use, log_level, label, share_cache, suite,
-           show_all_results, run_local):
+           test_args, profiler_args, machine_manager, machine, board,
+           cache_conditions, logger_to_use, log_level, label, share_cache,
+           suite, show_all_results, run_local):
     self.chromeos_image = chromeos_image
     self.chromeos_root = chromeos_root
     self.test_name = test_name
@@ -481,6 +489,7 @@ class ResultsCache(object):
     self.board = board
     self.cache_conditions = cache_conditions
     self.machine_manager = machine_manager
+    self.machine = machine
     self._logger = logger_to_use
     self._ce = command_executer.GetCommandExecuter(self._logger,
                                                    log_level=log_level)
@@ -502,8 +511,16 @@ class ResultsCache(object):
     else:
       return None
 
-  def _GetCacheDirForWrite(self):
-    return self._FormCacheDir(self._GetCacheKeyList(False))[0]
+  def _GetCacheDirForWrite(self, get_keylist=False):
+    cache_path = self._FormCacheDir(self._GetCacheKeyList(False))[0]
+    if get_keylist:
+      args_str = "%s_%s_%s" % (self.test_args, self.profiler_args, self.run_local)
+      version, image = results_report.ParseChromeosImage(self.label.chromeos_image)
+      keylist = [ version, image, self.label.board,
+                  self.machine.name, self.test_name, str(self.iteration),
+                  args_str]
+      return cache_path, keylist
+    return cache_path
 
   def _FormCacheDir(self, list_of_strings):
     cache_key = " ".join(list_of_strings)
@@ -546,10 +563,13 @@ class ResultsCache(object):
     if read and CacheConditions.SAME_MACHINE_MATCH not in self.cache_conditions:
       machine_id_checksum = "*"
     else:
-      for machine in self.machine_manager.GetMachines(self.label):
-        if machine.name == self.label.remote[0]:
-          machine_id_checksum = machine.machine_id_checksum
-          break
+      if self.machine and self.machine.name in self.label.remote:
+        machine_id_checksum = self.machine.machine_id_checksum
+      else:
+        for machine in self.machine_manager.GetMachines(self.label):
+          if machine.name == self.label.remote[0]:
+            machine_id_checksum = machine.machine_id_checksum
+            break
 
     temp_test_args = "%s %s %s" % (self.test_args, self.profiler_args, self.run_local)
     test_args_checksum = hashlib.md5(
@@ -594,8 +614,8 @@ class ResultsCache(object):
     return None
 
   def StoreResult(self, result):
-    cache_dir = self._GetCacheDirForWrite()
-    result.StoreToCacheDir(cache_dir, self.machine_manager)
+    cache_dir, keylist = self._GetCacheDirForWrite(get_keylist=True)
+    result.StoreToCacheDir(cache_dir, self.machine_manager, keylist)
 
 
 class MockResultsCache(ResultsCache):
author	Caroline Tice <cmtice@google.com>	2015-09-02 12:36:47 -0700
committer	chrome-bot <chrome-bot@chromium.org>	2015-09-03 21:01:40 -0700
commit	5ea9f006b5de0d882d5b51da243806b7cac69938 (patch)
tree	388a72fa0792aaa7219672bbe0985c7cf67fb384 /crosperf
parent	225fc17884a659647ed53ffd305b5e87533c18eb (diff)
download	toolchain-utils-5ea9f006b5de0d882d5b51da243806b7cac69938.tar.gz