aboutsummaryrefslogtreecommitdiff
path: root/crosperf/machine_manager.py
diff options
context:
space:
mode:
author: cmtice <cmtice@google.com> 2014-05-12 13:56:42 -0700
committer: chrome-internal-fetch <chrome-internal-fetch@google.com> 2014-05-21 04:07:47 +0000
commit: 798a8fa986db930786b2a6777db0f3b06db995f6 (patch)
tree: feacec28548e4a53f6961cc443915937105a35cf /crosperf/machine_manager.py
parent: df8053336feea245a6bb43fe16dbc16a7b3d71cd (diff)
download: toolchain-utils-798a8fa986db930786b2a6777db0f3b06db995f6.tar.gz
Better handling of "not identical machines" failure.
Nightly tests, especially on the x86-generic boxes, fail frequently because the DUTs have different board images on them, so Crosperf decides the machines are not identical and refuses to run the tests. With this CL, if the machine_manager finds that the machines fail the "identical" test, it will try to push the same image onto all the machines, and then check them again to see if they are the same. It only tries this once; if they fail the check the second time around, it is still a fatal failure. This should eliminate many of the unnecessary failures in our nightly tests.

This CL also fixes a small bug in the auto-delete script (this fix has been running for a while on mobiletc-prebuild, but never got committed).

BUG=None
TEST=Ran several iterations where I forced the first "identical" check to fail. The changes worked.

Change-Id: Ied2a55e5d3e2789e58a503aef03269888954b579
Reviewed-on: https://chrome-internal-review.googlesource.com/163334
Reviewed-by: Luis Lozano <llozano@chromium.org>
Commit-Queue: Caroline Tice <cmtice@google.com>
Tested-by: Caroline Tice <cmtice@google.com>
Diffstat (limited to 'crosperf/machine_manager.py')
-rw-r--r-- crosperf/machine_manager.py 23
1 files changed, 21 insertions, 2 deletions
diff --git a/crosperf/machine_manager.py b/crosperf/machine_manager.py
index 52c3d818..04a4eec7 100644
--- a/crosperf/machine_manager.py
+++ b/crosperf/machine_manager.py
@@ -22,6 +22,8 @@ from image_checksummer import ImageChecksummer
CHECKSUM_FILE = "/usr/local/osimage_checksum_file"
+class NonMatchingMachines(Exception):
+ pass
class CrosMachine(object):
def __init__(self, name, chromeos_root, log_level):
@@ -33,6 +35,9 @@ class CrosMachine(object):
self.test_run = None
self.chromeos_root = chromeos_root
self.log_level = log_level
+ self.SetUpChecksumInfo()
+
+ def SetUpChecksumInfo(self):
if not self.IsReachable():
self.machine_checksum = None
return
@@ -288,6 +293,7 @@ class MachineManager(object):
checksums = [m.machine_checksum for m in self.GetMachines(label)]
return len(set(checksums)) == 1
+
def RemoveMachine(self, machine_name):
with self._lock:
self._machines = [m for m in self._machines
@@ -297,7 +303,14 @@ class MachineManager(object):
logger.GetLogger().LogError("Could not unlock machine: '%s'."
% m.name)
- def AcquireMachine(self, chromeos_image, label):
+ def ForceSameImageToAllMachines(self, label):
+ machines = self.GetMachines(label)
+ chromeos_image = label.chromeos_image
+ for m in machines:
+ self.ImageMachine(m, label)
+ m.SetUpChecksumInfo()
+
+ def AcquireMachine(self, chromeos_image, label, throw=False):
if label.image_type == "local":
image_checksum = ImageChecksummer().Checksum(label, self.log_level)
elif label.image_type == "trybot":
@@ -315,7 +328,13 @@ class MachineManager(object):
if new_machine:
m.released_time = time.time()
if not self.AreAllMachineSame(label):
- logger.GetLogger().LogFatal("-- not all the machine are identical")
+ if not throw:
+ # Log fatal message, which calls sys.exit. Default behavior.
+ logger.GetLogger().LogFatal("-- not all the machines are identical")
+ else:
+ # Raise an exception, which can be caught and handled by calling
+ # function.
+ raise NonMatchingMachines("Not all the machines are identical")
if self.GetAvailableMachines(label):
break
else: