# -*- coding: utf-8 -*- # Copyright 2013 The ChromiumOS Authors # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """The experiment setting module.""" import os from threading import Lock import time import benchmark_run from cros_utils import logger from cros_utils import misc from machine_manager import BadChecksum from machine_manager import MachineManager from machine_manager import MockMachineManager import test_flag class Experiment(object): """Class representing an Experiment to be run.""" def __init__( self, name, remote, working_directory, chromeos_root, cache_conditions, labels, benchmarks, experiment_file, email_to, acquire_timeout, log_dir, log_level, share_cache, results_directory, compress_results, locks_directory, cwp_dso, ignore_min_max, crosfleet, dut_config, no_lock: bool, ): self.name = name self.working_directory = working_directory self.remote = remote self.chromeos_root = chromeos_root self.cache_conditions = cache_conditions self.experiment_file = experiment_file self.email_to = email_to if not results_directory: self.results_directory = os.path.join( self.working_directory, self.name + "_results" ) else: self.results_directory = misc.CanonicalizePath(results_directory) self.compress_results = compress_results self.log_dir = log_dir self.log_level = log_level self.labels = labels self.benchmarks = benchmarks self.num_complete = 0 self.num_run_complete = 0 self.share_cache = share_cache self.active_threads = [] self.locks_dir = locks_directory self.locked_machines = [] self.lock_mgr = None self.cwp_dso = cwp_dso self.ignore_min_max = ignore_min_max self.crosfleet = crosfleet self.no_lock = no_lock self.l = logger.GetLogger(log_dir) if not self.benchmarks: raise RuntimeError("No benchmarks specified") if not self.labels: raise RuntimeError("No labels specified") if not remote and not self.crosfleet: raise RuntimeError("No remote hosts specified") # We need one chromeos_root to run the benchmarks in, but it doesn't # matter where it is, unless the ABIs are different. if not chromeos_root: for label in self.labels: if label.chromeos_root: chromeos_root = label.chromeos_root break if not chromeos_root: raise RuntimeError( "No chromeos_root given and could not determine " "one from the image path." ) machine_manager_fn = MachineManager if test_flag.GetTestMode(): machine_manager_fn = MockMachineManager self.machine_manager = machine_manager_fn( chromeos_root, acquire_timeout, log_level, locks_directory ) self.l = logger.GetLogger(log_dir) for machine in self.remote: # machine_manager.AddMachine only adds reachable machines. self.machine_manager.AddMachine(machine) # Now machine_manager._all_machines contains a list of reachable # machines. This is a subset of self.remote. We make both lists the same. self.remote = [m.name for m in self.machine_manager.GetAllMachines()] if not self.remote: raise RuntimeError("No machine available for running experiment.") # Initialize checksums for all machines, ignore errors at this time. # The checksum will be double checked, and image will be flashed after # duts are locked/leased. self.SetCheckSums() self.start_time = None self.benchmark_runs = self._GenerateBenchmarkRuns(dut_config) self._schedv2 = None self._internal_counter_lock = Lock() def set_schedv2(self, schedv2): self._schedv2 = schedv2 def schedv2(self): return self._schedv2 def _GenerateBenchmarkRuns(self, dut_config): """Generate benchmark runs from labels and benchmark defintions.""" benchmark_runs = [] for label in self.labels: for benchmark in self.benchmarks: for iteration in range(1, benchmark.iterations + 1): benchmark_run_name = "%s: %s (%s)" % ( label.name, benchmark.name, iteration, ) full_name = "%s_%s_%s" % ( label.name, benchmark.name, iteration, ) logger_to_use = logger.Logger( self.log_dir, "run.%s" % (full_name), True ) benchmark_runs.append( benchmark_run.BenchmarkRun( benchmark_run_name, benchmark, label, iteration, self.cache_conditions, self.machine_manager, logger_to_use, self.log_level, self.share_cache, dut_config, ) ) return benchmark_runs def SetCheckSums(self, forceSameImage=False): for label in self.labels: # We filter out label remotes that are not reachable (not in # self.remote). So each label.remote is a sublist of experiment.remote. label.remote = [r for r in label.remote if r in self.remote] try: self.machine_manager.ComputeCommonCheckSum(label) except BadChecksum: # Force same image on all machines, then we do checksum again. No # bailout if checksums still do not match. # TODO (zhizhouy): Need to figure out how flashing image will influence # the new checksum. if forceSameImage: self.machine_manager.ForceSameImageToAllMachines(label) self.machine_manager.ComputeCommonCheckSum(label) self.machine_manager.ComputeCommonCheckSumString(label) def Build(self): pass def Terminate(self): if self._schedv2 is not None: self._schedv2.terminate() else: for t in self.benchmark_runs: if t.isAlive(): self.l.LogError("Terminating run: '%s'." % t.name) t.Terminate() def IsComplete(self): if self._schedv2: return self._schedv2.is_complete() if self.active_threads: for t in self.active_threads: if t.isAlive(): t.join(0) if not t.isAlive(): self.num_complete += 1 if not t.cache_hit: self.num_run_complete += 1 self.active_threads.remove(t) return False return True def BenchmarkRunFinished(self, br): """Update internal counters after br finishes. Note this is only used by schedv2 and is called by multiple threads. Never throw any exception here. """ assert self._schedv2 is not None with self._internal_counter_lock: self.num_complete += 1 if not br.cache_hit: self.num_run_complete += 1 def Run(self): self.start_time = time.time() if self._schedv2 is not None: self._schedv2.run_sched() else: self.active_threads = [] for run in self.benchmark_runs: # Set threads to daemon so program exits when ctrl-c is pressed. run.daemon = True run.start() self.active_threads.append(run) def SetCacheConditions(self, cache_conditions): for run in self.benchmark_runs: run.SetCacheConditions(cache_conditions) def Cleanup(self): """Make sure all machines are unlocked.""" if self.locks_dir: # We are using the file locks mechanism, so call machine_manager.Cleanup # to unlock everything. self.machine_manager.Cleanup() if test_flag.GetTestMode() or not self.locked_machines: return # If we locked any machines earlier, make sure we unlock them now. if self.lock_mgr: machine_states = self.lock_mgr.GetMachineStates("unlock") self.lock_mgr.CheckMachineLocks(machine_states, "unlock") unlocked_machines = self.lock_mgr.UpdateMachines(False) failed_machines = [ m for m in self.locked_machines if m not in unlocked_machines ] if failed_machines: raise RuntimeError( "These machines are not unlocked correctly: %s" % failed_machines ) self.lock_mgr = None