diff options
32 files changed, 2769 insertions, 359 deletions
diff --git a/crosperf/benchmark.py b/crosperf/benchmark.py index a75bd8e3..bc7f1fa8 100644 --- a/crosperf/benchmark.py +++ b/crosperf/benchmark.py @@ -21,3 +21,4 @@ class Benchmark(object): self.iterations = iterations self.outlier_range = outlier_range self.perf_args = perf_args + self.iteration_adjusted = False diff --git a/crosperf/benchmark_run.py b/crosperf/benchmark_run.py index 7579b6c2..dc837937 100644 --- a/crosperf/benchmark_run.py +++ b/crosperf/benchmark_run.py @@ -4,16 +4,16 @@ import datetime import os -import re import threading import time import traceback +from utils import command_executer +from utils import timeline + from autotest_runner import AutotestRunner from results_cache import Result from results_cache import ResultsCache -from utils import command_executer -from utils import logger STATUS_FAILED = "FAILED" STATUS_SUCCEEDED = "SUCCEEDED" @@ -24,53 +24,50 @@ STATUS_PENDING = "PENDING" class BenchmarkRun(threading.Thread): - def __init__(self, name, benchmark_name, autotest_name, autotest_args, - label_name, chromeos_root, chromeos_image, board, iteration, - cache_conditions, outlier_range, perf_args, + def __init__(self, name, benchmark, + label, + iteration, + cache_conditions, machine_manager, logger_to_use): threading.Thread.__init__(self) self.name = name self._logger = logger_to_use - self.benchmark_name = benchmark_name - self.autotest_name = autotest_name - self.label_name = label_name - self.chromeos_root = chromeos_root - self.chromeos_image = os.path.expanduser(chromeos_image) - self.board = board + self.benchmark = benchmark self.iteration = iteration + self.label = label self.result = None self.terminated = False self.retval = None - self.status = STATUS_PENDING self.run_completed = False - self.outlier_range = outlier_range - self.perf_args = perf_args self.machine_manager = machine_manager self.cache = ResultsCache() self.autotest_runner = AutotestRunner(self._logger) self.machine = None - self.full_name = self.autotest_name self.cache_conditions = cache_conditions self.runs_complete = 0 self.cache_hit = False self.failure_reason = "" - self.autotest_args = "%s %s" % (autotest_args, self._GetExtraAutotestArgs()) + self.autotest_args = "%s %s" % (benchmark.autotest_args, + self._GetExtraAutotestArgs()) self._ce = command_executer.GetCommandExecuter(self._logger) + self.timeline = timeline.Timeline() + self.timeline.Record(STATUS_PENDING) def run(self): try: # Just use the first machine for running the cached version, # without locking it. - self.cache.Init(self.chromeos_image, - self.chromeos_root, - self.autotest_name, + self.cache.Init(self.label.chromeos_image, + self.label.chromeos_root, + self.benchmark.autotest_name, self.iteration, self.autotest_args, self.machine_manager, - self.board, + self.label.board, self.cache_conditions, self._logger, + self.label ) self.result = self.cache.ReadResult() @@ -78,10 +75,12 @@ class BenchmarkRun(threading.Thread): if self.result: self._logger.LogOutput("%s: Cache hit." % self.name) - self._logger.LogOutput(self.result.out + "\n" + self.result.err) + self._logger.LogOutput(self.result.out, print_to_console=False) + self._logger.LogError(self.result.err, print_to_console=False) + else: self._logger.LogOutput("%s: No cache hit." % self.name) - self.status = STATUS_WAITING + self.timeline.Record(STATUS_WAITING) # Try to acquire a machine now. self.machine = self.AcquireMachine() self.cache.remote = self.machine.name @@ -92,17 +91,17 @@ class BenchmarkRun(threading.Thread): return if not self.result.retval: - self.status = STATUS_SUCCEEDED + self.timeline.Record(STATUS_SUCCEEDED) else: - if self.status != STATUS_FAILED: - self.status = STATUS_FAILED + if self.timeline.GetLastEvent() != STATUS_FAILED: self.failure_reason = "Return value of autotest was non-zero." + self.timeline.Record(STATUS_FAILED) except Exception, e: self._logger.LogError("Benchmark run: '%s' failed: %s" % (self.name, e)) traceback.print_exc() - if self.status != STATUS_FAILED: - self.status = STATUS_FAILED + if self.timeline.GetLastEvent() != STATUS_FAILED: + self.timeline.Record(STATUS_FAILED) self.failure_reason = str(e) finally: if self.machine: @@ -113,15 +112,17 @@ class BenchmarkRun(threading.Thread): def Terminate(self): self.terminated = True self.autotest_runner.Terminate() - if self.status != STATUS_FAILED: - self.status = STATUS_FAILED + if self.timeline.GetLastEvent() != STATUS_FAILED: + self.timeline.Record(STATUS_FAILED) self.failure_reason = "Thread terminated." def AcquireMachine(self): while True: if self.terminated: raise Exception("Thread terminated while trying to acquire machine.") - machine = self.machine_manager.AcquireMachine(self.chromeos_image) + machine = self.machine_manager.AcquireMachine(self.label.chromeos_image, + self.label) + if machine: self._logger.LogOutput("%s: Machine %s acquired at %s" % (self.name, @@ -134,8 +135,8 @@ class BenchmarkRun(threading.Thread): return machine def _GetExtraAutotestArgs(self): - if self.perf_args: - perf_args_list = self.perf_args.split(" ") + if self.benchmark.perf_args: + perf_args_list = self.benchmark.perf_args.split(" ") perf_args_list = [perf_args_list[0]] + ["-a"] + perf_args_list[1:] perf_args = " ".join(perf_args_list) if not perf_args_list[0] in ["record", "stat"]: @@ -148,24 +149,47 @@ class BenchmarkRun(threading.Thread): return "" def RunTest(self, machine): - self.status = STATUS_IMAGING + self.timeline.Record(STATUS_IMAGING) self.machine_manager.ImageMachine(machine, - self.chromeos_image, - self.board) - self.status = "%s %s" % (STATUS_RUNNING, self.autotest_name) + self.label) + self.timeline.Record(STATUS_RUNNING) [retval, out, err] = self.autotest_runner.Run(machine.name, - self.chromeos_root, - self.board, - self.autotest_name, + self.label.chromeos_root, + self.label.board, + self.benchmark.autotest_name, self.autotest_args) self.run_completed = True return Result.CreateFromRun(self._logger, - self.chromeos_root, - self.board, + self.label.chromeos_root, + self.label.board, + self.label.name, out, err, retval) def SetCacheConditions(self, cache_conditions): self.cache_conditions = cache_conditions + + +class MockBenchmarkRun(BenchmarkRun): + """Inherited from BenchmarkRun, just overide RunTest for testing.""" + + def RunTest(self, machine): + """Remove Result.CreateFromRun for testing.""" + self.timeline.Record(STATUS_IMAGING) + self.machine_manager.ImageMachine(machine, + self.label) + self.timeline.Record(STATUS_RUNNING) + [retval, out, err] = self.autotest_runner.Run(machine.name, + self.label.chromeos_root, + self.label.board, + self.benchmark.autotest_name, + self.autotest_args) + self.run_completed = True + rr = Result("Results placed in /tmp/test", "", 0) + rr.out = out + rr.err = err + rr.retval = retval + return rr + diff --git a/crosperf/benchmark_run_unittest.py b/crosperf/benchmark_run_unittest.py index c4c3fdf1..c4670c9c 100755 --- a/crosperf/benchmark_run_unittest.py +++ b/crosperf/benchmark_run_unittest.py @@ -1,38 +1,42 @@ #!/usr/bin/python -# Copyright (c) 2011 The Chromium OS Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. +# Copyright 2011 Google Inc. All Rights Reserved. + +"""Testing of benchmark_run.""" import unittest + +from utils import logger + from autotest_runner import MockAutotestRunner -from benchmark_run import BenchmarkRun +from benchmark_run import MockBenchmarkRun +from label import MockLabel +from benchmark import Benchmark from machine_manager import MockMachineManager from results_cache import MockResultsCache -from utils import logger class BenchmarkRunTest(unittest.TestCase): def testDryRun(self): - m = MockMachineManager() + my_label = MockLabel("test1", "image1", "/tmp/test_benchmark_run", + "x86-alex", "chromeos-alex1", "") + m = MockMachineManager("/tmp/chromeos_root") m.AddMachine("chromeos-alex1") - b = BenchmarkRun("test run", - "PageCycler", - "PageCycler", - "", - "image1", - "/tmp/test", - "/tmp/test/image", - "x86-alex", - 1, - [], - 0.2, - "", - "none", - m, - MockResultsCache(), - MockAutotestRunner(), - logger.GetLogger()) + bench = Benchmark("PageCyler", + "Pyautoperf", + "", + 1, + 0.2, + "") + b = MockBenchmarkRun("test run", + bench, + my_label, + 1, + [], + m, + logger.GetLogger()) + b.cache = MockResultsCache() + b.autotest_runner = MockAutotestRunner() b.start() diff --git a/crosperf/crosperf b/crosperf/crosperf index 286bf25a..904a172a 100755 --- a/crosperf/crosperf +++ b/crosperf/crosperf @@ -1,2 +1,2 @@ #!/bin/bash -PYTHONPATH=$PYTHONPATH:$(dirname $0)/.. python $(dirname $0)/crosperf.py "$@" +PYTHONPATH=$(dirname $0)/..:$PYTHONPATH python $(dirname $0)/crosperf.py "$@" diff --git a/crosperf/crosperf.py b/crosperf/crosperf.py index 24699338..cfb48d7c 100755 --- a/crosperf/crosperf.py +++ b/crosperf/crosperf.py @@ -16,6 +16,7 @@ from help import Help from settings_factory import GlobalSettings from utils import logger +import test_flag l = logger.GetLogger() @@ -77,6 +78,9 @@ def Main(argv): parser.error("Invalid number arguments.") working_directory = os.getcwd() + if options.dry_run: + test_flag.SetTestMode(True) + experiment_file = ExperimentFile(open(experiment_filename, "rb"), option_settings) if not experiment_file.GetGlobalSettings().GetField("name"): diff --git a/crosperf/default_remotes b/crosperf/default_remotes new file mode 100644 index 00000000..5efaebcd --- /dev/null +++ b/crosperf/default_remotes @@ -0,0 +1,5 @@ +x86-zgb : chromeos1-rack3-host4.cros chromeos1-rack3-host5.cros chromeos1-rack3-host6.cros +x86-alex : chromeos2-row1-rack4-host7.cros chromeos2-row1-rack4-host8.cros chromeos2-row1-rack4-host9.cros +lumpy : chromeos2-row2-rack4-host10.cros chromeos2-row2-rack4-host11.cros chromeos2-row2-rack4-host12.cros +stumpy : chromeos2-row3-rack7-host1.cros chromeos2-row3-rack7-host2.cros chromeos2-row3-rack7-host3.cros + diff --git a/crosperf/experiment.py b/crosperf/experiment.py index 7b48344c..e9dc3d07 100644 --- a/crosperf/experiment.py +++ b/crosperf/experiment.py @@ -2,15 +2,20 @@ # Copyright 2011 Google Inc. All Rights Reserved. +"""The experiment setting module.""" + import os import time + +from utils import logger + from autotest_runner import AutotestRunner from benchmark_run import BenchmarkRun from machine_manager import MachineManager +from machine_manager import MockMachineManager from results_cache import ResultsCache from results_report import HTMLResultsReport -from utils import logger -from utils.file_utils import FileUtils +import test_flag class Experiment(object): @@ -33,6 +38,7 @@ class Experiment(object): self.labels = labels self.benchmarks = benchmarks self.num_complete = 0 + self.num_run_complete = 0 # We need one chromeos_root to run the benchmarks in, but it doesn't # matter where it is, unless the ABIs are different. @@ -44,13 +50,17 @@ class Experiment(object): raise Exception("No chromeos_root given and could not determine one from " "the image path.") - self.machine_manager = MachineManager(chromeos_root) + if test_flag.GetTestMode(): + self.machine_manager = MockMachineManager(chromeos_root) + else: + self.machine_manager = MachineManager(chromeos_root) self.l = logger.GetLogger() for machine in remote: self.machine_manager.AddMachine(machine) - self.machine_manager.ComputeCommonCheckSum() - self.machine_manager.ComputeCommonCheckSumString() + for label in labels: + self.machine_manager.ComputeCommonCheckSum(label) + self.machine_manager.ComputeCommonCheckSumString(label) self.start_time = None self.benchmark_runs = self._GenerateBenchmarkRuns() @@ -69,17 +79,10 @@ class Experiment(object): "run.%s" % (full_name), True) benchmark_run = BenchmarkRun(benchmark_run_name, - benchmark.name, - benchmark.autotest_name, - benchmark.autotest_args, - label.name, - label.chromeos_root, - label.chromeos_image, - label.board, + benchmark, + label, iteration, self.cache_conditions, - benchmark.outlier_range, - benchmark.perf_args, self.machine_manager, logger_to_use) @@ -102,6 +105,8 @@ class Experiment(object): t.join(0) if not t.isAlive(): self.num_complete += 1 + if not t.cache_hit: + self.num_run_complete += 1 self.active_threads.remove(t) return False return True diff --git a/crosperf/experiment_factory.py b/crosperf/experiment_factory.py index 5c21179e..bd3076dd 100644 --- a/crosperf/experiment_factory.py +++ b/crosperf/experiment_factory.py @@ -1,11 +1,17 @@ #!/usr/bin/python # Copyright 2011 Google Inc. All Rights Reserved. +"""A module to generate experments.""" + +import os +import socket from benchmark import Benchmark from experiment import Experiment from label import Label +from label import MockLabel from results_cache import CacheConditions +import test_flag class ExperimentFactory(object): @@ -32,7 +38,9 @@ class ExperimentFactory(object): cache_conditions.append(CacheConditions.RUN_SUCCEEDED) if global_settings.GetField("rerun"): cache_conditions.append(CacheConditions.FALSE) - if global_settings.GetField("exact_remote"): + if global_settings.GetField("same_machine"): + cache_conditions.append(CacheConditions.SAME_MACHINE_MATCH) + if global_settings.GetField("same_specs"): cache_conditions.append(CacheConditions.MACHINES_MATCH) # Construct benchmarks. @@ -54,20 +62,61 @@ class ExperimentFactory(object): # Construct labels. labels = [] all_label_settings = experiment_file.GetSettings("label") + all_remote = list(remote) for label_settings in all_label_settings: label_name = label_settings.name image = label_settings.GetField("chromeos_image") chromeos_root = label_settings.GetField("chromeos_root") board = label_settings.GetField("board") - label = Label(label_name, image, chromeos_root, board) + my_remote = label_settings.GetField("remote") + # TODO(yunlian): We should consolidate code in machine_manager.py + # to derermine whether we are running from within google or not + if ("corp.google.com" in socket.gethostname() and + (not my_remote + or my_remote == remote + and global_settings.GetField("board") != board)): + my_remote = self.GetDefaultRemotes(board) + if global_settings.GetField("same_machine") and len(my_remote) > 1: + raise Exception("Only one remote is allowed when same_machine " + "is turned on") + all_remote += my_remote + image_args = label_settings.GetField("image_args") + if test_flag.GetTestMode(): + label = MockLabel(label_name, image, chromeos_root, board, my_remote, + image_args) + else: + label = Label(label_name, image, chromeos_root, board, my_remote, + image_args) labels.append(label) email = global_settings.GetField("email") - - experiment = Experiment(experiment_name, remote, rerun_if_failed, + all_remote = list(set(all_remote)) + experiment = Experiment(experiment_name, all_remote, rerun_if_failed, working_directory, chromeos_root, cache_conditions, labels, benchmarks, experiment_file.Canonicalize(), email) return experiment + + def GetDefaultRemotes(self, board): + default_remotes_file = os.path.join(os.path.dirname(__file__), + "default_remotes") + try: + with open(default_remotes_file) as f: + for line in f: + key, v = line.split(":") + if key.strip() == board: + remotes = v.strip().split(" ") + if remotes: + return remotes + else: + raise Exception("There is not remote for {0}".format(board)) + except IOError: + raise Exception("IOError while reading file {0}" + .format(default_remotes_file)) + else: + raise Exception("There is not remote for {0}".format(board)) + + + diff --git a/crosperf/experiment_factory_unittest.py b/crosperf/experiment_factory_unittest.py index e91295da..fa943519 100755 --- a/crosperf/experiment_factory_unittest.py +++ b/crosperf/experiment_factory_unittest.py @@ -4,10 +4,12 @@ import StringIO import unittest -from experiment_factory import ExperimentFactory -from experiment_file import ExperimentFile + from utils.file_utils import FileUtils +from experiment_factory import ExperimentFactory +from experiment_file import ExperimentFile +import test_flag EXPERIMENT_FILE_1 = """ board: x86-alex @@ -47,4 +49,5 @@ class ExperimentFactoryTest(unittest.TestCase): if __name__ == "__main__": FileUtils.Configure(True) + test_flag.SetTestMode(True) unittest.main() diff --git a/crosperf/experiment_file_unittest.py b/crosperf/experiment_file_unittest.py index 67da11e5..d08c7eb5 100755 --- a/crosperf/experiment_file_unittest.py +++ b/crosperf/experiment_file_unittest.py @@ -11,7 +11,7 @@ from experiment_file import ExperimentFile EXPERIMENT_FILE_1 = """ board: x86-alex remote: chromeos-alex3 - + perf_args: record -a -e cycles benchmark: PageCycler { iterations: 3 } @@ -21,6 +21,8 @@ EXPERIMENT_FILE_1 = """ } image2 { + board: lumpy + remote: chromeos-lumpy1 chromeos_image: /usr/local/google/cros_image2.bin } """ @@ -70,7 +72,8 @@ class ExperimentFileTest(unittest.TestCase): experiment_file = ExperimentFile(input_file) global_settings = experiment_file.GetGlobalSettings() self.assertEqual(global_settings.GetField("remote"), ["chromeos-alex3"]) - + self.assertEqual(global_settings.GetField("perf_args"), + "record -a -e cycles") benchmark_settings = experiment_file.GetSettings("benchmark") self.assertEqual(len(benchmark_settings), 1) self.assertEqual(benchmark_settings[0].name, "PageCycler") @@ -82,6 +85,8 @@ class ExperimentFileTest(unittest.TestCase): self.assertEqual(label_settings[0].GetField("board"), "x86-alex") self.assertEqual(label_settings[0].GetField("chromeos_image"), "/usr/local/google/cros_image1.bin") + self.assertEqual(label_settings[1].GetField("remote"), ["chromeos-lumpy1"]) + self.assertEqual(label_settings[0].GetField("remote"), ["chromeos-alex3"]) def testOverrideSetting(self): input_file = StringIO.StringIO(EXPERIMENT_FILE_2) diff --git a/crosperf/experiment_runner.py b/crosperf/experiment_runner.py index 4219c435..b905bbdc 100644 --- a/crosperf/experiment_runner.py +++ b/crosperf/experiment_runner.py @@ -2,19 +2,24 @@ # Copyright 2011 Google Inc. All Rights Reserved. +"""The experiment runner module.""" import getpass import os import time -from experiment_status import ExperimentStatus -from results_report import HTMLResultsReport -from results_report import TextResultsReport + from utils import command_executer from utils import logger from utils.email_sender import EmailSender from utils.file_utils import FileUtils +from experiment_status import ExperimentStatus +from results_report import HTMLResultsReport +from results_report import TextResultsReport + class ExperimentRunner(object): + """ExperimentRunner Class.""" + STATUS_TIME_DELAY = 30 THREAD_MONITOR_DELAY = 2 @@ -95,6 +100,7 @@ class ExperimentRunner(object): benchmark_run_path = os.path.join(results_directory, benchmark_run_name) benchmark_run.result.CopyResultsTo(benchmark_run_path) + benchmark_run.result.CleanUp() def Run(self): self._Run(self._experiment) @@ -105,6 +111,8 @@ class ExperimentRunner(object): class MockExperimentRunner(ExperimentRunner): + """Mocked ExperimentRunner for testing.""" + def __init__(self, experiment): super(MockExperimentRunner, self).__init__(experiment) diff --git a/crosperf/experiment_status.py b/crosperf/experiment_status.py index ddf3f54a..3a270663 100644 --- a/crosperf/experiment_status.py +++ b/crosperf/experiment_status.py @@ -2,14 +2,20 @@ # Copyright 2011 Google Inc. All Rights Reserved. +"""The class to show the banner.""" + import datetime import time class ExperimentStatus(object): + """The status class.""" + def __init__(self, experiment): self.experiment = experiment self.num_total = len(self.experiment.benchmark_runs) + self.completed = 0 + self.new_job_start_time = time.time() def _GetProgressBar(self, num_complete, num_total): ret = "Done: %s%%" % int(100.0 * num_complete / num_total) @@ -23,14 +29,23 @@ class ExperimentStatus(object): return ret def GetProgressString(self): + """Get the elapsed_time, ETA.""" current_time = time.time() if self.experiment.start_time: elapsed_time = current_time - self.experiment.start_time else: elapsed_time = 0 try: - eta_seconds = (float(self.num_total - self.experiment.num_complete) * - elapsed_time / self.experiment.num_complete) + if self.completed != self.experiment.num_complete: + self.completed = self.experiment.num_complete + self.new_job_start_time = current_time + time_completed_jobs = (elapsed_time - + (current_time - self.new_job_start_time)) + eta_seconds = (float(self.num_total - self.experiment.num_complete -1) * + time_completed_jobs / self.experiment.num_run_complete + + (time_completed_jobs / self.experiment.num_run_complete + - (current_time - self.new_job_start_time))) + eta_seconds = int(eta_seconds) eta = datetime.timedelta(seconds=eta_seconds) except ZeroDivisionError: @@ -45,11 +60,12 @@ class ExperimentStatus(object): return "\n".join(strings) def GetStatusString(self): + """Get the status string of all the benchmark_runs.""" status_bins = {} for benchmark_run in self.experiment.benchmark_runs: - if benchmark_run.status not in status_bins: - status_bins[benchmark_run.status] = [] - status_bins[benchmark_run.status].append(benchmark_run) + if benchmark_run.timeline.GetLastEvent() not in status_bins: + status_bins[benchmark_run.timeline.GetLastEvent()] = [] + status_bins[benchmark_run.timeline.GetLastEvent()].append(benchmark_run) status_strings = [] for key, val in status_bins.items(): @@ -64,6 +80,9 @@ class ExperimentStatus(object): def _GetNamesAndIterations(self, benchmark_runs): strings = [] + t = time.time() for benchmark_run in benchmark_runs: - strings.append("'%s'" % benchmark_run.name) + t_last = benchmark_run.timeline.GetLastEventTime() + elapsed = str(datetime.timedelta(seconds=int(t-t_last))) + strings.append("'{0}' {1}".format(benchmark_run.name, elapsed)) return " %s (%s)" % (len(strings), ", ".join(strings)) diff --git a/crosperf/label.py b/crosperf/label.py index 3b6fb804..64ce352f 100644 --- a/crosperf/label.py +++ b/crosperf/label.py @@ -2,14 +2,24 @@ # Copyright 2011 Google Inc. All Rights Reserved. +"""The label of benchamrks.""" + +import os from utils.file_utils import FileUtils class Label(object): - def __init__(self, name, chromeos_image, chromeos_root, board): + def __init__(self, name, chromeos_image, chromeos_root, board, remote, + image_args): + # Expand ~ + chromeos_root = os.path.expanduser(chromeos_root) + chromeos_image = os.path.expanduser(chromeos_image) + self.name = name self.chromeos_image = chromeos_image self.board = board + self.remote = remote + self.image_args = image_args if not chromeos_root: chromeos_root = FileUtils().ChromeOSRootFromImage(chromeos_image) @@ -24,3 +34,17 @@ class Label(object): % (name, chromeos_root)) self.chromeos_root = chromeos_root + + +class MockLabel(object): + def __init__(self, name, chromeos_image, chromeos_root, board, remote, + image_args): + self.name = name + self.chromeos_image = chromeos_image + self.board = board + self.remote = remote + if not chromeos_root: + self.chromeos_root = "/tmp/chromeos_root" + else: + self.chromeos_root = chromeos_root + self.image_args = image_args diff --git a/crosperf/machine_manager.py b/crosperf/machine_manager.py index 8562e929..9eb9bcdf 100644 --- a/crosperf/machine_manager.py +++ b/crosperf/machine_manager.py @@ -1,16 +1,23 @@ +#!/usr/bin/python +# +# Copyright 2012 Google Inc. All Rights Reserved. + import hashlib import image_chromeos import lock_machine import math import os.path +import re import sys import threading import time -from image_checksummer import ImageChecksummer + from utils import command_executer from utils import logger from utils.file_utils import FileUtils +from image_checksummer import ImageChecksummer + CHECKSUM_FILE = "/usr/local/osimage_checksum_file" @@ -26,7 +33,9 @@ class CrosMachine(object): self._GetMemoryInfo() self._GetCPUInfo() self._ComputeMachineChecksumString() - self._ComputeMachineChecksum() + self._GetMachineID() + self.machine_checksum = self._GetMD5Checksum(self.checksum_string) + self.machine_id_checksum = self._GetMD5Checksum(self.machine_id) def _ParseMemoryInfo(self): line = self.meminfo.splitlines()[0] @@ -60,7 +69,7 @@ class CrosMachine(object): self.phys_kbytes = phys_kbytes def _GetMemoryInfo(self): - #TODO yunlian: when the machine in rebooting, it will not return + #TODO yunlian: when the machine in rebooting, it will not return #meminfo, the assert does not catch it either ce = command_executer.GetCommandExecuter() command = "cat /proc/meminfo" @@ -94,11 +103,22 @@ class CrosMachine(object): self.checksum_string += line self.checksum_string += " " + str(self.phys_kbytes) - def _ComputeMachineChecksum(self): - if self.checksum_string: - self.machine_checksum = hashlib.md5(self.checksum_string).hexdigest() + def _GetMD5Checksum(self, ss): + if ss: + return hashlib.md5(ss).hexdigest() else: - self.machine_checksum = "" + return "" + + def _GetMachineID(self): + ce = command_executer.GetCommandExecuter() + command = "ifconfig" + ret, if_out, _ = ce.CrosRunCommand( + command, return_output=True, + machine=self.name, chromeos_root=self.chromeos_root) + b = if_out.splitlines() + a = [l for l in b if "lan" in l] + self.machine_id = a[0] + assert ret == 0, "Could not get machine_id from machine: %s" % self.name def __str__(self): l = [] @@ -118,52 +138,55 @@ class MachineManager(object): self.image_lock = threading.Lock() self.num_reimages = 0 self.chromeos_root = None - if os.path.isdir(lock_machine.FileLock.LOCKS_DIR): + self.machine_checksum = {} + self.machine_checksum_string = {} + + if os.path.isdir(lock_machine.Machine.LOCKS_DIR): self.no_lock = False else: self.no_lock = True - self.initialized = False + self._initialized_machines = [] self.chromeos_root = chromeos_root - def ImageMachine(self, machine, chromeos_image, board=None): - checksum = ImageChecksummer().Checksum(chromeos_image) + def ImageMachine(self, machine, label): + checksum = ImageChecksummer().Checksum(label.chromeos_image) if machine.checksum == checksum: return - chromeos_root = FileUtils().ChromeOSRootFromImage(chromeos_image) + chromeos_root = label.chromeos_root if not chromeos_root: chromeos_root = self.chromeos_root - image_args = [image_chromeos.__file__, - "--chromeos_root=%s" % chromeos_root, - "--image=%s" % chromeos_image, - "--remote=%s" % machine.name] - if board: - image_args.append("--board=%s" % board) + image_chromeos_args = [image_chromeos.__file__, + "--chromeos_root=%s" % chromeos_root, + "--image=%s" % label.chromeos_image, + "--image_args=%s" % label.image_args, + "--remote=%s" % machine.name] + if label.board: + image_chromeos_args.append("--board=%s" % label.board) # Currently can't image two machines at once. # So have to serialized on this lock. ce = command_executer.GetCommandExecuter() with self.image_lock: - retval = ce.RunCommand(" ".join(["python"] + image_args)) - self.num_reimages += 1 + retval = ce.RunCommand(" ".join(["python"] + image_chromeos_args)) if retval: raise Exception("Could not image machine: '%s'." % machine.name) + else: + self.num_reimages += 1 machine.checksum = checksum - machine.image = chromeos_image + machine.image = label.chromeos_image return retval - def ComputeCommonCheckSum(self): - self.machine_checksum = "" - for machine in self.GetMachines(): + def ComputeCommonCheckSum(self, label): + for machine in self.GetMachines(label): if machine.machine_checksum: - self.machine_checksum = machine.machine_checksum + self.machine_checksum[label.name] = machine.machine_checksum break - def ComputeCommonCheckSumString(self): - self.machine_checksum_string = "" - for machine in self.GetMachines(): + def ComputeCommonCheckSumString(self, label): + for machine in self.GetMachines(label): if machine.checksum_string: - self.machine_checksum_string = machine.checksum_string + self.machine_checksum_string[label.name] = machine.checksum_string break def _TryToLockMachine(self, cros_machine): @@ -198,28 +221,28 @@ class MachineManager(object): machine_name) self._all_machines.append(cm) - def AreAllMachineSame(self): - checksums = [m.machine_checksum for m in self.GetMachines()] + def AreAllMachineSame(self, label): + checksums = [m.machine_checksum for m in self.GetMachines(label)] return len(set(checksums)) == 1 - def AcquireMachine(self, chromeos_image): + def AcquireMachine(self, chromeos_image, label): image_checksum = ImageChecksummer().Checksum(chromeos_image) + machines = self.GetMachines(label) with self._lock: # Lazily external lock machines - if not self.initialized: - for m in self._all_machines: + + for m in machines: + if m not in self._initialized_machines: + self._initialized_machines.append(m) self._TryToLockMachine(m) - self.initialized = True - for m in self._all_machines: m.released_time = time.time() - - if not self.AreAllMachineSame(): + if not self.AreAllMachineSame(label): logger.GetLogger().LogFatal("-- not all the machine are identical") - if not self._machines: + if not self.GetAvailableMachines(label): machine_names = [] - for machine in self._all_machines: + for machine in machines: machine_names.append(machine.name) - logger.GetLogger().LogFatal("Could not acquire any of the" + logger.GetLogger().LogFatal("Could not acquire any of the " "following machines: '%s'" % ", ".join(machine_names)) @@ -227,12 +250,14 @@ class MachineManager(object): ### if (m.locked and time.time() - m.released_time < 10 and ### m.checksum == image_checksum): ### return None - for m in [machine for machine in self._machines if not machine.locked]: + for m in [machine for machine in self.GetAvailableMachines(label) + if not machine.locked]: if m.checksum == image_checksum: m.locked = True m.autotest_run = threading.current_thread() return m - for m in [machine for machine in self._machines if not machine.locked]: + for m in [machine for machine in self.GetAvailableMachines(label) + if not machine.locked]: if not m.checksum: m.locked = True m.autotest_run = threading.current_thread() @@ -243,15 +268,23 @@ class MachineManager(object): # the number of re-images. # TODO(asharif): If we centralize the thread-scheduler, we wont need this # code and can implement minimal reimaging code more cleanly. - for m in [machine for machine in self._machines if not machine.locked]: + for m in [machine for machine in self.GetAvailableMachines(label) + if not machine.locked]: if time.time() - m.released_time > 20: m.locked = True m.autotest_run = threading.current_thread() return m return None - def GetMachines(self): - return self._all_machines + def GetAvailableMachines(self, label=None): + if not label: + return self._machines + return [m for m in self._machines if m.name in label.remote] + + def GetMachines(self, label=None): + if not label: + return self._all_machines + return [m for m in self._all_machines if m.name in label.remote] def ReleaseMachine(self, machine): with self._lock: @@ -289,7 +322,7 @@ class MachineManager(object): for m in self._machines: if m.autotest_run: autotest_name = m.autotest_run.name - autotest_status = m.autotest_run.status + autotest_status = m.autotest_run.timeline.GetLastEvent() else: autotest_name = "" autotest_status = "" @@ -305,26 +338,73 @@ class MachineManager(object): table.append(machine_string) return "Machine Status:\n%s" % "\n".join(table) + def GetAllCPUInfo(self, labels): + """Get cpuinfo for labels, merge them if their cpuinfo are the same.""" + dic = {} + for label in labels: + for machine in self._all_machines: + if machine.name in label.remote: + if machine.cpuinfo not in dic: + dic[machine.cpuinfo] = [label.name] + else: + dic[machine.cpuinfo].append(label.name) + break + output = "" + for key, v in dic.items(): + output += " ".join(v) + output += "\n-------------------\n" + output += key + output += "\n\n\n" + return output -class MockMachineManager(object): - def __init__(self): - self.machines = [] - def ImageMachine(self, machine_name, chromeos_image, board=None): - return 0 +class MockCrosMachine(CrosMachine): + def __init__(self, name, chromeos_root): + self.name = name + self.image = None + self.checksum = None + self.locked = False + self.released_time = time.time() + self.autotest_run = None + self.chromeos_root = chromeos_root + self.checksum_string = re.sub("\d", "", name) + #In test, we assume "lumpy1", "lumpy2" are the same machine. + self.machine_checksum = self._GetMD5Checksum(self.checksum_string) + + +class MockMachineManager(MachineManager): + + def __init__(self, chromeos_root): + super(MockMachineManager, self).__init__(chromeos_root) + + def _TryToLockMachine(self, cros_machine): + self._machines.append(cros_machine) + cros_machine.checksum = "" def AddMachine(self, machine_name): - self.machines.append(CrosMachine(machine_name)) + with self._lock: + for m in self._all_machines: + assert m.name != machine_name, "Tried to double-add %s" % machine_name + cm = MockCrosMachine(machine_name, self.chromeos_root) + assert cm.machine_checksum, ("Could not find checksum for machine %s" % + machine_name) + self._all_machines.append(cm) - def AcquireMachine(self, chromeos_image): - for machine in self.machines: + def AcquireMachine(self, chromeos_image, label): + for machine in self._all_machines: if not machine.locked: machine.locked = True return machine return None + def ImageMachine(self, machine_name, label): + return 0 + def ReleaseMachine(self, machine): machine.locked = False - def GetMachines(self): - return self.machines + def GetMachines(self, label): + return self._all_machines + + def GetAvailableMachines(self, label): + return self._all_machines diff --git a/crosperf/machine_manager_unittest.py b/crosperf/machine_manager_unittest.py new file mode 100755 index 00000000..98baf456 --- /dev/null +++ b/crosperf/machine_manager_unittest.py @@ -0,0 +1,65 @@ +#!/usr/bin/python + +# Copyright 2012 Google Inc. All Rights Reserved. + +"""Unittest for machine_manager.""" +import unittest + +import label +import machine_manager + + +class MyMachineManager(machine_manager.MachineManager): + + def __init__(self, chromeos_root): + super(MyMachineManager, self).__init__(chromeos_root) + + def _TryToLockMachine(self, cros_machine): + self._machines.append(cros_machine) + cros_machine.checksum = "" + + def AddMachine(self, machine_name): + with self._lock: + for m in self._all_machines: + assert m.name != machine_name, "Tried to double-add %s" % machine_name + cm = machine_manager.MockCrosMachine(machine_name, self.chromeos_root) + assert cm.machine_checksum, ("Could not find checksum for machine %s" % + machine_name) + self._all_machines.append(cm) + +CHROMEOS_ROOT = "/tmp/chromeos-root" +MACHINE_NAMES = ["lumpy1", "lumpy2", "lumpy3", "daisy1", "daisy2"] +LABEL_LUMPY = label.MockLabel("lumpy", "image", CHROMEOS_ROOT, "lumpy", + ["lumpy1", "lumpy2", "lumpy3", "lumpy4"], "") +LABEL_MIX = label.MockLabel("mix", "image", CHROMEOS_ROOT, "mix", + ["daisy1", "daisy2", "lumpy3", "lumpy4"], "") + + +class MachineManagerTest(unittest.TestCase): + + def testAreAllMachineSame(self): + manager = MyMachineManager(CHROMEOS_ROOT) + for m in MACHINE_NAMES: + manager.AddMachine(m) + self.assertEqual(manager.AreAllMachineSame(LABEL_LUMPY), True) + self.assertEqual(manager.AreAllMachineSame(LABEL_MIX), False) + + def testGetMachines(self): + manager = MyMachineManager(CHROMEOS_ROOT) + for m in MACHINE_NAMES: + manager.AddMachine(m) + names = [m.name for m in manager.GetMachines(LABEL_LUMPY)] + self.assertEqual(names, ["lumpy1", "lumpy2", "lumpy3"]) + + def testGetAvailableMachines(self): + manager = MyMachineManager(CHROMEOS_ROOT) + for m in MACHINE_NAMES: + manager.AddMachine(m) + for m in manager._all_machines: + if int(m.name[-1]) % 2: + manager._TryToLockMachine(m) + names = [m.name for m in manager.GetAvailableMachines(LABEL_LUMPY)] + self.assertEqual(names, ["lumpy1", "lumpy3"]) + +if __name__ == "__main__": + unittest.main() diff --git a/crosperf/perf_table.py b/crosperf/perf_table.py new file mode 100644 index 00000000..b3387ea8 --- /dev/null +++ b/crosperf/perf_table.py @@ -0,0 +1,58 @@ +#!/usr/bin/python +# +# Copyright 2012 Google Inc. All Rights Reserved. +"""Parse perf report data for tabulator.""" + +import os + +from utils import perf_diff + +def ParsePerfReport(perf_file): + """It should return a dict.""" + + return {"cycles": {"foo": 10, "bar": 20}, + "cache_miss": {"foo": 20, "bar": 10}} + + +class PerfTable(object): + """The class to generate dicts for tabulator.""" + + def __init__(self, experiment, label_names): + self._experiment = experiment + self._label_names = label_names + self.perf_data = {} + self.GenerateData() + # {benchmark:{perf_event1:[[{func1:number, func2:number}, + # {func1: number, func2: number}]], ...}, + # benchmark2:...} + + def GenerateData(self): + for label in self._label_names: + for benchmark in self._experiment.benchmarks: + for i in range(1, benchmark.iterations+1): + dir_name = label + benchmark.name + str(i) + dir_name = filter(str.isalnum, dir_name) + perf_file = os.path.join(self._experiment.results_directory, + dir_name, + "perf.data.report.0") + self.ReadPerfReport(perf_file, label, benchmark.name, i - 1) + + def ReadPerfReport(self, perf_file, label, benchmark_name, iteration): + """Add the data from one run to the dict.""" + if not os.path.isfile(perf_file): + return + perf_of_run = perf_diff.GetPerfDictFromReport(perf_file) + if benchmark_name not in self.perf_data: + self.perf_data[benchmark_name] = {} + for event in perf_of_run: + self.perf_data[benchmark_name][event] = [] + ben_data = self.perf_data[benchmark_name] + + label_index = self._label_names.index(label) + for event in ben_data: + while len(ben_data[event]) <= label_index: + ben_data[event].append([]) + data_for_label = ben_data[event][label_index] + while len(data_for_label) <= iteration: + data_for_label.append({}) + data_for_label[iteration] = perf_of_run[event] diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py index 1c33e720..c0600962 100644 --- a/crosperf/results_cache.py +++ b/crosperf/results_cache.py @@ -11,7 +11,6 @@ import re import tempfile from utils import command_executer -from utils import logger from utils import misc from image_checksummer import ImageChecksummer @@ -28,11 +27,13 @@ class Result(object): what the key of the cache is. For runs with perf, it stores perf.data, perf.report, etc. The key generation is handled by the ResultsCache class. """ - def __init__(self, chromeos_root, logger): + + def __init__(self, chromeos_root, logger, label_name): self._chromeos_root = chromeos_root self._logger = logger self._ce = command_executer.GetCommandExecuter(self._logger) self._temp_dir = None + self.label_name = label_name def _CopyFilesTo(self, dest_dir, files_to_copy): file_index = 0 @@ -63,7 +64,7 @@ class Result(object): command = ("python %s --no-color --csv %s" % (generate_test_report, self.results_dir)) - [ret, out, err] = self._ce.RunCommand(command, return_output=True) + [_, out, _] = self._ce.RunCommand(command, return_output=True) keyvals_dict = {} for line in out.splitlines(): tokens = re.split("=|,", line) @@ -76,7 +77,7 @@ class Result(object): return keyvals_dict def _GetResultsDir(self): - mo = re.search("Results placed in (\S+)", self.out) + mo = re.search(r"Results placed in (\S+)", self.out) if mo: result = mo.group(1) return result @@ -85,7 +86,7 @@ class Result(object): def _FindFilesInResultsDir(self, find_args): command = "find %s %s" % (self.results_dir, find_args) - ret, out, err = self._ce.RunCommand(command, return_output=True) + ret, out, _ = self._ce.RunCommand(command, return_output=True) if ret: raise Exception("Could not run find command!") return out @@ -108,23 +109,22 @@ class Result(object): raise Exception("Perf report file already exists: %s" % perf_report_file) chroot_perf_report_file = misc.GetInsideChrootPath(self._chromeos_root, - perf_report_file) + perf_report_file) command = ("/usr/sbin/perf report " "-n " "--symfs /build/%s " "--vmlinux /build/%s/usr/lib/debug/boot/vmlinux " "--kallsyms /build/%s/boot/System.map-* " "-i %s --stdio " - "| head -n1000 " - "| tee %s" % + "> %s" % (self._board, self._board, self._board, chroot_perf_data_file, chroot_perf_report_file)) - ret, out, err = self._ce.ChrootRunCommand(self._chromeos_root, - command, - return_output=True) + self._ce.ChrootRunCommand(self._chromeos_root, + command) + # Add a keyval to the dictionary for the events captured. perf_report_files.append( misc.GetOutsideChrootPath(self._chromeos_root, @@ -136,7 +136,7 @@ class Result(object): for perf_report_file in self.perf_report_files: with open(perf_report_file, "r") as f: report_contents = f.read() - for group in re.findall("Events: (\S+) (\S+)", report_contents): + for group in re.findall(r"Events: (\S+) (\S+)", report_contents): num_events = group[0] event_name = group[1] key = "perf_%s_%s" % (report_id, event_name) @@ -188,14 +188,12 @@ class Result(object): self.perf_data_files = self._GetPerfDataFiles() self.perf_report_files = self._GetPerfReportFiles() self._ProcessResults() - self.CleanUp() def CleanUp(self): if self._temp_dir: command = "rm -rf %s" % self._temp_dir self._ce.RunCommand(command) - def StoreToCacheDir(self, cache_dir, machine_manager): # Create the dir if it doesn't exist. command = "mkdir -p %s" % cache_dir @@ -221,17 +219,18 @@ class Result(object): # TODO(asharif): Make machine_manager a singleton, and don't pass it into # this function. with open(os.path.join(cache_dir, MACHINE_FILE), "w") as f: - f.write(machine_manager.machine_checksum_string) + f.write(machine_manager.machine_checksum_string[self.label_name]) @classmethod - def CreateFromRun(cls, logger, chromeos_root, board, out, err, retval): - result = cls(chromeos_root, logger) + def CreateFromRun(cls, logger, chromeos_root, board, label_name, + out, err, retval): + result = cls(chromeos_root, logger, label_name) result._PopulateFromRun(board, out, err, retval) return result @classmethod - def CreateFromCacheHit(cls, chromeos_root, logger, cache_dir): - result = cls(chromeos_root, logger) + def CreateFromCacheHit(cls, chromeos_root, logger, cache_dir, label_name): + result = cls(chromeos_root, logger, label_name) try: result._PopulateFromCacheDir(cache_dir) except Exception as e: @@ -260,17 +259,21 @@ class CacheConditions(object): # Cache hit if the image path matches the cached image path. IMAGE_PATH_MATCH = 5 + # Cache hit if the uuid of hard disk mataches the cached one + + SAME_MACHINE_MATCH = 6 + class ResultsCache(object): """ This class manages the key of the cached runs without worrying about what is exactly stored (value). The value generation is handled by the Results class. """ - CACHE_VERSION = 5 + CACHE_VERSION = 6 def Init(self, chromeos_image, chromeos_root, autotest_name, iteration, autotest_args, machine_manager, board, cache_conditions, - logger_to_use): + logger_to_use, label): self.chromeos_image = chromeos_image self.chromeos_root = chromeos_root self.autotest_name = autotest_name @@ -281,6 +284,7 @@ class ResultsCache(object): self.machine_manager = machine_manager self._logger = logger_to_use self._ce = command_executer.GetCommandExecuter(self._logger) + self.label = label def _GetCacheDirForRead(self): glob_path = self._FormCacheDir(self._GetCacheKeyList(True)) @@ -288,9 +292,6 @@ class ResultsCache(object): if matching_dirs: # Cache file found. - if len(matching_dirs) > 1: - self._logger.LogError("Multiple compatible cache files: %s." % - " ".join(matching_dirs)) return matching_dirs[0] else: return None @@ -308,7 +309,7 @@ class ResultsCache(object): if read and CacheConditions.MACHINES_MATCH not in self.cache_conditions: machine_checksum = "*" else: - machine_checksum = self.machine_manager.machine_checksum + machine_checksum = self.machine_manager.machine_checksum[self.label.name] if read and CacheConditions.CHECKSUMS_MATCH not in self.cache_conditions: checksum = "*" else: @@ -319,13 +320,22 @@ class ResultsCache(object): else: image_path_checksum = hashlib.md5(self.chromeos_image).hexdigest() + if read and CacheConditions.SAME_MACHINE_MATCH not in self.cache_conditions: + machine_id_checksum = "*" + else: + for machine in self.machine_manager.GetMachines(self.label): + if machine.name == self.label.remote[0]: + machine_id_checksum = machine.machine_id_checksum + break + autotest_args_checksum = hashlib.md5( - "".join(self.autotest_args)).hexdigest() + "".join(self.autotest_args)).hexdigest() return (image_path_checksum, self.autotest_name, str(self.iteration), autotest_args_checksum, checksum, machine_checksum, + machine_id_checksum, str(self.CACHE_VERSION)) def ReadResult(self): @@ -342,7 +352,7 @@ class ResultsCache(object): self._logger.LogOutput("Trying to read from cache dir: %s" % cache_dir) result = Result.CreateFromCacheHit(self.chromeos_root, - self._logger, cache_dir) + self._logger, cache_dir, self.label.name) if not result: return None @@ -358,12 +368,20 @@ class ResultsCache(object): result.StoreToCacheDir(cache_dir, self.machine_manager) -class MockResultsCache(object): +class MockResultsCache(ResultsCache): def Init(self, *args): pass def ReadResult(self): - return Result("Results placed in /tmp/test", "", 0) + return None def StoreResult(self, result): pass + + +class MockResult(Result): + def _PopulateFromRun(self, out, err, retval): + self.out = out + self.err = err + self.retval = retval + diff --git a/crosperf/results_organizer.py b/crosperf/results_organizer.py index 0071387b..810186b2 100644 --- a/crosperf/results_organizer.py +++ b/crosperf/results_organizer.py @@ -1,6 +1,8 @@ #!/usr/bin/python # Copyright 2012 Google Inc. All Rights Reserved. +"""Parse data from benchmark_runs for tabulator.""" +import re class ResultOrganizer(object): @@ -18,18 +20,22 @@ class ResultOrganizer(object): ]}. """ - def __init__(self, benchmark_runs, labels): + def __init__(self, benchmark_runs, labels, benchmarks=None): self.result = {} self.labels = [] + self.prog = re.compile(r"(\w+)\{(\d+)\}") + self.benchmarks = benchmarks + if not self.benchmarks: + self.benchmarks = [] for label in labels: self.labels.append(label.name) for benchmark_run in benchmark_runs: - benchmark_name = benchmark_run.benchmark_name + benchmark_name = benchmark_run.benchmark.name if benchmark_name not in self.result: self.result[benchmark_name] = [] while len(self.result[benchmark_name]) < len(labels): self.result[benchmark_name].append([]) - label_index = self.labels.index(benchmark_run.label_name) + label_index = self.labels.index(benchmark_run.label.name) cur_table = self.result[benchmark_name][label_index] index = benchmark_run.iteration - 1 while index >= len(cur_table): @@ -40,3 +46,50 @@ class ResultOrganizer(object): for autotest_key in benchmark_run.result.keyvals: result_value = benchmark_run.result.keyvals[autotest_key] cur_dict[autotest_key] = result_value + self._DuplicatePass() + + def _DuplicatePass(self): + for bench, data in self.result.items(): + max_dup = self._GetMaxDup(data) + if not max_dup: + continue + for label in data: + index = data.index(label) + data[index] = self._GetNonDupLabel(max_dup, label) + self._AdjustIteration(max_dup, bench) + + def _GetMaxDup(self, data): + """Find the maximum i inside ABCD{i}.""" + max_dup = 0 + for label in data: + for run in label: + for key in run: + if re.match(self.prog, key): + max_dup = max(max_dup, + int(re.search(self.prog, key).group(2))) + return max_dup + + def _GetNonDupLabel(self, max_dup, label): + """Create new list for the runs of the same label.""" + new_label = [] + for run in label: + start_index = len(new_label) + new_label.append(dict(run)) + for i in range(max_dup): + new_label.append({}) + new_run = new_label[start_index] + for key, value in new_run.items(): + if re.match(self.prog, key): + new_key = re.search(self.prog, key).group(1) + index = int(re.search(self.prog, key).group(2)) + new_label[start_index+index][new_key] = str(value) + del new_run[key] + return new_label + + def _AdjustIteration(self, max_dup, bench): + """Adjust the interation numbers if the have keys like ABCD{i}.""" + for benchmark in self.benchmarks: + if benchmark.name == bench: + if not benchmark.iteration_adjusted: + benchmark.iteration_adjusted = True + benchmark.iterations *= (max_dup +1) diff --git a/crosperf/results_report.py b/crosperf/results_report.py index b591370a..f7434132 100644 --- a/crosperf/results_report.py +++ b/crosperf/results_report.py @@ -2,11 +2,12 @@ # Copyright 2011 Google Inc. All Rights Reserved. -import math +from utils.tabulator import * + from column_chart import ColumnChart -from results_sorter import ResultSorter from results_organizer import ResultOrganizer -from utils.tabulator import * +from perf_table import PerfTable + class ResultsReport(object): MAX_COLOR_CODE = 255 @@ -26,7 +27,7 @@ class ResultsReport(object): labels[benchmark_run.label_name].append(benchmark_run) return labels - def GetFullTables(self): + def GetFullTables(self, perf=False): columns = [Column(NonEmptyCountResult(), Format(), "Completed"), @@ -41,23 +42,30 @@ class ResultsReport(object): Column(StdResult(), Format()) ] - return self._GetTables(self.labels, self.benchmark_runs, columns) + if not perf: + return self._GetTables(self.labels, self.benchmark_runs, columns) + return self. _GetPerfTables(self.labels, columns) - def GetSummaryTables(self): - columns = [Column(AmeanResult(), + def GetSummaryTables(self, perf=False): + columns = [Column(NonEmptyCountResult(), + Format(), + "Completed"), + Column(AmeanResult(), Format()), Column(StdResult(), Format(), "StdDev"), Column(CoeffVarResult(), - CoeffVarFormat(), "Mean/StdDev"), + CoeffVarFormat(), "StdDev/Mean"), Column(GmeanRatioResult(), RatioFormat(), "GmeanSpeedup"), Column(GmeanRatioResult(), ColorBoxFormat(), " "), - Column(StatsSignificant(), - Format(), "p-value") + Column(PValueResult(), + PValueFormat(), "p-value") ] - return self._GetTables(self.labels, self.benchmark_runs, columns) + if not perf: + return self._GetTables(self.labels, self.benchmark_runs, columns) + return self. _GetPerfTables(self.labels, columns) def _ParseColumn(self, columns, iteration): new_column = [] @@ -78,9 +86,17 @@ class ResultsReport(object): return False return True + def _GetTableHeader(self, benchmark): + benchmark_info = ("Benchmark: {0}; Iterations: {1}" + .format(benchmark.name, benchmark.iterations)) + cell = Cell() + cell.string_value = benchmark_info + cell.header = True + return [[cell]] + def _GetTables(self, labels, benchmark_runs, columns): tables = [] - ro = ResultOrganizer(benchmark_runs, labels) + ro = ResultOrganizer(benchmark_runs, labels, self.benchmarks) result = ro.result label_name = ro.labels for item in result: @@ -88,11 +104,7 @@ class ResultsReport(object): for benchmark in self.benchmarks: if benchmark.name == item: break - benchmark_info = ("Benchmark: {0}; Iterations: {1}" - .format(benchmark.name, benchmark.iterations)) - cell = Cell() - cell.string_value = benchmark_info - ben_table = [[cell]] + ben_table = self._GetTableHeader(benchmark) if self._AreAllRunsEmpty(runs): cell = Cell() @@ -109,8 +121,41 @@ class ResultsReport(object): tables.append(cell_table) return tables + def _GetPerfTables(self, labels, columns): + tables = [] + label_names = [label.name for label in labels] + p_table = PerfTable(self.experiment, label_names) + + if not p_table.perf_data: + return tables + + for benchmark in p_table.perf_data: + ben = None + for ben in self.benchmarks: + if ben.name == benchmark: + break + + ben_table = self._GetTableHeader(ben) + tables.append(ben_table) + benchmark_data = p_table.perf_data[benchmark] + table = [] + for event in benchmark_data: + tg = TableGenerator(benchmark_data[event], label_names) + table = tg.GetTable() + parsed_columns = self._ParseColumn(columns, ben.iterations) + tf = TableFormatter(table, parsed_columns) + tf.GenerateCellTable() + tf.AddColumnName() + tf.AddLabelName() + tf.AddHeader(str(event)) + table = tf.GetCellTable(headers=False) + tables.append(table) + return tables + def PrintTables(self, tables, out_to): output = "" + if not tables: + return output for table in tables: if out_to == "HTML": tp = TablePrinter(table, TablePrinter.HTML) @@ -126,6 +171,8 @@ class ResultsReport(object): pass output += tp.Print() return output + + class TextResultsReport(ResultsReport): TEXT = """ =========================================== @@ -137,10 +184,30 @@ Summary ------------------------------------------- %s + +Number re-images: %s + +------------------------------------------- +Benchmark Run Status ------------------------------------------- +%s + + +------------------------------------------- +Perf Data +------------------------------------------- +%s + + + Experiment File ------------------------------------------- %s + + +CPUInfo +------------------------------------------- +%s =========================================== """ @@ -148,17 +215,46 @@ Experiment File super(TextResultsReport, self).__init__(experiment) self.email = email + def GetStatusTable(self): + """Generate the status table by the tabulator.""" + table = [["", ""]] + columns = [Column(LiteralResult(iteration=0), Format(), "Status"), + Column(LiteralResult(iteration=1), Format(), "Failing Reason")] + + for benchmark_run in self.benchmark_runs: + status = [benchmark_run.name, [benchmark_run.timeline.GetLastEvent(), + benchmark_run.failure_reason]] + table.append(status) + tf = TableFormatter(table, columns) + cell_table = tf.GetCellTable() + return [cell_table] + def GetReport(self): + """Generate the report for email and console.""" + status_table = self.GetStatusTable() summary_table = self.GetSummaryTables() full_table = self.GetFullTables() + perf_table = self.GetSummaryTables(perf=True) + if not perf_table: + perf_table = None if not self.email: return self.TEXT % (self.experiment.name, self.PrintTables(summary_table, "CONSOLE"), - self.experiment.experiment_file) + self.experiment.machine_manager.num_reimages, + self.PrintTables(status_table, "CONSOLE"), + self.PrintTables(perf_table, "CONSOLE"), + self.experiment.experiment_file, + self.experiment.machine_manager.GetAllCPUInfo( + self.experiment.labels)) return self.TEXT % (self.experiment.name, self.PrintTables(summary_table, "EMAIL"), - self.experiment.experiment_file) + self.experiment.machine_manager.num_reimages, + self.PrintTables(status_table, "EMAIL"), + self.PrintTables(perf_table, "EMAIL"), + self.experiment.experiment_file, + self.experiment.machine_manager.GetAllCPUInfo( + self.experiment.labels)) class HTMLResultsReport(ResultsReport): @@ -243,6 +339,7 @@ pre { google.setOnLoadCallback(init); function init() { switchTab('summary', 'html'); + %s switchTab('full', 'html'); drawTable(); } @@ -268,6 +365,7 @@ pre { </div> %s </div> + %s <div class='results-section'> <div class='results-section-title'>Charts</div> <div class='results-section-content'>%s</div> @@ -291,6 +389,18 @@ pre { </html> """ + PERF_HTML = """ + <div class='results-section'> + <div class='results-section-title'>Perf Table</div> + <div class='results-section-content'> + <div id='perf-html'>%s</div> + <div id='perf-text'><pre>%s</pre></div> + <div id='perf-tsv'><pre>%s</pre></div> + </div> + %s + </div> +""" + def __init__(self, experiment): super(HTMLResultsReport, self).__init__(experiment) @@ -313,11 +423,26 @@ pre { summary_table = self.GetSummaryTables() full_table = self.GetFullTables() - return self.HTML % (chart_javascript, + perf_table = self.GetSummaryTables(perf=True) + if perf_table: + perf_html = self.PERF_HTML % ( + self.PrintTables(perf_table, "HTML"), + self.PrintTables(perf_table, "PLAIN"), + self.PrintTables(perf_table, "TSV"), + self._GetTabMenuHTML("perf") + ) + perf_init = "switchTab('perf', 'html');" + else: + perf_html = "" + perf_init = "" + + return self.HTML % (perf_init, + chart_javascript, self.PrintTables(summary_table, "HTML"), self.PrintTables(summary_table, "PLAIN"), self.PrintTables(summary_table, "TSV"), self._GetTabMenuHTML("summary"), + perf_html, chart_divs, self.PrintTables(full_table, "HTML"), self.PrintTables(full_table, "PLAIN"), diff --git a/crosperf/settings_factory.py b/crosperf/settings_factory.py index 782f0dd3..11fa4b4b 100644 --- a/crosperf/settings_factory.py +++ b/crosperf/settings_factory.py @@ -1,9 +1,9 @@ #!/usr/bin/python # Copyright 2011 Google Inc. All Rights Reserved. +"""Setting files for global, benchmark and labels.""" from field import BooleanField -from field import EnumField from field import FloatField from field import IntegerField from field import ListField @@ -46,6 +46,13 @@ class LabelSettings(Settings): "chromeos_image.")) self.AddField(TextField("board", required=True, description="The target " "board for running experiments on, e.g. x86-alex.")) + self.AddField(ListField("remote", description= + "A comma-separated list of ip's of chromeos" + "devices to run experiments on.")) + self.AddField(TextField("image_args", required=False, + default="", + description="Extra arguments to pass to " + "image_chromeos.py.")) class GlobalSettings(Settings): @@ -56,20 +63,24 @@ class GlobalSettings(Settings): "identifier.")) self.AddField(TextField("board", description="The target " "board for running experiments on, e.g. x86-alex.")) - self.AddField(ListField("remote", required=True, + self.AddField(ListField("remote", description="A comma-separated list of ip's of " "chromeos devices to run experiments on.")) self.AddField(BooleanField("rerun_if_failed", description="Whether to " "re-run failed autotest runs or not.", default=False)) self.AddField(ListField("email", description="Space-seperated" - "list of email addresses to send email to.")) + "list of email addresses to send email to.")) self.AddField(BooleanField("rerun", description="Whether to ignore the " "cache and for autotests to be re-run.", default=False)) - self.AddField(BooleanField("exact_remote", default=True, + self.AddField(BooleanField("same_specs", default=True, description="Ensure cached runs are run on the " - "same device that is specified as a remote.")) + "same kind of devices which are specified as a " + "remote.")) + self.AddField(BooleanField("same_machine", default=False, + description="Ensure cached runs are run on the " + "exact the same remote")) self.AddField(IntegerField("iterations", default=1, description="Number of iterations to run all " "autotests.")) diff --git a/crosperf/test_flag.py b/crosperf/test_flag.py new file mode 100644 index 00000000..613138b2 --- /dev/null +++ b/crosperf/test_flag.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +# +# Copyright 2011 Google Inc. All Rights Reserved. + +"""A global variable for testing.""" + + +_is_test = [False] + + +def SetTestMode(flag): + _is_test[0] = flag + + +def GetTestMode(): + return _is_test[0] diff --git a/image_chromeos.py b/image_chromeos.py index 380a94f7..30b29a45 100755 --- a/image_chromeos.py +++ b/image_chromeos.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2.6 +#!/usr/bin/python # # Copyright 2011 Google Inc. All Rights Reserved. @@ -9,7 +9,6 @@ This script images a remote ChromeOS device with a specific image." __author__ = "asharif@google.com (Ahmad Sharif)" -import fcntl import filecmp import glob import optparse @@ -18,13 +17,15 @@ import re import shutil import sys import tempfile +import time + from utils import command_executer from utils import logger from utils import misc from utils.file_utils import FileUtils checksum_file = "/usr/local/osimage_checksum_file" -lock_file = "/tmp/lock_image_chromeos" +lock_file = "/tmp/image_chromeos_lock/image_chromeos_lock" def Usage(parser, message): print "ERROR: " + message @@ -32,17 +33,9 @@ def Usage(parser, message): sys.exit(0) -def Main(argv): +def DoImage(argv): """Build ChromeOS.""" - #Get lock for the host - f = open(lock_file, "w+a") - try: - fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB) - except IOError: - f.close() - print ("You can not run two instances of image_chromes at the same time." - "\nTry again. Exiting ....") - exit(0) + # Common initializations cmd_executer = command_executer.GetCommandExecuter() l = logger.GetLogger() @@ -61,8 +54,8 @@ def Main(argv): default=False, help="Force an image even if it is non-test.") parser.add_option("-a", - "--image_to_live_args", - dest="image_to_live_args") + "--image_args", + dest="image_args") options = parser.parse_args(argv[1:])[0] @@ -132,21 +125,40 @@ def Main(argv): chromeos_root=options.chromeos_root, machine=options.remote) - command = (options.chromeos_root + - "/src/scripts/image_to_live.sh --remote=" + - options.remote + - " --image=" + located_image) - if options.image_to_live_args: - command += " %s" % options.image_to_live_args - - retval = cmd_executer.RunCommand(command) - + real_src_dir = os.path.join(os.path.realpath(options.chromeos_root), + "src") + if located_image.find(real_src_dir) != 0: + raise Exception("Located image: %s not in chromeos_root: %s" % + (located_image, options.chromeos_root)) + chroot_image = os.path.join( + "..", + located_image[len(real_src_dir):].lstrip("/")) + cros_image_to_target_args = ["--remote=%s" % options.remote, + "--board=%s" % board, + "--from=%s" % os.path.dirname(chroot_image), + "--image-name=%s" % + os.path.basename(located_image)] + + command = ("./bin/cros_image_to_target.py %s" % + " ".join(cros_image_to_target_args)) + if options.image_args: + command += " %s" % options.image_args + + # Workaround for crosbug.com/35684. + os.chmod(misc.GetChromeOSKeyFile(options.chromeos_root), 0600) + retval = cmd_executer.ChrootRunCommand(options.chromeos_root, + command) if found == False: temp_dir = os.path.dirname(located_image) l.LogOutput("Deleting temp image dir: %s" % temp_dir) shutil.rmtree(temp_dir) logger.GetLogger().LogFatalIf(retval, "Image command failed") + + # Unfortunately cros_image_to_target.py sometimes returns early when the + # machine isn't fully up yet. + retval = EnsureMachineUp(options.chromeos_root, options.remote) + command = "echo %s > %s && chmod -w %s" % (image_checksum, checksum_file, checksum_file) retval = cmd_executer.CrosRunCommand(command, @@ -161,9 +173,6 @@ def Main(argv): "Image verification failed!") else: l.LogOutput("Checksums match. Skipping reimage") - - fcntl.lockf(f, fcntl.LOCK_UN) - f.close() return retval @@ -260,6 +269,34 @@ def VerifyChromeChecksum(chromeos_root, image, remote): return False +def EnsureMachineUp(chromeos_root, remote): + l = logger.GetLogger() + cmd_executer = command_executer.GetCommandExecuter() + timeout = 600 + magic = "abcdefghijklmnopqrstuvwxyz" + command = "echo %s" % magic + start_time = time.time() + while True: + current_time = time.time() + if current_time - start_time > timeout: + l.LogError("Timeout of %ss reached. Machine still not up. Aborting." % + timeout) + return False + retval = cmd_executer.CrosRunCommand(command, + chromeos_root=chromeos_root, + machine=remote) + if not retval: + return True + + +def Main(argv): + misc.AcquireLock(lock_file) + try: + return DoImage(argv) + finally: + misc.ReleaseLock(lock_file) + + if __name__ == "__main__": retval = Main(sys.argv) sys.exit(retval) diff --git a/lock_machine.py b/lock_machine.py index c5f98092..0f948c3d 100755 --- a/lock_machine.py +++ b/lock_machine.py @@ -1,10 +1,8 @@ -#!/usr/bin/python2.6 +#!/usr/bin/python # # Copyright 2010 Google Inc. All Rights Reserved. -"""Script to lock/unlock machines. - -""" +"""Script to lock/unlock machines.""" __author__ = "asharif@google.com (Ahmad Sharif)" @@ -12,14 +10,31 @@ import datetime import fcntl import getpass import glob +import json import optparse import os -import pickle import socket import sys import time + from utils import logger +LOCK_SUFFIX = "_check_lock_liveness" + + +def FileCheckName(name): + return name + LOCK_SUFFIX + + +def OpenLiveCheck(file_name): + with FileCreationMask(0000): + fd = open(file_name, "a+w") + try: + fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except IOError: + raise + return fd + class FileCreationMask(object): def __init__(self, mask): @@ -33,12 +48,23 @@ class FileCreationMask(object): class LockDescription(object): - def __init__(self): - self.owner = "" - self.exclusive = False - self.counter = 0 - self.time = 0 - self.reason = "" + """The description of the lock.""" + + def __init__(self, desc=None): + try: + self.owner = desc["owner"] + self.exclusive = desc["exclusive"] + self.counter = desc["counter"] + self.time = desc["time"] + self.reason = desc["reason"] + self.auto = desc["auto"] + except (KeyError, TypeError): + self.owner = "" + self.exclusive = False + self.counter = 0 + self.time = 0 + self.reason = "" + self.auto = False def IsLocked(self): return self.counter or self.exclusive @@ -48,23 +74,26 @@ class LockDescription(object): "Exclusive: %s" % self.exclusive, "Counter: %s" % self.counter, "Time: %s" % self.time, - "Reason: %s" % self.reason]) + "Reason: %s" % self.reason, + "Auto: %s" % self.auto]) class FileLock(object): - LOCKS_DIR = "/home/mobiletc-prebuild/locks" + """File lock operation class.""" + FILE_OPS = [] def __init__(self, lock_filename): - assert os.path.isdir(self.LOCKS_DIR), ( - "Locks dir: %s doesn't exist!" % self.LOCKS_DIR) - self._filepath = os.path.join(self.LOCKS_DIR, lock_filename) + self._filepath = lock_filename + lock_dir = os.path.dirname(lock_filename) + assert os.path.isdir(lock_dir), ( + "Locks dir: %s doesn't exist!" % lock_dir) self._file = None @classmethod def AsString(cls, file_locks): - stringify_fmt = "%-30s %-15s %-4s %-4s %-15s %-40s" + stringify_fmt = "%-30s %-15s %-4s %-4s %-15s %-40s %-4s" header = stringify_fmt % ("machine", "owner", "excl", "ctr", - "elapsed", "reason") + "elapsed", "reason", "auto") lock_strings = [] for file_lock in file_locks: @@ -77,15 +106,20 @@ class FileLock(object): file_lock._description.exclusive, file_lock._description.counter, elapsed_time, - file_lock._description.reason)) + file_lock._description.reason, + file_lock._description.auto)) table = "\n".join(lock_strings) return "\n".join([header, table]) @classmethod - def ListLock(cls, pattern): - full_pattern = os.path.join(cls.LOCKS_DIR, pattern) + def ListLock(cls, pattern, locks_dir): + if not locks_dir: + locks_dir = Machine.LOCKS_DIR + full_pattern = os.path.join(locks_dir, pattern) file_locks = [] for lock_filename in glob.glob(full_pattern): + if LOCK_SUFFIX in lock_filename: + continue file_lock = FileLock(lock_filename) with file_lock as lock: if lock.IsLocked(): @@ -102,9 +136,26 @@ class FileLock(object): raise IOError("flock(%s, LOCK_EX) failed!" % self._filepath) try: - self._description = pickle.load(self._file) - except (EOFError, pickle.PickleError): - self._description = LockDescription() + desc = json.load(self._file) + except (EOFError, ValueError): + desc = None + self._description = LockDescription(desc) + + if self._description.exclusive and self._description.auto: + locked_byself = False + for fd in self.FILE_OPS: + if fd.name == FileCheckName(self._filepath): + locked_byself = True + break + if not locked_byself: + try: + fp = OpenLiveCheck(FileCheckName(self._filepath)) + except IOError: + pass + else: + self._description = LockDescription() + fcntl.lockf(fp, fcntl.LOCK_UN) + fp.close() return self._description # Check this differently? except IOError as ex: @@ -113,7 +164,7 @@ class FileLock(object): def __exit__(self, type, value, traceback): self._file.truncate(0) - self._file.write(pickle.dumps(self._description)) + self._file.write(json.dumps(self._description.__dict__, skipkeys=True)) self._file.close() def __str__(self): @@ -121,12 +172,14 @@ class FileLock(object): class Lock(object): - def __init__(self, to_lock): - self._to_lock = to_lock + def __init__(self, lock_file, auto=True): + self._to_lock = os.path.basename(lock_file) + self._lock_file = lock_file self._logger = logger.GetLogger() + self._auto = auto def NonBlockingLock(self, exclusive, reason=""): - with FileLock(self._to_lock) as lock: + with FileLock(self._lock_file) as lock: if lock.exclusive: self._logger.LogError( "Exclusive lock already acquired by %s. Reason: %s" % @@ -137,17 +190,22 @@ class Lock(object): if lock.counter: self._logger.LogError("Shared lock already acquired") return False + lock_file_check = FileCheckName(self._lock_file) + fd = OpenLiveCheck(lock_file_check) + FileLock.FILE_OPS.append(fd) + lock.exclusive = True lock.reason = reason lock.owner = getpass.getuser() lock.time = time.time() + lock.auto = self._auto else: lock.counter += 1 self._logger.LogOutput("Successfully locked: %s" % self._to_lock) return True def Unlock(self, exclusive, force=False): - with FileLock(self._to_lock) as lock: + with FileLock(self._lock_file) as lock: if not lock.IsLocked(): self._logger.LogError("Can't unlock unlocked machine!") return False @@ -161,28 +219,61 @@ class Lock(object): self._logger.LogError("%s can't unlock lock owned by: %s" % (getpass.getuser(), lock.owner)) return False + if lock.auto != self._auto: + self._logger.LogError("Can't unlock lock with different -a" + " parameter.") + return False lock.exclusive = False lock.reason = "" lock.owner = "" + + if self._auto: + del_list = [i for i in FileLock.FILE_OPS + if i.name == FileCheckName(self._lock_file)] + for i in del_list: + FileLock.FILE_OPS.remove(i) + for f in del_list: + fcntl.lockf(f, fcntl.LOCK_UN) + f.close() + del del_list + os.remove(FileCheckName(self._lock_file)) + else: lock.counter -= 1 return True class Machine(object): - def __init__(self, name): + LOCKS_DIR = "/home/mobiletc-prebuild/locks" + + def __init__(self, name, locks_dir=LOCKS_DIR, auto=True): self._name = name + self._auto = auto try: self._full_name = socket.gethostbyaddr(name)[0] except socket.error: self._full_name = self._name + self._full_name = os.path.join(locks_dir, self._full_name) def Lock(self, exclusive=False, reason=""): - lock = Lock(self._full_name) + lock = Lock(self._full_name, self._auto) return lock.NonBlockingLock(exclusive, reason) + def TryLock(self, timeout=300, exclusive=False, reason=""): + locked = False + sleep = timeout / 10 + while True: + locked = self.Lock(exclusive, reason) + if locked or not timeout >= 0: + break + print "Lock not acquired for {0}, wait {1} seconds ...".format( + self._name, sleep) + time.sleep(sleep) + timeout -= sleep + return locked + def Unlock(self, exclusive=False, ignore_ownership=False): - lock = Lock(self._full_name) + lock = Lock(self._full_name, self._auto) return lock.Unlock(exclusive, ignore_ownership) @@ -218,9 +309,16 @@ def Main(argv): action="store_true", default=False, help="Use this for a shared (non-exclusive) lock.") + parser.add_option("-d", + "--dir", + dest="locks_dir", + action="store", + default=Machine.LOCKS_DIR, + help="Use this to set different locks_dir") options, args = parser.parse_args(argv) + options.locks_dir = os.path.abspath(options.locks_dir) exclusive = not options.shared if not options.list_locks and len(args) != 2: @@ -229,12 +327,12 @@ def Main(argv): return 1 if len(args) > 1: - machine = Machine(args[1]) + machine = Machine(args[1], options.locks_dir, auto=False) else: machine = None if options.list_locks: - FileLock.ListLock("*") + FileLock.ListLock("*", options.locks_dir) retval = True elif options.unlock: retval = machine.Unlock(exclusive, options.ignore_ownership) diff --git a/utils/command_executer.py b/utils/command_executer.py index 54c9c355..3edb6262 100644 --- a/utils/command_executer.py +++ b/utils/command_executer.py @@ -80,7 +80,8 @@ class CommandExecuter: while len(pipes): fds = select.select(pipes, [], [], 0.1) if command_terminator and command_terminator.IsTerminated(): - self.RunCommand("sudo kill -9 " + str(p.pid)) + self.RunCommand("sudo kill -9 " + str(p.pid), + print_to_console=print_to_console) wait = p.wait() self.logger.LogError("Command was terminated!", print_to_console) if return_output: @@ -119,7 +120,8 @@ class CommandExecuter: % command_timeout) self.logger.LogWarning(m, print_to_console) self.RunCommand("kill %d || sudo kill %d || sudo kill -9 %d" % - (p.pid, p.pid, p.pid)) + (p.pid, p.pid, p.pid), + print_to_console=print_to_console) break if out == err == "": @@ -172,12 +174,17 @@ class CommandExecuter: # Write all commands to a file. command_file = self.WriteToTempShFile(cmd) - self.CopyFiles(command_file, command_file, - dest_machine=machine, - command_terminator=command_terminator, - chromeos_root=chromeos_root, - dest_cros=True, - recursive=False) + retval = self.CopyFiles(command_file, command_file, + dest_machine=machine, + command_terminator=command_terminator, + chromeos_root=chromeos_root, + dest_cros=True, + recursive=False, + print_to_console=print_to_console) + if retval: + self.logger.LogError("Could not run remote command on machine." + " Is the machine up?") + return retval command = self.RemoteAccessInitCommand(chromeos_root, machine) command += "\nremote_sh bash %s" % command_file @@ -185,7 +192,8 @@ class CommandExecuter: retval = self.RunCommand(command, return_output, command_terminator=command_terminator, command_timeout=command_timeout, - terminated_timeout=terminated_timeout) + terminated_timeout=terminated_timeout, + print_to_console=print_to_console) if return_output: connect_signature = ("Initiating first contact with remote host\n" + "Connection OK\n") @@ -200,7 +208,8 @@ class CommandExecuter: def ChrootRunCommand(self, chromeos_root, command, return_output=False, command_terminator=None, command_timeout=None, terminated_timeout=10, - print_to_console=True): + print_to_console=True, + cros_sdk_options=""): self.logger.LogCmd(command, print_to_console) handle, command_file = tempfile.mkstemp(dir=os.path.join(chromeos_root, @@ -213,12 +222,13 @@ class CommandExecuter: os.chmod(command_file, 0777) - command = "cd %s; cros_sdk -- ./%s" % (chromeos_root, - os.path.basename(command_file)) + command = "cd %s; cros_sdk %s -- ./%s" % (chromeos_root, cros_sdk_options, + os.path.basename(command_file)) ret = self.RunCommand(command, return_output, command_terminator=command_terminator, command_timeout=command_timeout, - terminated_timeout=terminated_timeout) + terminated_timeout=terminated_timeout, + print_to_console=print_to_console) os.remove(command_file) return ret @@ -232,7 +242,8 @@ class CommandExecuter: def CopyFiles(self, src, dest, src_machine=None, dest_machine=None, src_user=None, dest_user=None, recursive=True, command_terminator=None, - chromeos_root=None, src_cros=False, dest_cros=False): + chromeos_root=None, src_cros=False, dest_cros=False, + print_to_console=True): src = os.path.expanduser(src) dest = os.path.expanduser(dest) @@ -262,13 +273,15 @@ class CommandExecuter: return self.RunCommand(command, machine=src_machine, username=src_user, - command_terminator=command_terminator) + command_terminator=command_terminator, + print_to_console=print_to_console) else: command += rsync_prefix + "root@%s:%s %s" % (src_machine, src, dest) return self.RunCommand(command, machine=dest_machine, username=dest_user, - command_terminator=command_terminator) + command_terminator=command_terminator, + print_to_console=print_to_console) if dest_machine == src_machine: @@ -285,7 +298,8 @@ class CommandExecuter: return self.RunCommand(command, machine=dest_machine, username=dest_user, - command_terminator=command_terminator) + command_terminator=command_terminator, + print_to_console=print_to_console) class MockCommandExecuter(CommandExecuter): diff --git a/utils/logger.py b/utils/logger.py index a8f0a6f0..faad9666 100644 --- a/utils/logger.py +++ b/utils/logger.py @@ -7,8 +7,11 @@ import os.path import sys import traceback -# Local modules -import misc +#TODO(yunlian@google.com): Use GetRoot from misc +def GetRoot(scr_name): + """Break up pathname into (dir+name).""" + abs_path = os.path.abspath(scr_name) + return (os.path.dirname(abs_path), os.path.basename(abs_path)) class Logger(object): @@ -168,7 +171,7 @@ def InitLogger(script_name, print_console=True): """Initialize a global logger. To be called only once.""" global main_logger assert not main_logger, "The logger has already been initialized" - rootdir, basefilename = misc.GetRoot(script_name) + rootdir, basefilename = GetRoot(script_name) main_logger = Logger(rootdir, basefilename, print_console) diff --git a/utils/misc.py b/utils/misc.py index cb9acbfa..40622512 100644 --- a/utils/misc.py +++ b/utils/misc.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2.6 +#!/usr/bin/python # # Copyright 2010 Google Inc. All Rights Reserved. @@ -9,12 +9,18 @@ __author__ = "asharif@google.com (Ahmad Sharif)" from contextlib import contextmanager import os import re +import shutil +import sys +import time + +import lock_machine import command_executer import logger def GetChromeOSVersionFromLSBVersion(lsb_version): + """Get Chromeos version from Lsb version.""" ce = command_executer.GetCommandExecuter() command = "git ls-remote http://git.chromium.org/chromiumos/manifest.git" ret, out, _ = ce.RunCommand(command, return_output=True, @@ -22,7 +28,7 @@ def GetChromeOSVersionFromLSBVersion(lsb_version): assert ret == 0, "Command %s failed" % command lower = [] for line in out.splitlines(): - mo = re.search("refs/heads/release-R(\d+)-(\d+)\.B", line) + mo = re.search(r"refs/heads/release-R(\d+)-(\d+)\.B", line) if mo: revision = int(mo.group(1)) build = int(mo.group(2)) @@ -42,12 +48,13 @@ def ApplySubs(string, *substitutions): return string -def UnitToNumber(string, base=1000): +def UnitToNumber(unit_num, base=1000): + """Convert a number with unit to float.""" unit_dict = {"kilo": base, "mega": base**2, "giga": base**3} - string = string.lower() - mo = re.search("(\d*)(.+)?", string) + unit_num = unit_num.lower() + mo = re.search(r"(\d*)(.+)?", unit_num) number = mo.group(1) unit = mo.group(2) if not unit: @@ -57,15 +64,15 @@ def UnitToNumber(string, base=1000): return float(number) * v raise Exception("Unit: %s not found in byte: %s!" % (unit, - string)) + unit_num)) def GetFilenameFromString(string): return ApplySubs(string, - ("/", "__"), - ("\s", "_"), - ("[\^\$=\"\\\?]", ""), - ) + (r"/", "__"), + (r"\s", "_"), + (r"[\^\$=\"\\\?]", ""), + ) def GetRoot(scr_name): @@ -74,6 +81,15 @@ def GetRoot(scr_name): return (os.path.dirname(abs_path), os.path.basename(abs_path)) +def GetChromeOSKeyFile(chromeos_root): + return os.path.join(chromeos_root, + "src", + "scripts", + "mod_for_test_scripts", + "ssh_keys", + "testing_rsa") + + def GetChrootPath(chromeos_root): return os.path.join(chromeos_root, "chroot") @@ -101,7 +117,7 @@ def FormatCommands(commands): return ApplySubs(str(commands), ("&&", "&&\n"), (";", ";\n"), - ("\n+\s*", "\n")) + (r"\n+\s*", "\n")) def GetImageDir(chromeos_root, board): @@ -145,6 +161,7 @@ def GetBuildImageCommand(board): def GetSetupBoardCommand(board, gcc_version=None, binutils_version=None, usepkg=None, force=None): + """Get setup_board command.""" options = [] if gcc_version: @@ -171,6 +188,7 @@ def CanonicalizePath(path): def GetCtargetFromBoard(board, chromeos_root): + """Get Ctarget from board.""" base_board = board.split("_")[0] command = ("source " "../platform/dev/toolchain_utils.sh; get_ctarget_from_board %s" % @@ -187,7 +205,7 @@ def GetCtargetFromBoard(board, chromeos_root): def StripANSIEscapeSequences(string): - string = re.sub("\x1b\[[0-9]*[a-zA-Z]", "", string) + string = re.sub(r"\x1b\[[0-9]*[a-zA-Z]", "", string) return string @@ -200,6 +218,7 @@ def GetEnvStringFromDict(env_dict): def MergeEnvStringWithDict(env_string, env_dict, prepend=True): + """Merge env string with dict.""" if not env_string.strip(): return GetEnvStringFromDict(env_dict) override_env_list = [] @@ -226,8 +245,67 @@ def GetAllImages(chromeos_root, board): return out.splitlines() +def AcquireLock(lock_file, timeout=1200): + """Acquire a lock with timeout.""" + start_time = time.time() + locked = False + abs_path = os.path.abspath(lock_file) + dir_path = os.path.dirname(abs_path) + sleep_time = min(10, timeout/10.0) + if not os.path.exists(dir_path): + try: + os.makedirs(dir_path) + except OSError: + print "Cannot create dir {0}, exiting...".format(dir_path) + exit(0) + while True: + locked = (lock_machine.Lock(lock_file).NonBlockingLock(True, sys.argv[0])) + if locked: + break + time.sleep(sleep_time) + if time.time() - start_time > timeout: + logger.GetLogger().LogWarning( + "Could not acquire lock on this file: {0} within {1} seconds." + "Manually remove the file if you think the lock is stale" + .format(abs_path, timeout)) + break + return locked + + +def ReleaseLock(lock_file): + lock_file = os.path.abspath(lock_file) + ret = lock_machine.Lock(lock_file).Unlock(True) + assert ret, ("Could not unlock {0}," + "Please remove it manually".format(lock_file)) + + +def IsFloat(text): + if text is None: + return False + try: + float(text) + return True + except ValueError: + return False + +def RemoveChromeBrowserObjectFiles(chromeos_root, board): + """ Remove any object files from all the posible locations """ + out_dir = os.path.join( + GetChrootPath(chromeos_root), + "var/cache/chromeos-chrome/chrome-src/src/out_%s" % board) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + logger.GetLogger().LogCmd("rm -rf %s" % out_dir) + out_dir = os.path.join( + GetChrootPath(chromeos_root), + "var/cache/chromeos-chrome/chrome-src-internal/src/out_%s" % board) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + logger.GetLogger().LogCmd("rm -rf %s" % out_dir) + @contextmanager def WorkingDirectory(new_dir): + """Get the working directory.""" old_dir = os.getcwd() if old_dir != new_dir: msg = "cd %s" % new_dir diff --git a/utils/perf_diff.py b/utils/perf_diff.py new file mode 100755 index 00000000..cfe90a9b --- /dev/null +++ b/utils/perf_diff.py @@ -0,0 +1,318 @@ +#!/usr/bin/python +# Copyright 2012 Google Inc. All Rights Reserved. + +"""One-line documentation for perf_diff module. + +A detailed description of perf_diff. +""" + +__author__ = "asharif@google.com (Ahmad Sharif)" + +import optparse +import re +import sys + +import misc +import tabulator + + +def GetPerfDictFromReport(report_file, num_functions=5): + output = {} + perf_report = PerfReport(report_file) + for k, v in perf_report.sections.items(): + if k not in output: + output[k] = {} + for function in v.functions[:num_functions]: + out_key = "%s" % (function.name) + output[k][out_key] = function.count + return output + + +def _SortDictionaryByValue(d): + l = [(k, v) for (k, v) in d.iteritems()] + + def GetFloat(x): + if misc.IsFloat(x): + return float(x) + else: + return x + + sorted_l = sorted(l, + key=lambda x: GetFloat(x[1])) + sorted_l.reverse() + return [f[0] for f in sorted_l] + + +class Tabulator(object): + def __init__(self, all_dicts): + self._all_dicts = all_dicts + + def PrintTable(self): + for dicts in self._all_dicts: + self.PrintTableHelper(dicts) + + def PrintTableHelper(self, dicts): + """Transfrom dicts to tables.""" + fields = {} + for d in dicts: + for f in d.keys(): + if f not in fields: + fields[f] = d[f] + else: + fields[f] = max(fields[f], d[f]) + table = [] + header = ["name"] + for i in range(len(dicts)): + header.append(i) + + table.append(header) + + sorted_fields = _SortDictionaryByValue(fields) + + for f in sorted_fields: + row = [f] + for d in dicts: + if f in d: + row.append(d[f]) + else: + row.append("0") + table.append(row) + + print tabulator.GetSimpleTable(table) + + +class Function(object): + def __init__(self): + self.count = 0 + self.name = "" + + +class Section(object): + def __init__(self, contents): + self.raw_contents = contents + self._ParseSection() + + def _ParseSection(self): + matches = re.findall(r"Events: (\w+)\s+(.*)", self.raw_contents) + assert len(matches) <= 1, "More than one event found in 1 section" + if not matches: + return + match = matches[0] + self.name = match[1] + self.count = misc.UnitToNumber(match[0]) + + self.functions = [] + for line in self.raw_contents.splitlines(): + if not line.strip(): + continue + if "%" not in line: + continue + if not line.startswith("#"): + fields = [f for f in line.split(" ") if f] + function = Function() + function.count = int(fields[1]) + function.name = " ".join(fields[2:]) + self.functions.append(function) + + +class PerfReport(object): + """Get report from raw report.""" + + def __init__(self, perf_file): + self.perf_file = perf_file + self._ReadFile() + self.sections = {} + self.metadata = {} + self._section_contents = [] + self._section_header = "" + self._SplitSections() + self._ParseSections() + self._ParseSectionHeader() + + def _ParseSectionHeader(self): + """Parse a header of a perf report file.""" + # The "captured on" field is inaccurate - this actually refers to when the + # report was generated, not when the data was captured. + for line in self._section_header.splitlines(): + line = line[2:] + if ":" in line: + key, val = line.strip().split(":", 1) + key = key.strip() + val = val.strip() + self.metadata[key] = val + + def _ReadFile(self): + self._perf_contents = open(self.perf_file).read() + + def _ParseSections(self): + self.event_counts = {} + self.sections = {} + for section_content in self._section_contents: + section = Section(section_content) + section.name = self._GetHumanReadableName(section.name) + self.sections[section.name] = section + + # TODO(asharif): Do this better. + def _GetHumanReadableName(self, section_name): + if not "raw" in section_name: + return section_name + raw_number = section_name.strip().split(" ")[-1] + for line in self._section_header.splitlines(): + if raw_number in line: + name = line.strip().split(" ")[5] + return name + + def _SplitSections(self): + self._section_contents = [] + indices = [m.start() for m in re.finditer("Events:", self._perf_contents)] + indices.append(len(self._perf_contents)) + for i in range(len(indices) - 1): + section_content = self._perf_contents[indices[i]:indices[i+1]] + self._section_contents.append(section_content) + self._section_header = "" + if indices: + self._section_header = self._perf_contents[0:indices[0]] + + +class PerfDiffer(object): + """Perf differ class.""" + + def __init__(self, reports, num_symbols, common_only): + self._reports = reports + self._num_symbols = num_symbols + self._common_only = common_only + self._common_function_names = {} + + def DoDiff(self): + """The function that does the diff.""" + section_names = self._FindAllSections() + + filename_dicts = [] + summary_dicts = [] + for report in self._reports: + d = {} + filename_dicts.append({"file": report.perf_file}) + for section_name in section_names: + if section_name in report.sections: + d[section_name] = report.sections[section_name].count + summary_dicts.append(d) + + all_dicts = [filename_dicts, summary_dicts] + + for section_name in section_names: + function_names = self._GetTopFunctions(section_name, + self._num_symbols) + self._FindCommonFunctions(section_name) + dicts = [] + for report in self._reports: + d = {} + if section_name in report.sections: + section = report.sections[section_name] + + # Get a common scaling factor for this report. + common_scaling_factor = self._GetCommonScalingFactor(section) + + for function in section.functions: + if function.name in function_names: + key = "%s %s" % (section.name, function.name) + d[key] = function.count + # Compute a factor to scale the function count by in common_only + # mode. + if self._common_only and ( + function.name in self._common_function_names[section.name]): + d[key + " scaled"] = common_scaling_factor * function.count + dicts.append(d) + + all_dicts.append(dicts) + + mytabulator = Tabulator(all_dicts) + mytabulator.PrintTable() + + def _FindAllSections(self): + sections = {} + for report in self._reports: + for section in report.sections.values(): + if section.name not in sections: + sections[section.name] = section.count + else: + sections[section.name] = max(sections[section.name], + section.count) + return _SortDictionaryByValue(sections) + + def _GetCommonScalingFactor(self, section): + unique_count = self._GetCount( + section, + lambda x: x in self._common_function_names[section.name]) + return 100.0/unique_count + + def _GetCount(self, section, filter_fun=None): + total_count = 0 + for function in section.functions: + if not filter_fun or filter_fun(function.name): + total_count += int(function.count) + return total_count + + def _FindCommonFunctions(self, section_name): + function_names_list = [] + for report in self._reports: + if section_name in report.sections: + section = report.sections[section_name] + function_names = [f.name for f in section.functions] + function_names_list.append(function_names) + + self._common_function_names[section_name] = ( + reduce(set.intersection, map(set, function_names_list))) + + def _GetTopFunctions(self, section_name, num_functions): + all_functions = {} + for report in self._reports: + if section_name in report.sections: + section = report.sections[section_name] + for f in section.functions[:num_functions]: + if f.name in all_functions: + all_functions[f.name] = max(all_functions[f.name], f.count) + else: + all_functions[f.name] = f.count + # FIXME(asharif): Don't really need to sort these... + return _SortDictionaryByValue(all_functions) + + def _GetFunctionsDict(self, section, function_names): + d = {} + for function in section.functions: + if function.name in function_names: + d[function.name] = function.count + return d + + +def Main(argv): + """The entry of the main.""" + parser = optparse.OptionParser() + parser.add_option("-n", + "--num_symbols", + dest="num_symbols", + default="5", + help="The number of symbols to show.") + parser.add_option("-c", + "--common_only", + dest="common_only", + action="store_true", + default=False, + help="Diff common symbols only.") + + options, args = parser.parse_args(argv) + + try: + reports = [] + for report in args[1:]: + report = PerfReport(report) + reports.append(report) + pd = PerfDiffer(reports, int(options.num_symbols), options.common_only) + pd.DoDiff() + finally: + pass + + return 0 + + +if __name__ == "__main__": + sys.exit(Main(sys.argv)) diff --git a/utils/pstat.py b/utils/pstat.py new file mode 100644 index 00000000..dae681e6 --- /dev/null +++ b/utils/pstat.py @@ -0,0 +1,1068 @@ +# Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# Comments and/or additions are welcome (send e-mail to: +# strang@nmr.mgh.harvard.edu). +# +""" +pstat.py module + +################################################# +####### Written by: Gary Strangman ########### +####### Last modified: Dec 18, 2007 ########### +################################################# + +This module provides some useful list and array manipulation routines +modeled after those found in the |Stat package by Gary Perlman, plus a +number of other useful list/file manipulation functions. The list-based +functions include: + + abut (source,*args) + simpleabut (source, addon) + colex (listoflists,cnums) + collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) + dm (listoflists,criterion) + flat (l) + linexand (listoflists,columnlist,valuelist) + linexor (listoflists,columnlist,valuelist) + linedelimited (inlist,delimiter) + lineincols (inlist,colsize) + lineincustcols (inlist,colsizes) + list2string (inlist) + makelol(inlist) + makestr(x) + printcc (lst,extra=2) + printincols (listoflists,colsize) + pl (listoflists) + printl(listoflists) + replace (lst,oldval,newval) + recode (inlist,listmap,cols='all') + remap (listoflists,criterion) + roundlist (inlist,num_digits_to_round_floats_to) + sortby(listoflists,sortcols) + unique (inlist) + duplicates(inlist) + writedelimited (listoflists, delimiter, file, writetype='w') + +Some of these functions have alternate versions which are defined only if +Numeric (NumPy) can be imported. These functions are generally named as +above, with an 'a' prefix. + + aabut (source, *args) + acolex (a,indices,axis=1) + acollapse (a,keepcols,collapsecols,sterr=0,ns=0) + adm (a,criterion) + alinexand (a,columnlist,valuelist) + alinexor (a,columnlist,valuelist) + areplace (a,oldval,newval) + arecode (a,listmap,col='all') + arowcompare (row1, row2) + arowsame (row1, row2) + asortrows(a,axis=0) + aunique(inarray) + aduplicates(inarray) + +Currently, the code is all but completely un-optimized. In many cases, the +array versions of functions amount simply to aliases to built-in array +functions/methods. Their inclusion here is for function name consistency. +""" + +## CHANGE LOG: +## ========== +## 07-11-26 ... edited to work with numpy +## 01-11-15 ... changed list2string() to accept a delimiter +## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1 +## 01-05-31 ... added duplicates() and aduplicates() functions +## 00-12-28 ... license made GPL, docstring and import requirements +## 99-11-01 ... changed version to 0.3 +## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py) +## 03/27/99 ... added areplace function, made replace fcn recursive +## 12/31/98 ... added writefc function for ouput to fixed column sizes +## 12/07/98 ... fixed import problem (failed on collapse() fcn) +## added __version__ variable (now 0.2) +## 12/05/98 ... updated doc-strings +## added features to collapse() function +## added flat() function for lists +## fixed a broken asortrows() +## 11/16/98 ... fixed minor bug in aput for 1D arrays +## +## 11/08/98 ... fixed aput to output large arrays correctly + +import stats # required 3rd party module +import string, copy +from types import * + +__version__ = 0.4 + +###=========================== LIST FUNCTIONS ========================== +### +### Here are the list functions, DEFINED FOR ALL SYSTEMS. +### Array functions (for NumPy-enabled computers) appear below. +### + +def abut (source,*args): + """ +Like the |Stat abut command. It concatenates two lists side-by-side +and returns the result. '2D' lists are also accomodated for either argument +(source or addon). CAUTION: If one list is shorter, it will be repeated +until it is as long as the longest list. If this behavior is not desired, +use pstat.simpleabut(). + +Usage: abut(source, args) where args=any # of lists +Returns: a list of lists as long as the LONGEST list past, source on the + 'left', lists in <args> attached consecutively on the 'right' +""" + + if type(source) not in [ListType,TupleType]: + source = [source] + for addon in args: + if type(addon) not in [ListType,TupleType]: + addon = [addon] + if len(addon) < len(source): # is source list longer? + if len(source) % len(addon) == 0: # are they integer multiples? + repeats = len(source)/len(addon) # repeat addon n times + origadd = copy.deepcopy(addon) + for i in range(repeats-1): + addon = addon + origadd + else: + repeats = len(source)/len(addon)+1 # repeat addon x times, + origadd = copy.deepcopy(addon) # x is NOT an integer + for i in range(repeats-1): + addon = addon + origadd + addon = addon[0:len(source)] + elif len(source) < len(addon): # is addon list longer? + if len(addon) % len(source) == 0: # are they integer multiples? + repeats = len(addon)/len(source) # repeat source n times + origsour = copy.deepcopy(source) + for i in range(repeats-1): + source = source + origsour + else: + repeats = len(addon)/len(source)+1 # repeat source x times, + origsour = copy.deepcopy(source) # x is NOT an integer + for i in range(repeats-1): + source = source + origsour + source = source[0:len(addon)] + + source = simpleabut(source,addon) + return source + + +def simpleabut (source, addon): + """ +Concatenates two lists as columns and returns the result. '2D' lists +are also accomodated for either argument (source or addon). This DOES NOT +repeat either list to make the 2 lists of equal length. Beware of list pairs +with different lengths ... the resulting list will be the length of the +FIRST list passed. + +Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists) +Returns: a list of lists as long as source, with source on the 'left' and + addon on the 'right' +""" + if type(source) not in [ListType,TupleType]: + source = [source] + if type(addon) not in [ListType,TupleType]: + addon = [addon] + minlen = min(len(source),len(addon)) + list = copy.deepcopy(source) # start abut process + if type(source[0]) not in [ListType,TupleType]: + if type(addon[0]) not in [ListType,TupleType]: + for i in range(minlen): + list[i] = [source[i]] + [addon[i]] # source/addon = column + else: + for i in range(minlen): + list[i] = [source[i]] + addon[i] # addon=list-of-lists + else: + if type(addon[0]) not in [ListType,TupleType]: + for i in range(minlen): + list[i] = source[i] + [addon[i]] # source=list-of-lists + else: + for i in range(minlen): + list[i] = source[i] + addon[i] # source/addon = list-of-lists + source = list + return source + + +def colex (listoflists,cnums): + """ +Extracts from listoflists the columns specified in the list 'cnums' +(cnums can be an integer, a sequence of integers, or a string-expression that +corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex +columns 3 onward from the listoflists). + +Usage: colex (listoflists,cnums) +Returns: a list-of-lists corresponding to the columns from listoflists + specified by cnums, in the order the column numbers appear in cnums +""" + global index + column = 0 + if type(cnums) in [ListType,TupleType]: # if multiple columns to get + index = cnums[0] + column = map(lambda x: x[index], listoflists) + for col in cnums[1:]: + index = col + column = abut(column,map(lambda x: x[index], listoflists)) + elif type(cnums) == StringType: # if an 'x[3:]' type expr. + evalstring = 'map(lambda x: x'+cnums+', listoflists)' + column = eval(evalstring) + else: # else it's just 1 col to get + index = cnums + column = map(lambda x: x[index], listoflists) + return column + + +def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None): + """ +Averages data in collapsecol, keeping all unique items in keepcols +(using unique, which keeps unique LISTS of column numbers), retaining the +unique sets of values in keepcols, the mean for each. Setting fcn1 +and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len) +will append those results (e.g., the sterr, N) after each calculated mean. +cfcn is the collapse function to apply (defaults to mean, defined here in the +pstat module to avoid circular imports with stats.py, but harmonicmean or +others could be passed). + +Usage: collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) +Returns: a list of lists with all unique permutations of entries appearing in + columns ("conditions") specified by keepcols, abutted with the result of + cfcn (if cfcn=None, defaults to the mean) of each column specified by + collapsecols. +""" + def collmean (inlist): + s = 0 + for item in inlist: + s = s + item + return s/float(len(inlist)) + + if type(keepcols) not in [ListType,TupleType]: + keepcols = [keepcols] + if type(collapsecols) not in [ListType,TupleType]: + collapsecols = [collapsecols] + if cfcn == None: + cfcn = collmean + if keepcols == []: + means = [0]*len(collapsecols) + for i in range(len(collapsecols)): + avgcol = colex(listoflists,collapsecols[i]) + means[i] = cfcn(avgcol) + if fcn1: + try: + test = fcn1(avgcol) + except: + test = 'N/A' + means[i] = [means[i], test] + if fcn2: + try: + test = fcn2(avgcol) + except: + test = 'N/A' + try: + means[i] = means[i] + [len(avgcol)] + except TypeError: + means[i] = [means[i],len(avgcol)] + return means + else: + values = colex(listoflists,keepcols) + uniques = unique(values) + uniques.sort() + newlist = [] + if type(keepcols) not in [ListType,TupleType]: keepcols = [keepcols] + for item in uniques: + if type(item) not in [ListType,TupleType]: item =[item] + tmprows = linexand(listoflists,keepcols,item) + for col in collapsecols: + avgcol = colex(tmprows,col) + item.append(cfcn(avgcol)) + if fcn1 <> None: + try: + test = fcn1(avgcol) + except: + test = 'N/A' + item.append(test) + if fcn2 <> None: + try: + test = fcn2(avgcol) + except: + test = 'N/A' + item.append(test) + newlist.append(item) + return newlist + + +def dm (listoflists,criterion): + """ +Returns rows from the passed list of lists that meet the criteria in +the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9' +will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows +with column 2 equal to the string 'N'). + +Usage: dm (listoflists, criterion) +Returns: rows from listoflists that meet the specified criterion. +""" + function = 'filter(lambda x: '+criterion+',listoflists)' + lines = eval(function) + return lines + + +def flat(l): + """ +Returns the flattened version of a '2D' list. List-correlate to the a.ravel()() +method of NumPy arrays. + +Usage: flat(l) +""" + newl = [] + for i in range(len(l)): + for j in range(len(l[i])): + newl.append(l[i][j]) + return newl + + +def linexand (listoflists,columnlist,valuelist): + """ +Returns the rows of a list of lists where col (from columnlist) = val +(from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]). +len(columnlist) must equal len(valuelist). + +Usage: linexand (listoflists,columnlist,valuelist) +Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i +""" + if type(columnlist) not in [ListType,TupleType]: + columnlist = [columnlist] + if type(valuelist) not in [ListType,TupleType]: + valuelist = [valuelist] + criterion = '' + for i in range(len(columnlist)): + if type(valuelist[i])==StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' + criterion = criterion[0:-3] # remove the "and" after the last crit + function = 'filter(lambda x: '+criterion+',listoflists)' + lines = eval(function) + return lines + + +def linexor (listoflists,columnlist,valuelist): + """ +Returns the rows of a list of lists where col (from columnlist) = val +(from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[). +One value is required for each column in columnlist. If only one value +exists for columnlist but multiple values appear in valuelist, the +valuelist values are all assumed to pertain to the same column. + +Usage: linexor (listoflists,columnlist,valuelist) +Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i +""" + if type(columnlist) not in [ListType,TupleType]: + columnlist = [columnlist] + if type(valuelist) not in [ListType,TupleType]: + valuelist = [valuelist] + criterion = '' + if len(columnlist) == 1 and len(valuelist) > 1: + columnlist = columnlist*len(valuelist) + for i in range(len(columnlist)): # build an exec string + if type(valuelist[i])==StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' + criterion = criterion[0:-2] # remove the "or" after the last crit + function = 'filter(lambda x: '+criterion+',listoflists)' + lines = eval(function) + return lines + + +def linedelimited (inlist,delimiter): + """ +Returns a string composed of elements in inlist, with each element +separated by 'delimiter.' Used by function writedelimited. Use '\t' +for tab-delimiting. + +Usage: linedelimited (inlist,delimiter) +""" + outstr = '' + for item in inlist: + if type(item) <> StringType: + item = str(item) + outstr = outstr + item + delimiter + outstr = outstr[0:-1] + return outstr + + +def lineincols (inlist,colsize): + """ +Returns a string composed of elements in inlist, with each element +right-aligned in columns of (fixed) colsize. + +Usage: lineincols (inlist,colsize) where colsize is an integer +""" + outstr = '' + for item in inlist: + if type(item) <> StringType: + item = str(item) + size = len(item) + if size <= colsize: + for i in range(colsize-size): + outstr = outstr + ' ' + outstr = outstr + item + else: + outstr = outstr + item[0:colsize+1] + return outstr + + +def lineincustcols (inlist,colsizes): + """ +Returns a string composed of elements in inlist, with each element +right-aligned in a column of width specified by a sequence colsizes. The +length of colsizes must be greater than or equal to the number of columns +in inlist. + +Usage: lineincustcols (inlist,colsizes) +Returns: formatted string created from inlist +""" + outstr = '' + for i in range(len(inlist)): + if type(inlist[i]) <> StringType: + item = str(inlist[i]) + else: + item = inlist[i] + size = len(item) + if size <= colsizes[i]: + for j in range(colsizes[i]-size): + outstr = outstr + ' ' + outstr = outstr + item + else: + outstr = outstr + item[0:colsizes[i]+1] + return outstr + + +def list2string (inlist,delimit=' '): + """ +Converts a 1D list to a single long string for file output, using +the string.join function. + +Usage: list2string (inlist,delimit=' ') +Returns: the string created from inlist +""" + stringlist = map(makestr,inlist) + return string.join(stringlist,delimit) + + +def makelol(inlist): + """ +Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you +want to use put() to write a 1D list one item per line in the file. + +Usage: makelol(inlist) +Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc. +""" + x = [] + for item in inlist: + x.append([item]) + return x + + +def makestr (x): + if type(x) <> StringType: + x = str(x) + return x + + +def printcc (lst,extra=2): + """ +Prints a list of lists in columns, customized by the max size of items +within the columns (max size of items in col, plus 'extra' number of spaces). +Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines, +respectively. + +Usage: printcc (lst,extra=2) +Returns: None +""" + if type(lst[0]) not in [ListType,TupleType]: + lst = [lst] + rowstokill = [] + list2print = copy.deepcopy(lst) + for i in range(len(lst)): + if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']: + rowstokill = rowstokill + [i] + rowstokill.reverse() # delete blank rows from the end + for row in rowstokill: + del list2print[row] + maxsize = [0]*len(list2print[0]) + for col in range(len(list2print[0])): + items = colex(list2print,col) + items = map(makestr,items) + maxsize[col] = max(map(len,items)) + extra + for row in lst: + if row == ['\n'] or row == '\n' or row == '' or row == ['']: + print + elif row == ['dashes'] or row == 'dashes': + dashes = [0]*len(maxsize) + for j in range(len(maxsize)): + dashes[j] = '-'*(maxsize[j]-2) + print lineincustcols(dashes,maxsize) + else: + print lineincustcols(row,maxsize) + return None + + +def printincols (listoflists,colsize): + """ +Prints a list of lists in columns of (fixed) colsize width, where +colsize is an integer. + +Usage: printincols (listoflists,colsize) +Returns: None +""" + for row in listoflists: + print lineincols(row,colsize) + return None + + +def pl (listoflists): + """ +Prints a list of lists, 1 list (row) at a time. + +Usage: pl(listoflists) +Returns: None +""" + for row in listoflists: + if row[-1] == '\n': + print row, + else: + print row + return None + + +def printl(listoflists): + """Alias for pl.""" + pl(listoflists) + return + + +def replace (inlst,oldval,newval): + """ +Replaces all occurrences of 'oldval' with 'newval', recursively. + +Usage: replace (inlst,oldval,newval) +""" + lst = inlst*1 + for i in range(len(lst)): + if type(lst[i]) not in [ListType,TupleType]: + if lst[i]==oldval: lst[i]=newval + else: + lst[i] = replace(lst[i],oldval,newval) + return lst + + +def recode (inlist,listmap,cols=None): + """ +Changes the values in a list to a new set of values (useful when +you need to recode data from (e.g.) strings to numbers. cols defaults +to None (meaning all columns are recoded). + +Usage: recode (inlist,listmap,cols=None) cols=recode cols, listmap=2D list +Returns: inlist with the appropriate values replaced with new ones +""" + lst = copy.deepcopy(inlist) + if cols != None: + if type(cols) not in [ListType,TupleType]: + cols = [cols] + for col in cols: + for row in range(len(lst)): + try: + idx = colex(listmap,0).index(lst[row][col]) + lst[row][col] = listmap[idx][1] + except ValueError: + pass + else: + for row in range(len(lst)): + for col in range(len(lst)): + try: + idx = colex(listmap,0).index(lst[row][col]) + lst[row][col] = listmap[idx][1] + except ValueError: + pass + return lst + + +def remap (listoflists,criterion): + """ +Remaps values in a given column of a 2D list (listoflists). This requires +a criterion as a function of 'x' so that the result of the following is +returned ... map(lambda x: 'criterion',listoflists). + +Usage: remap(listoflists,criterion) criterion=string +Returns: remapped version of listoflists +""" + function = 'map(lambda x: '+criterion+',listoflists)' + lines = eval(function) + return lines + + +def roundlist (inlist,digits): + """ +Goes through each element in a 1D or 2D inlist, and applies the following +function to all elements of FloatType ... round(element,digits). + +Usage: roundlist(inlist,digits) +Returns: list with rounded floats +""" + if type(inlist[0]) in [IntType, FloatType]: + inlist = [inlist] + l = inlist*1 + for i in range(len(l)): + for j in range(len(l[i])): + if type(l[i][j])==FloatType: + l[i][j] = round(l[i][j],digits) + return l + + +def sortby(listoflists,sortcols): + """ +Sorts a list of lists on the column(s) specified in the sequence +sortcols. + +Usage: sortby(listoflists,sortcols) +Returns: sorted list, unchanged column ordering +""" + newlist = abut(colex(listoflists,sortcols),listoflists) + newlist.sort() + try: + numcols = len(sortcols) + except TypeError: + numcols = 1 + crit = '[' + str(numcols) + ':]' + newlist = colex(newlist,crit) + return newlist + + +def unique (inlist): + """ +Returns all unique items in the passed list. If the a list-of-lists +is passed, unique LISTS are found (i.e., items in the first dimension are +compared). + +Usage: unique (inlist) +Returns: the unique elements (or rows) in inlist +""" + uniques = [] + for item in inlist: + if item not in uniques: + uniques.append(item) + return uniques + +def duplicates(inlist): + """ +Returns duplicate items in the FIRST dimension of the passed list. + +Usage: duplicates (inlist) +""" + dups = [] + for i in range(len(inlist)): + if inlist[i] in inlist[i+1:]: + dups.append(inlist[i]) + return dups + + +def nonrepeats(inlist): + """ +Returns items that are NOT duplicated in the first dim of the passed list. + +Usage: nonrepeats (inlist) +""" + nonrepeats = [] + for i in range(len(inlist)): + if inlist.count(inlist[i]) == 1: + nonrepeats.append(inlist[i]) + return nonrepeats + + +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== + +try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE + import numpy as N + + def aabut (source, *args): + """ +Like the |Stat abut command. It concatenates two arrays column-wise +and returns the result. CAUTION: If one array is shorter, it will be +repeated until it is as long as the other. + +Usage: aabut (source, args) where args=any # of arrays +Returns: an array as long as the LONGEST array past, source appearing on the + 'left', arrays in <args> attached on the 'right'. +""" + if len(source.shape)==1: + width = 1 + source = N.resize(source,[source.shape[0],width]) + else: + width = source.shape[1] + for addon in args: + if len(addon.shape)==1: + width = 1 + addon = N.resize(addon,[source.shape[0],width]) + else: + width = source.shape[1] + if len(addon) < len(source): + addon = N.resize(addon,[source.shape[0],addon.shape[1]]) + elif len(source) < len(addon): + source = N.resize(source,[addon.shape[0],source.shape[1]]) + source = N.concatenate((source,addon),1) + return source + + + def acolex (a,indices,axis=1): + """ +Extracts specified indices (a list) from passed array, along passed +axis (column extraction is default). BEWARE: A 1D array is presumed to be a +column-array (and that the whole array will be returned as a column). + +Usage: acolex (a,indices,axis=1) +Returns: the columns of a specified by indices +""" + if type(indices) not in [ListType,TupleType,N.ndarray]: + indices = [indices] + if len(N.shape(a)) == 1: + cols = N.resize(a,[a.shape[0],1]) + else: +# print a[:3] + cols = N.take(a,indices,axis) +# print cols[:3] + return cols + + + def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None): + """ +Averages data in collapsecol, keeping all unique items in keepcols +(using unique, which keeps unique LISTS of column numbers), retaining +the unique sets of values in keepcols, the mean for each. If stderror or +N of the mean are desired, set either or both parameters to 1. + +Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) +Returns: unique 'conditions' specified by the contents of columns specified + by keepcols, abutted with the mean(s) of column(s) specified by + collapsecols +""" + def acollmean (inarray): + return N.sum(N.ravel(inarray)) + + if type(keepcols) not in [ListType,TupleType,N.ndarray]: + keepcols = [keepcols] + if type(collapsecols) not in [ListType,TupleType,N.ndarray]: + collapsecols = [collapsecols] + + if cfcn == None: + cfcn = acollmean + if keepcols == []: + avgcol = acolex(a,collapsecols) + means = N.sum(avgcol)/float(len(avgcol)) + if fcn1<>None: + try: + test = fcn1(avgcol) + except: + test = N.array(['N/A']*len(means)) + means = aabut(means,test) + if fcn2<>None: + try: + test = fcn2(avgcol) + except: + test = N.array(['N/A']*len(means)) + means = aabut(means,test) + return means + else: + if type(keepcols) not in [ListType,TupleType,N.ndarray]: + keepcols = [keepcols] + values = colex(a,keepcols) # so that "item" can be appended (below) + uniques = unique(values) # get a LIST, so .sort keeps rows intact + uniques.sort() + newlist = [] + for item in uniques: + if type(item) not in [ListType,TupleType,N.ndarray]: + item =[item] + tmprows = alinexand(a,keepcols,item) + for col in collapsecols: + avgcol = acolex(tmprows,col) + item.append(acollmean(avgcol)) + if fcn1<>None: + try: + test = fcn1(avgcol) + except: + test = 'N/A' + item.append(test) + if fcn2<>None: + try: + test = fcn2(avgcol) + except: + test = 'N/A' + item.append(test) + newlist.append(item) + try: + new_a = N.array(newlist) + except TypeError: + new_a = N.array(newlist,'O') + return new_a + + + def adm (a,criterion): + """ +Returns rows from the passed list of lists that meet the criteria in +the passed criterion expression (a string as a function of x). + +Usage: adm (a,criterion) where criterion is like 'x[2]==37' +""" + function = 'filter(lambda x: '+criterion+',a)' + lines = eval(function) + try: + lines = N.array(lines) + except: + lines = N.array(lines,dtype='O') + return lines + + + def isstring(x): + if type(x)==StringType: + return 1 + else: + return 0 + + + def alinexand (a,columnlist,valuelist): + """ +Returns the rows of an array where col (from columnlist) = val +(from valuelist). One value is required for each column in columnlist. + +Usage: alinexand (a,columnlist,valuelist) +Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i +""" + if type(columnlist) not in [ListType,TupleType,N.ndarray]: + columnlist = [columnlist] + if type(valuelist) not in [ListType,TupleType,N.ndarray]: + valuelist = [valuelist] + criterion = '' + for i in range(len(columnlist)): + if type(valuelist[i])==StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' + criterion = criterion[0:-3] # remove the "and" after the last crit + return adm(a,criterion) + + + def alinexor (a,columnlist,valuelist): + """ +Returns the rows of an array where col (from columnlist) = val (from +valuelist). One value is required for each column in columnlist. +The exception is if either columnlist or valuelist has only 1 value, +in which case that item will be expanded to match the length of the +other list. + +Usage: alinexor (a,columnlist,valuelist) +Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i +""" + if type(columnlist) not in [ListType,TupleType,N.ndarray]: + columnlist = [columnlist] + if type(valuelist) not in [ListType,TupleType,N.ndarray]: + valuelist = [valuelist] + criterion = '' + if len(columnlist) == 1 and len(valuelist) > 1: + columnlist = columnlist*len(valuelist) + elif len(valuelist) == 1 and len(columnlist) > 1: + valuelist = valuelist*len(columnlist) + for i in range(len(columnlist)): + if type(valuelist[i])==StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' + criterion = criterion[0:-2] # remove the "or" after the last crit + return adm(a,criterion) + + + def areplace (a,oldval,newval): + """ +Replaces all occurrences of oldval with newval in array a. + +Usage: areplace(a,oldval,newval) +""" + return N.where(a==oldval,newval,a) + + + def arecode (a,listmap,col='all'): + """ +Remaps the values in an array to a new set of values (useful when +you need to recode data from (e.g.) strings to numbers as most stats +packages require. Can work on SINGLE columns, or 'all' columns at once. +@@@BROKEN 2007-11-26 + +Usage: arecode (a,listmap,col='all') +Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1] +""" + ashape = a.shape + if col == 'all': + work = a.ravel() + else: + work = acolex(a,col) + work = work.ravel() + for pair in listmap: + if type(pair[1]) == StringType or work.dtype.char=='O' or a.dtype.char=='O': + work = N.array(work,dtype='O') + a = N.array(a,dtype='O') + for i in range(len(work)): + if work[i]==pair[0]: + work[i] = pair[1] + if col == 'all': + return N.reshape(work,ashape) + else: + return N.concatenate([a[:,0:col],work[:,N.newaxis],a[:,col+1:]],1) + else: # must be a non-Object type array and replacement + work = N.where(work==pair[0],pair[1],work) + return N.concatenate([a[:,0:col],work[:,N.newaxis],a[:,col+1:]],1) + + + def arowcompare(row1, row2): + """ +Compares two rows from an array, regardless of whether it is an +array of numbers or of python objects (which requires the cmp function). +@@@PURPOSE? 2007-11-26 + +Usage: arowcompare(row1,row2) +Returns: an array of equal length containing 1s where the two rows had + identical elements and 0 otherwise +""" + return + if row1.dtype.char=='O' or row2.dtype=='O': + cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2)))) # cmp fcn gives -1,0,1 + else: + cmpvect = N.equal(row1,row2) + return cmpvect + + + def arowsame(row1, row2): + """ +Compares two rows from an array, regardless of whether it is an +array of numbers or of python objects (which requires the cmp function). + +Usage: arowsame(row1,row2) +Returns: 1 if the two rows are identical, 0 otherwise. +""" + cmpval = N.alltrue(arowcompare(row1,row2)) + return cmpval + + + def asortrows(a,axis=0): + """ +Sorts an array "by rows". This differs from the Numeric.sort() function, +which sorts elements WITHIN the given axis. Instead, this function keeps +the elements along the given axis intact, but shifts them 'up or down' +relative to one another. + +Usage: asortrows(a,axis=0) +Returns: sorted version of a +""" + return N.sort(a,axis=axis,kind='mergesort') + + + def aunique(inarray): + """ +Returns unique items in the FIRST dimension of the passed array. Only +works on arrays NOT including string items. + +Usage: aunique (inarray) +""" + uniques = N.array([inarray[0]]) + if len(uniques.shape) == 1: # IF IT'S A 1D ARRAY + for item in inarray[1:]: + if N.add.reduce(N.equal(uniques,item).ravel()) == 0: + try: + uniques = N.concatenate([uniques,N.array[N.newaxis,:]]) + except TypeError: + uniques = N.concatenate([uniques,N.array([item])]) + else: # IT MUST BE A 2+D ARRAY + if inarray.dtype.char != 'O': # not an Object array + for item in inarray[1:]: + if not N.sum(N.alltrue(N.equal(uniques,item),1)): + try: + uniques = N.concatenate( [uniques,item[N.newaxis,:]] ) + except TypeError: # the item to add isn't a list + uniques = N.concatenate([uniques,N.array([item])]) + else: + pass # this item is already in the uniques array + else: # must be an Object array, alltrue/equal functions don't work + for item in inarray[1:]: + newflag = 1 + for unq in uniques: # NOTE: cmp --> 0=same, -1=<, 1=> + test = N.sum(abs(N.array(map(cmp,item,unq)))) + if test == 0: # if item identical to any 1 row in uniques + newflag = 0 # then not a novel item to add + break + if newflag == 1: + try: + uniques = N.concatenate( [uniques,item[N.newaxis,:]] ) + except TypeError: # the item to add isn't a list + uniques = N.concatenate([uniques,N.array([item])]) + return uniques + + + def aduplicates(inarray): + """ +Returns duplicate items in the FIRST dimension of the passed array. Only +works on arrays NOT including string items. + +Usage: aunique (inarray) +""" + inarray = N.array(inarray) + if len(inarray.shape) == 1: # IF IT'S A 1D ARRAY + dups = [] + inarray = inarray.tolist() + for i in range(len(inarray)): + if inarray[i] in inarray[i+1:]: + dups.append(inarray[i]) + dups = aunique(dups) + else: # IT MUST BE A 2+D ARRAY + dups = [] + aslist = inarray.tolist() + for i in range(len(aslist)): + if aslist[i] in aslist[i+1:]: + dups.append(aslist[i]) + dups = unique(dups) + dups = N.array(dups) + return dups + +except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs + pass diff --git a/utils/tabulator.py b/utils/tabulator.py index 3c6b1839..ed83a04b 100644 --- a/utils/tabulator.py +++ b/utils/tabulator.py @@ -5,20 +5,14 @@ import getpass import math import numpy + import colortrans from email_sender import EmailSender - - -def _IsFloat(v): - try: - float(v) - return True - except ValueError: - return False +import misc def _AllFloat(values): - return all([_IsFloat(v) for v in values]) + return all([misc.IsFloat(v) for v in values]) def _GetFloats(values): @@ -45,10 +39,11 @@ class TableGenerator(object): MISSING_VALUE = "x" - def __init__(self, d, l, sort=SORT_BY_KEYS): + def __init__(self, d, l, sort=SORT_BY_KEYS, key_name="keys"): self._runs = d self._labels = l self._sort = sort + self._key_name = key_name def _AggregateKeys(self): keys = set([]) @@ -116,7 +111,7 @@ class TableGenerator(object): module. """ keys = self._GetKeys() - header = ["keys"] + self._labels + header = [self._key_name] + self._labels table = [header] for k in keys: row = [k] @@ -164,7 +159,13 @@ class Result(object): def _GetGmean(self, values): if not values: return float("nan") - return (reduce(lambda x, y: x*y, values))**(1.0/len(values)) + if any([v < 0 for v in values]): + return float ("nan") + if any([v == 0 for v in values]): + return 0.0 + log_list = [math.log(v) for v in values] + gmean_log = sum(log_list)/len(log_list) + return math.exp(gmean_log) def Compute(self, cell, values, baseline_values): """Compute the result given a list of values and baseline values. @@ -221,6 +222,19 @@ class LiteralResult(Result): class NonEmptyCountResult(Result): def Compute(self, cell, values, baseline_values): cell.value = len(_StripNone(values)) + if not baseline_values: + return + base_value = len(_StripNone(baseline_values)) + if cell.value == base_value: + return + f = ColorBoxFormat() + len_values = len(values) + len_baseline_values = len(baseline_values) + tmp_cell = Cell() + tmp_cell.value= 1.0 + (float(cell.value - base_value) / + (max(len_values, len_baseline_values))) + f.Compute(tmp_cell) + cell.bgcolor = tmp_cell.bgcolor class StringMeanResult(Result): @@ -273,7 +287,7 @@ class StdResult(NumericalResult): class CoeffVarResult(NumericalResult): def _ComputeFloat(self, cell, values, baseline_values): - noise = numpy.std(values)/numpy.mean(values) + noise = numpy.abs(numpy.std(values)/numpy.mean(values)) cell.value = noise @@ -297,7 +311,7 @@ class ComparisonResult(Result): cell.value = "?" -class StatsSignificant(ComparisonResult): +class PValueResult(ComparisonResult): def _ComputeFloat(self, cell, values, baseline_values): if len(values) < 2 or len(baseline_values) < 2: cell.value = float("nan") @@ -308,6 +322,7 @@ class StatsSignificant(ComparisonResult): def _ComputeString(self, cell, values, baseline_values): return float("nan") + class KeyAwareComparisonResult(ComparisonResult): def _IsLowerBetter(self, key): lower_is_better_keys = ["milliseconds", "ms", "seconds", "KB", @@ -442,6 +457,19 @@ class Format(object): return ret +class PValueFormat(Format): + def _ComputeFloat(self, cell): + cell.string_value = "%0.2f" % float(cell.value) + if float(cell.value) < 0.05: + cell.bgcolor = self._GetColor(cell.value, + Color(255, 255, 0, 0), + Color(255, 255, 255, 0), + Color(255, 255, 255, 0), + mid_value=0.05, + power=1) + cell.bgcolor_row = True + + class StorageFormat(Format): """Format the cell as a storage number. @@ -549,6 +577,7 @@ class Cell(object): colspan: Set the colspan of the cell in the HTML table, this is used for table headers. Default value is 1. name: the test name of the cell. + header: Whether this is a header in html. """ def __init__(self): @@ -564,6 +593,7 @@ class Cell(object): self.width = None self.colspan = 1 self.name = None + self.header = False def __str__(self): l = [] @@ -614,7 +644,7 @@ class TableFormatter(object): self._table_columns = [] self._out_table = [] - def _GenerateCellTable(self): + def GenerateCellTable(self): row_index = 0 for row in self._table[1:]: @@ -645,13 +675,15 @@ class TableFormatter(object): self._out_table.append(out_row) row_index += 1 - # TODO(asharif): refactor this. - # Now generate header + def AddColumnName(self): + """Generate Column name at the top of table.""" key = Cell() + key.header = True key.string_value = "Keys" header = [key] for column in self._table_columns: cell = Cell() + cell.header = True if column.name: cell.string_value = column.name else: @@ -665,22 +697,37 @@ class TableFormatter(object): self._out_table = [header] + self._out_table + def AddHeader(self, s): + """Put additional string on the top of the table.""" + cell = Cell() + cell.header = True + cell.string_value = str(s) + header = [cell] + colspan = max(1, max(len(row) for row in self._table)) + cell.colspan = colspan + self._out_table = [header] + self._out_table + + def AddLabelName(self): + """Put label on the top of the table.""" top_header = [] - colspan = 0 - for column in self._columns: - if not column.result.NeedsBaseline(): - colspan += 1 + base_colspan = len([c for c in self._columns + if not c.result.NeedsBaseline()]) + compare_colspan = len(self._columns) + # The label is organized as follows + # "keys" label_base, label_comparison1, label_comparison2 + # The first cell has colspan 1, the second is base_colspan + # The others are compare_colspan for label in self._table[0]: cell = Cell() + cell.header = True cell.string_value = str(label) - if cell.string_value != "keys": - cell.colspan = colspan + if top_header: + cell.colspan = base_colspan + if len(top_header) > 1: + cell.colspan = compare_colspan top_header.append(cell) - self._out_table = [top_header] + self._out_table - return self._out_table - def _PrintOutTable(self): o = "" for row in self._out_table: @@ -689,18 +736,25 @@ class TableFormatter(object): o += "\n" print o - def GetCellTable(self): + def GetCellTable(self, headers=True): """Function to return a table of cells. The table (list of lists) is converted into a table of cells by this function. + Args: + headers: A boolean saying whether we want default headers Returns: A table of cells with each cell having the properties and string values as requiested by the columns passed in the constructor. """ # Generate the cell table, creating a list of dynamic columns on the fly. - return self._GenerateCellTable() + if not self._out_table: + self.GenerateCellTable() + if headers: + self.AddColumnName() + self.AddLabelName() + return self._out_table class TablePrinter(object): @@ -726,16 +780,22 @@ class TablePrinter(object): assert cell.color, "Cell color not set but color_row set!" assert not row_style.color, "Multiple row_style.colors found!" row_style.color = cell.color + if cell.bgcolor_row: + assert cell.bgcolor, "Cell bgcolor not set but bgcolor_row set!" + assert not row_style.bgcolor, "Multiple row_style.bgcolors found!" + row_style.bgcolor = cell.bgcolor self._row_styles.append(row_style) self._column_styles = [] if len(self._table) < 2: return - for i in range(len(self._table[1])): + + for i in range(max(len(row) for row in self._table)): column_style = Cell() - for row in self._table[1:]: - column_style.width = max(column_style.width, - len(row[i].string_value)) + for row in self._table: + if not any([cell.colspan != 1 for cell in row]): + column_style.width = max(column_style.width, + len(row[i].string_value)) self._column_styles.append(column_style) def _GetBGColorFix(self, color): @@ -746,7 +806,7 @@ class TablePrinter(object): suffix = "\033[0m" elif self._output_type in [self.EMAIL, self.HTML]: rgb = color.GetRGB() - prefix = ("<FONT style=\"BACKGROUND-COLOR:#{0}\" color =#{0}>" + prefix = ("<FONT style=\"BACKGROUND-COLOR:#{0}\">" .format(rgb)) suffix = "</FONT>" elif self._output_type in [self.PLAIN, self.TSV]: @@ -780,26 +840,15 @@ class TablePrinter(object): def _GetCellValue(self, i, j): cell = self._table[i][j] - color = None out = cell.string_value - if self._row_styles[i].color: - color = self._row_styles[i].color - elif cell.color: - color = cell.color - - if self._row_styles[i].bgcolor: - bgcolor = self._row_styles[i].bgcolor - else: - bgcolor = cell.bgcolor - raw_width = len(out) - if color: - p, s = self._GetColorFix(color) + if cell.color: + p, s = self._GetColorFix(cell.color) out = "%s%s%s" % (p, out, s) - if bgcolor: - p, s = self._GetBGColorFix(bgcolor) + if cell.bgcolor: + p, s = self._GetBGColorFix(cell.bgcolor) out = "%s%s%s" % (p, out, s) if self._output_type in [self.PLAIN, self.CONSOLE, self.EMAIL]: @@ -812,14 +861,17 @@ class TablePrinter(object): width = len(cell.string_value) if cell.colspan > 1: width = 0 + start = 0 + for k in range(j): + start += self._table[i][k].colspan for k in range(cell.colspan): - width += self._column_styles[1 + (j-1) * cell.colspan + k].width + width += self._column_styles[start + k].width if width > raw_width: padding = ("%" + str(width - raw_width) + "s") % "" out = padding + out if self._output_type == self.HTML: - if i < 2: + if cell.header: tag = "th" else: tag = "td" @@ -831,7 +883,7 @@ class TablePrinter(object): if self._output_type in [self.CONSOLE, self.PLAIN, self.EMAIL]: return " " if self._output_type == self.HTML: - return " " + return "" if self._output_type == self.TSV: return "\t" @@ -858,9 +910,17 @@ class TablePrinter(object): o += self._GetPrefix() for i in range(len(self._table)): row = self._table[i] + # Apply row color and bgcolor. + p = s = bgp = bgs = "" + if self._row_styles[i].bgcolor: + bgp, bgs = self._GetBGColorFix(self._row_styles[i].bgcolor) + if self._row_styles[i].color: + p, s = self._GetColorFix(self._row_styles[i].color) + o += p + bgp for j in range(len(row)): out = self._GetCellValue(i, j) o += out + self._GetHorizontalSeparator() + o += s + bgs o += self._GetVerticalSeparator() o += self._GetSuffix() return o @@ -925,6 +985,8 @@ def GetComplexTable(runs, labels, out_to=TablePrinter.CONSOLE): RatioFormat()), Column(GmeanRatioResult(), RatioFormat()), + Column(PValueResult(), + PValueFormat()), ] tf = TableFormatter(table, columns) cell_table = tf.GetCellTable() @@ -952,6 +1014,17 @@ if __name__ == "__main__": print t email = GetComplexTable(runs, labels, TablePrinter.EMAIL) + runs = [ + [ + {"k1": "1",}, {"k1": "1.1"}, {"k1": "1.2"}, + ], + [ + {"k1": "5",}, {"k1": "5.1"}, {"k1": "5.2"}, + ], + ] + t = GetComplexTable(runs, labels, TablePrinter.CONSOLE) + print t + simple_table = [ ["binary", "b1", "b2", "b3"], ["size", 100, 105, 108], @@ -961,7 +1034,7 @@ if __name__ == "__main__": ] t = GetSimpleTable(simple_table) print t - email += GetSimpleTable(simple_table, TablePrinter.PLAIN) + email += GetSimpleTable(simple_table, TablePrinter.HTML) email_to = [getpass.getuser()] email = "<pre style='font-size: 13px'>%s</pre>" % email EmailSender().SendEmail(email_to, "SimpleTableTest", email, msg_type="html") diff --git a/utils/tabulator_test.py b/utils/tabulator_test.py index e1892b07..54b455d3 100644 --- a/utils/tabulator_test.py +++ b/utils/tabulator_test.py @@ -2,7 +2,7 @@ """Tests for misc.""" -__author__ = 'asharif@google.com (Ahmad Sharif)' +__author__ = "asharif@google.com (Ahmad Sharif)" # System modules import unittest @@ -61,6 +61,11 @@ class TabulatorTest(unittest.TestCase): c3.Round() self.assertTrue(c3.r == 127) + def testGmean(self): + a = [1.0e+308] * 3 + b = tabulator.Result()._GetGmean(a) + self.assertTrue(b >= 0.99e+308 and b <= 1.01e+308) + def testTableGenerator(self): runs = [[{"k1": "10", "k2": "12"}, {"k1": "13", "k2": "14", "k3": "15"}], @@ -91,6 +96,40 @@ class TabulatorTest(unittest.TestCase): table = tf.GetCellTable() self.assertTrue(table) + def testColspan(self): + simple_table = [ + ["binary", "b1", "b2", "b3"], + ["size", 100, 105, 108], + ["rodata", 100, 80, 70], + ["data", 100, 100, 100], + ["debug", 100, 140, 60], + ] + columns = [ + tabulator.Column(tabulator.AmeanResult(), + tabulator.Format()), + tabulator.Column(tabulator.MinResult(), + tabulator.Format()), + tabulator.Column(tabulator.AmeanRatioResult(), + tabulator.PercentFormat()), + tabulator.Column(tabulator.AmeanRatioResult(), + tabulator.ColorBoxFormat()), + ] + our_table = [simple_table[0]] + for row in simple_table[1:]: + our_row = [row[0]] + for v in row[1:]: + our_row.append([v]) + our_table.append(our_row) + + tf = tabulator.TableFormatter(our_table, columns) + cell_table = tf.GetCellTable() + self.assertTrue(cell_table[0][0].colspan == 1) + self.assertTrue(cell_table[0][1].colspan == 2) + self.assertTrue(cell_table[0][2].colspan == 4) + self.assertTrue(cell_table[0][3].colspan == 4) + for row in cell_table[1:]: + for cell in row: + self.assertTrue(cell.colspan == 1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/utils/timeline.py b/utils/timeline.py new file mode 100644 index 00000000..fa75ba01 --- /dev/null +++ b/utils/timeline.py @@ -0,0 +1,51 @@ +#!/usr/bin/python2.6 +# +# Copyright 2012 Google Inc. All Rights Reserved. +# + +"""Tools for recording and reporting timeline of benchmark_run.""" + +__author__ = 'yunlian@google.com (Yunlian Jiang)' + +import time + + +class Event(object): + def __init__(self, name='', cur_time=0): + self.name = name + self.timestamp = cur_time + + +class Timeline(object): + """Use a dict to store the timeline.""" + + def __init__(self): + self.events = [] + + def Record(self, event): + for e in self.events: + assert e.name != event, ('The event {0} is already recorded.' + .format(event)) + cur_event = Event(name=event, cur_time=time.time()) + self.events.append(cur_event) + + def GetEvents(self): + return([e.name for e in self.events]) + + def GetEventDict(self): + tl = {} + for e in self.events: + tl[e.name] = e.timestamp + return tl + + def GetEventTime(self, event): + for e in self.events: + if e.name == event: + return e.timestamp + raise IndexError, 'The event {0} is not recorded'.format(event) + + def GetLastEventTime(self): + return self.events[-1].timestamp + + def GetLastEvent(self): + return self.events[-1].name diff --git a/utils/timeline_test.py b/utils/timeline_test.py new file mode 100644 index 00000000..1f4d178a --- /dev/null +++ b/utils/timeline_test.py @@ -0,0 +1,54 @@ +# Copyright 2012 Google Inc. All Rights Reserved. + +"""Tests for time_line.py.""" + +__author__ = 'yunlian@google.com (Yunlian Jiang)' + +import time +import unittest + +import timeline + + +class TimeLineTest(unittest.TestCase): + + def testRecord(self): + tl = timeline.Timeline() + tl.Record('A') + t = time.time() + t1 = tl.events[0].timestamp + self.assertEqual(int(t1-t), 0) + self.assertRaises(AssertionError, tl.Record, 'A') + + def testGetEvents(self): + tl = timeline.Timeline() + tl.Record('A') + e = tl.GetEvents() + self.assertEqual(e, ['A']) + tl.Record('B') + e = tl.GetEvents() + self.assertEqual(e, ['A', 'B']) + + def testGetEventTime(self): + tl = timeline.Timeline() + tl.Record('A') + t = time.time() + t1 = tl.GetEventTime('A') + self.assertEqual(int(t1-t), 0) + self.assertRaises(IndexError, tl.GetEventTime, 'B') + + def testGetLastEventTime(self): + tl = timeline.Timeline() + self.assertRaises(IndexError, tl.GetLastEventTime) + tl.Record('A') + t = time.time() + t1 = tl.GetLastEventTime() + self.assertEqual(int(t1-t), 0) + time.sleep(2) + tl.Record('B') + t = time.time() + t1 = tl.GetLastEventTime() + self.assertEqual(int(t1-t), 0) + +if __name__ == '__main__': + unittest.main() |