From 8332364c0237ca6c4976c5206346ab9a596c8e98 Mon Sep 17 00:00:00 2001 From: Ting-Yuan Huang Date: Fri, 17 Feb 2017 12:20:02 -0800 Subject: crosperf: set recommended iterations for benchmarks This CL associates estimated standard deviations to each benchmark, according to experiments. The recommended iterations can be specified by setting iterations = 0 in the experiment files. Setting it to numbers greater than 0 will override the default iterations and behaves exactly the same as before. With this change, benchmarks in all_toolchain_perf get no more than 2% margin of error within 90% of time. See crbug.com/673558 for how the standard deviations are estimated. BUG=chromium:673558 TEST=all_toolchain_perf + page_cycler_v2.typical_25 finishes in 3.5 hours for an image on chell. Change-Id: Ie2ed07878c1237ad31a8568ae3fd3fb96cd11f3b Reviewed-on: https://chromium-review.googlesource.com/424915 Commit-Ready: Ting-Yuan Huang Tested-by: Ting-Yuan Huang Reviewed-by: Caroline Tice --- crosperf/benchmark.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'crosperf/benchmark.py') diff --git a/crosperf/benchmark.py b/crosperf/benchmark.py index a2a34bca..55673a56 100644 --- a/crosperf/benchmark.py +++ b/crosperf/benchmark.py @@ -5,6 +5,33 @@ """Define a type that wraps a Benchmark instance.""" +import math +from scipy import stats + +# See crbug.com/673558 for how these are estimated. +_estimated_stddev = { + 'octane': 0.015, + 'kraken': 0.019, + 'speedometer': 0.007, + 'dromaeo.domcoreattr': 0.023, + 'dromaeo.domcoremodify': 0.011, + 'smoothness.tough_webgl_cases': 0.025, + 'graphics_WebGLAquarium': 0.008, + 'page_cycler_v2.typical_25': 0.021, +} + +# Get #samples needed to guarantee a given confidence interval, assuming the +# samples follow normal distribution. +def _samples(b): + # TODO: Make this an option + # CI = (0.9, 0.02), i.e., 90% chance that |sample mean - true mean| < 2%. + p = 0.9 + e = 0.02 + if b not in _estimated_stddev: + return 1 + d = _estimated_stddev[b] + return int(math.ceil((stats.norm.isf((1 - p) / 2) * d / e) ** 2)) + class Benchmark(object): """Class representing a benchmark to be run. @@ -31,7 +58,7 @@ class Benchmark(object): self.test_name = test_name #For telemetry, this is the data. self.test_args = test_args - self.iterations = iterations + self.iterations = iterations if iterations > 0 else _samples(name) self.perf_args = perf_args self.rm_chroot_tmp = rm_chroot_tmp self.iteration_adjusted = False -- cgit v1.2.3 From 9088f2b8997baf37098bce7a464348296e0dbe47 Mon Sep 17 00:00:00 2001 From: Ting-Yuan Huang Date: Fri, 10 Mar 2017 11:26:45 -0800 Subject: crosperf: make default iterations of benchmarks >= 2 So as to get standard deviations, which are needed in ttest for p-value. TEST=none BUG=none Change-Id: If59fb46b62c0cb58610507962f8ca13ccc0b7d01 Reviewed-on: https://chromium-review.googlesource.com/452796 Commit-Ready: Ting-Yuan Huang Tested-by: Ting-Yuan Huang Reviewed-by: Manoj Gupta --- crosperf/benchmark.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'crosperf/benchmark.py') diff --git a/crosperf/benchmark.py b/crosperf/benchmark.py index 55673a56..3f0a842a 100644 --- a/crosperf/benchmark.py +++ b/crosperf/benchmark.py @@ -30,7 +30,10 @@ def _samples(b): if b not in _estimated_stddev: return 1 d = _estimated_stddev[b] - return int(math.ceil((stats.norm.isf((1 - p) / 2) * d / e) ** 2)) + # Get at least 2 samples so as to calculate standard deviation, which is + # needed in T-test for p-value. + n = int(math.ceil((stats.norm.isf((1 - p) / 2) * d / e) ** 2)) + return n if n > 1 else 2 class Benchmark(object): """Class representing a benchmark to be run. -- cgit v1.2.3 From f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8 Mon Sep 17 00:00:00 2001 From: Caroline Tice Date: Thu, 6 Apr 2017 17:16:05 -0700 Subject: [toolchain-utils] Fix remaining lint errors in toolchain-utils. In addition to fixing the lint errors, this also fixes the Python formatting issues (ran tc_pyformat on nearly all the files). BUG=chromium:570450 TEST=Ran all crosperf & bisect tool unit tests. Ran afe_lock_machine.py (check machine status) Ran full crosperf test (octane, speedometer, BootPerf) on alex. Change-Id: Ic86f9192801ac67769f3de30f1c5f0d203ce0831 Reviewed-on: https://chromium-review.googlesource.com/471886 Commit-Ready: Caroline Tice Tested-by: Caroline Tice Reviewed-by: Manoj Gupta --- crosperf/benchmark.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'crosperf/benchmark.py') diff --git a/crosperf/benchmark.py b/crosperf/benchmark.py index 3f0a842a..bbb1cdfc 100644 --- a/crosperf/benchmark.py +++ b/crosperf/benchmark.py @@ -1,9 +1,8 @@ - # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. - """Define a type that wraps a Benchmark instance.""" +from __future__ import print_function import math from scipy import stats @@ -20,20 +19,22 @@ _estimated_stddev = { 'page_cycler_v2.typical_25': 0.021, } + # Get #samples needed to guarantee a given confidence interval, assuming the # samples follow normal distribution. def _samples(b): - # TODO: Make this an option - # CI = (0.9, 0.02), i.e., 90% chance that |sample mean - true mean| < 2%. - p = 0.9 - e = 0.02 - if b not in _estimated_stddev: - return 1 - d = _estimated_stddev[b] - # Get at least 2 samples so as to calculate standard deviation, which is - # needed in T-test for p-value. - n = int(math.ceil((stats.norm.isf((1 - p) / 2) * d / e) ** 2)) - return n if n > 1 else 2 + # TODO: Make this an option + # CI = (0.9, 0.02), i.e., 90% chance that |sample mean - true mean| < 2%. + p = 0.9 + e = 0.02 + if b not in _estimated_stddev: + return 1 + d = _estimated_stddev[b] + # Get at least 2 samples so as to calculate standard deviation, which is + # needed in T-test for p-value. + n = int(math.ceil((stats.norm.isf((1 - p) / 2) * d / e)**2)) + return n if n > 1 else 2 + class Benchmark(object): """Class representing a benchmark to be run. -- cgit v1.2.3