aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ting-Yuan Huang <laszio@google.com> 2017-02-17 12:20:02 -0800
committer: chrome-bot <chrome-bot@chromium.org> 2017-02-21 16:16:41 -0800
commit: 8332364c0237ca6c4976c5206346ab9a596c8e98 (patch)
tree: acb32cdab517b4d2d78836ce2d1dba0073c6d436
parent: 1cc48481a2ca530bab04999da78eddc8a1b98adb (diff)
download: toolchain-utils-8332364c0237ca6c4976c5206346ab9a596c8e98.tar.gz
crosperf: set recommended iterations for benchmarks
This CL associates an estimated standard deviation with each benchmark, based on experiments. The recommended number of iterations can be requested by setting iterations = 0 in the experiment files. Setting it to a number greater than 0 overrides the recommended value and behaves exactly the same as before. With this change, benchmarks in all_toolchain_perf have a margin of error of no more than 2%, 90% of the time. See crbug.com/673558 for how the standard deviations are estimated. BUG=chromium:673558 TEST=all_toolchain_perf + page_cycler_v2.typical_25 finishes in 3.5 hours for an image on chell. Change-Id: Ie2ed07878c1237ad31a8568ae3fd3fb96cd11f3b Reviewed-on: https://chromium-review.googlesource.com/424915 Commit-Ready: Ting-Yuan Huang <laszio@chromium.org> Tested-by: Ting-Yuan Huang <laszio@chromium.org> Reviewed-by: Caroline Tice <cmtice@chromium.org>
-rwxr-xr-x buildbot_test_toolchains.py 4
-rw-r--r-- crosperf/benchmark.py 29
-rw-r--r-- crosperf/settings_factory.py 16
3 files changed, 40 insertions, 9 deletions
diff --git a/buildbot_test_toolchains.py b/buildbot_test_toolchains.py
index 5bef28a4..028d7f1a 100755
--- a/buildbot_test_toolchains.py
+++ b/buildbot_test_toolchains.py
@@ -175,12 +175,12 @@ class ToolchainComparator(object):
experiment_tests = """
benchmark: all_toolchain_perf {
suite: telemetry_Crosperf
- iterations: 3
+ iterations: 0
}
benchmark: page_cycler_v2.typical_25 {
suite: telemetry_Crosperf
- iterations: 2
+ iterations: 0
run_local: False
retries: 0
}
diff --git a/crosperf/benchmark.py b/crosperf/benchmark.py
index a2a34bca..55673a56 100644
--- a/crosperf/benchmark.py
+++ b/crosperf/benchmark.py
@@ -5,6 +5,33 @@
"""Define a type that wraps a Benchmark instance."""
+import math
+from scipy import stats
+
+# See crbug.com/673558 for how these are estimated.
+_estimated_stddev = {
+ 'octane': 0.015,
+ 'kraken': 0.019,
+ 'speedometer': 0.007,
+ 'dromaeo.domcoreattr': 0.023,
+ 'dromaeo.domcoremodify': 0.011,
+ 'smoothness.tough_webgl_cases': 0.025,
+ 'graphics_WebGLAquarium': 0.008,
+ 'page_cycler_v2.typical_25': 0.021,
+}
+
+# Get #samples needed to guarantee a given confidence interval, assuming the
+# samples follow normal distribution.
+def _samples(b):
+ # TODO: Make this an option
+ # CI = (0.9, 0.02), i.e., 90% chance that |sample mean - true mean| < 2%.
+ p = 0.9
+ e = 0.02
+ if b not in _estimated_stddev:
+ return 1
+ d = _estimated_stddev[b]
+ return int(math.ceil((stats.norm.isf((1 - p) / 2) * d / e) ** 2))
+
class Benchmark(object):
"""Class representing a benchmark to be run.
@@ -31,7 +58,7 @@ class Benchmark(object):
self.test_name = test_name
#For telemetry, this is the data.
self.test_args = test_args
- self.iterations = iterations
+ self.iterations = iterations if iterations > 0 else _samples(name)
self.perf_args = perf_args
self.rm_chroot_tmp = rm_chroot_tmp
self.iteration_adjusted = False
diff --git a/crosperf/settings_factory.py b/crosperf/settings_factory.py
index e42d82a9..05e3fbbb 100644
--- a/crosperf/settings_factory.py
+++ b/crosperf/settings_factory.py
@@ -29,9 +29,11 @@ class BenchmarkSettings(Settings):
self.AddField(
IntegerField(
'iterations',
- default=1,
- description='Number of iterations to run the '
- 'test.'))
+ required=False,
+ default=0,
+ description='Number of iterations to run the test. '
+ 'If not set, will run each benchmark test the optimum number of '
+ 'times to get a stable result.'))
self.AddField(
TextField(
'suite', default='', description='The type of the benchmark.'))
@@ -186,9 +188,11 @@ class GlobalSettings(Settings):
self.AddField(
IntegerField(
'iterations',
- default=1,
- description='Number of iterations to run all '
- 'tests.'))
+ required=False,
+ default=0,
+ description='Number of iterations to run all tests. '
+ 'If not set, will run each benchmark test the optimum number of '
+ 'times to get a stable result.'))
self.AddField(
TextField(
'chromeos_root',