crosperf: set recommended iterations for benchmarks

This CL associates an estimated standard deviation with each benchmark, based on experiments. The recommended number of iterations is selected by setting iterations = 0 in the experiment file. Setting it to a number greater than 0 overrides the default number of iterations and behaves exactly as before.

With this change, benchmarks in all_toolchain_perf have a margin of error of no more than 2%, 90% of the time.

See crbug.com/673558 for how the standard deviations are estimated.

BUG=chromium:673558
TEST=all_toolchain_perf + page_cycler_v2.typical_25 finishes in 3.5 hours for an image on chell.

Change-Id: Ie2ed07878c1237ad31a8568ae3fd3fb96cd11f3b
Reviewed-on: https://chromium-review.googlesource.com/424915
Commit-Ready: Ting-Yuan Huang <laszio@chromium.org>
Tested-by: Ting-Yuan Huang <laszio@chromium.org>
Reviewed-by: Caroline Tice <cmtice@chromium.org>
author: Ting-Yuan Huang <laszio@google.com> 2017-02-17 12:20:02 -0800
committer: chrome-bot <chrome-bot@chromium.org> 2017-02-21 16:16:41 -0800
commit: 8332364c0237ca6c4976c5206346ab9a596c8e98 (patch)
tree: acb32cdab517b4d2d78836ce2d1dba0073c6d436
parent: 1cc48481a2ca530bab04999da78eddc8a1b98adb (diff)
download: toolchain-utils-8332364c0237ca6c4976c5206346ab9a596c8e98.tar.gz
3 files changed, 40 insertions, 9 deletions
diff --git a/buildbot_test_toolchains.py b/buildbot_test_toolchains.py
index 5bef28a4..028d7f1a 100755
--- a/buildbot_test_toolchains.py
+++ b/buildbot_test_toolchains.py
@@ -175,12 +175,12 @@ class ToolchainComparator(object):
     experiment_tests = """
     benchmark: all_toolchain_perf {
       suite: telemetry_Crosperf
-      iterations: 3
+      iterations: 0
     }
 
     benchmark: page_cycler_v2.typical_25 {
       suite: telemetry_Crosperf
-      iterations: 2
+      iterations: 0
       run_local: False
       retries: 0
     }
diff --git a/crosperf/benchmark.py b/crosperf/benchmark.py
index a2a34bca..55673a56 100644
--- a/crosperf/benchmark.py
+++ b/crosperf/benchmark.py
@@ -5,6 +5,33 @@
 
 """Define a type that wraps a Benchmark instance."""
 
+import math
+from scipy import stats
+
+# See crbug.com/673558 for how these are estimated.
+_estimated_stddev = {
+    'octane': 0.015,
+    'kraken': 0.019,
+    'speedometer': 0.007,
+    'dromaeo.domcoreattr': 0.023,
+    'dromaeo.domcoremodify': 0.011,
+    'smoothness.tough_webgl_cases': 0.025,
+    'graphics_WebGLAquarium': 0.008,
+    'page_cycler_v2.typical_25': 0.021,
+}
+
+# Get the number of samples needed to guarantee a given confidence interval,
+# assuming the samples follow a normal distribution.
+def _samples(b):
+    # TODO: Make this an option
+    # CI = (0.9, 0.02), i.e., 90% chance that |sample mean - true mean| < 2%.
+    p = 0.9
+    e = 0.02
+    if b not in _estimated_stddev:
+        return 1
+    d = _estimated_stddev[b]
+    return int(math.ceil((stats.norm.isf((1 - p) / 2) * d / e) ** 2))
+
 class Benchmark(object):
   """Class representing a benchmark to be run.
 
@@ -31,7 +58,7 @@ class Benchmark(object):
     self.test_name = test_name
     #For telemetry, this is the data.
     self.test_args = test_args
-    self.iterations = iterations
+    self.iterations = iterations if iterations > 0 else _samples(name)
     self.perf_args = perf_args
     self.rm_chroot_tmp = rm_chroot_tmp
     self.iteration_adjusted = False
diff --git a/crosperf/settings_factory.py b/crosperf/settings_factory.py
index e42d82a9..05e3fbbb 100644
--- a/crosperf/settings_factory.py
+++ b/crosperf/settings_factory.py
@@ -29,9 +29,11 @@ class BenchmarkSettings(Settings):
     self.AddField(
         IntegerField(
             'iterations',
-            default=1,
-            description='Number of iterations to run the '
-            'test.'))
+            required=False,
+            default=0,
+            description='Number of iterations to run the test. '
+            'If not set, will run each benchmark test the optimum number of '
+            'times to get a stable result.'))
     self.AddField(
         TextField(
             'suite', default='', description='The type of the benchmark.'))
@@ -186,9 +188,11 @@ class GlobalSettings(Settings):
     self.AddField(
         IntegerField(
             'iterations',
-            default=1,
-            description='Number of iterations to run all '
-            'tests.'))
+            required=False,
+            default=0,
+            description='Number of iterations to run all tests. '
+            'If not set, will run each benchmark test the optimum number of '
+            'times to get a stable result.'))
     self.AddField(
         TextField(
             'chromeos_root',
author	Ting-Yuan Huang <laszio@google.com>	2017-02-17 12:20:02 -0800
committer	chrome-bot <chrome-bot@chromium.org>	2017-02-21 16:16:41 -0800
commit	8332364c0237ca6c4976c5206346ab9a596c8e98 (patch)
tree	acb32cdab517b4d2d78836ce2d1dba0073c6d436
parent	1cc48481a2ca530bab04999da78eddc8a1b98adb (diff)
download	toolchain-utils-8332364c0237ca6c4976c5206346ab9a596c8e98.tar.gz