3 files changed, 40 insertions, 9 deletions
diff --git a/buildbot_test_toolchains.py b/buildbot_test_toolchains.py
index 5bef28a4..028d7f1a 100755
--- a/buildbot_test_toolchains.py
+++ b/buildbot_test_toolchains.py
@@ -175,12 +175,12 @@ class ToolchainComparator(object):
     experiment_tests = """
     benchmark: all_toolchain_perf {
       suite: telemetry_Crosperf
-      iterations: 3
+      iterations: 0
     }
 
     benchmark: page_cycler_v2.typical_25 {
       suite: telemetry_Crosperf
-      iterations: 2
+      iterations: 0
       run_local: False
       retries: 0
     }
diff --git a/crosperf/benchmark.py b/crosperf/benchmark.py
index a2a34bca..55673a56 100644
--- a/crosperf/benchmark.py
+++ b/crosperf/benchmark.py
@@ -5,6 +5,33 @@
 
 """Define a type that wraps a Benchmark instance."""
 
+import math
+from scipy import stats
+
+# Stddev estimates keyed by benchmark name; see crbug.com/673558 for the method.
+_estimated_stddev = {
+    'octane': 0.015,
+    'kraken': 0.019,
+    'speedometer': 0.007,
+    'dromaeo.domcoreattr': 0.023,
+    'dromaeo.domcoremodify': 0.011,
+    'smoothness.tough_webgl_cases': 0.025,
+    'graphics_WebGLAquarium': 0.008,
+    'page_cycler_v2.typical_25': 0.021,
+}
+
+# Number of samples needed for the sample mean to meet a given confidence
+# interval, assuming the samples follow a normal distribution.
+def _samples(b):
+    # TODO: Make this an option
+    # CI = (0.9, 0.02), i.e., 90% chance that |sample mean - true mean| < 2%.
+    confidence, tolerance = 0.9, 0.02
+    sigma = _estimated_stddev.get(b)
+    if sigma is None:
+        return 1  # No stddev estimate for this benchmark: run it once.
+    z_score = stats.norm.isf((1 - confidence) / 2)
+    return int(math.ceil((z_score * sigma / tolerance) ** 2))
+
 class Benchmark(object):
   """Class representing a benchmark to be run.
 
@@ -31,7 +58,7 @@ class Benchmark(object):
     self.test_name = test_name
     #For telemetry, this is the data.
     self.test_args = test_args
-    self.iterations = iterations
+    self.iterations = iterations if iterations > 0 else _samples(name)
     self.perf_args = perf_args
     self.rm_chroot_tmp = rm_chroot_tmp
     self.iteration_adjusted = False
diff --git a/crosperf/settings_factory.py b/crosperf/settings_factory.py
index e42d82a9..05e3fbbb 100644
--- a/crosperf/settings_factory.py
+++ b/crosperf/settings_factory.py
@@ -29,9 +29,11 @@ class BenchmarkSettings(Settings):
     self.AddField(
         IntegerField(
             'iterations',
-            default=1,
-            description='Number of iterations to run the '
-            'test.'))
+            required=False,
+            default=0,
+            description='Number of iterations to run the test. '
+            'If not set, will run each benchmark test the optimum number of '
+            'times to get a stable result.'))
     self.AddField(
         TextField(
             'suite', default='', description='The type of the benchmark.'))
@@ -186,9 +188,11 @@ class GlobalSettings(Settings):
     self.AddField(
         IntegerField(
             'iterations',
-            default=1,
-            description='Number of iterations to run all '
-            'tests.'))
+            required=False,
+            default=0,
+            description='Number of iterations to run all tests. '
+            'If not set, will run each benchmark test the optimum number of '
+            'times to get a stable result.'))
     self.AddField(
         TextField(
             'chromeos_root',