author     Zhizhou Yang <zhizhouy@google.com>      2019-03-14 13:25:06 -0700
committer  chrome-bot <chrome-bot@chromium.org>    2019-03-15 16:48:42 -0700
commit     1a5a3163b7fe75e053282e7d67148c5f4e409414 (patch)
tree       f2c94f394dcc3ec171a41b492ba533f4121a1be0
parent     6512f0baf7a540267b735accac77cf6903a63306 (diff)
crosperf: Feature to ignore min and max value in results
This patch provides an option for the user to ignore the min and max
values in the results when generating a report. The user can enable
this feature by specifying `ignore_min_max: True` (the default is
False) in the experiment file. When the number of values in a single
test is smaller than 3, we automatically ignore this option for that
test and print a warning.

BUG=chromium:938758
TEST=Tested with examples in general/cwp mode report generation.
Passed all unit tests.

Change-Id: I36a4c4d99836c201cdd2f2f9f2a4b1a4ffdaa47d
Reviewed-on: https://chromium-review.googlesource.com/1521054
Commit-Ready: Zhizhou Yang <zhizhouy@google.com>
Tested-by: Zhizhou Yang <zhizhouy@google.com>
Reviewed-by: Zhizhou Yang <zhizhouy@google.com>
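For reference, a minimal experiment-file sketch enabling the new option. Only the `ignore_min_max` field comes from this patch; the overall layout and the board/remote/benchmark values are assumptions based on typical crosperf experiment files:

    # Hypothetical crosperf experiment file; everything except
    # ignore_min_max is placeholder content, not part of this patch.
    board: samus
    remote: 192.168.0.1
    ignore_min_max: True

    benchmark: octane {
      iterations: 5
    }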
-rw-r--r--  cros_utils/tabulator.py                |  50
-rw-r--r--  cros_utils/tabulator_test.py           |  75
-rwxr-xr-x  crosperf/crosperf_unittest.py          |   2
-rw-r--r--  crosperf/experiment.py                 |   4
-rw-r--r--  crosperf/experiment_factory.py         |   4
-rw-r--r--  crosperf/results_report.py             |  49
-rw-r--r--  crosperf/settings_factory.py           |   7
-rwxr-xr-x  crosperf/settings_factory_unittest.py  |   5
8 files changed, 123 insertions(+), 73 deletions(-)
diff --git a/cros_utils/tabulator.py b/cros_utils/tabulator.py
index 94c49424..ed93de7a 100644
--- a/cros_utils/tabulator.py
+++ b/cros_utils/tabulator.py
@@ -87,6 +87,17 @@ def _StripNone(results):
return res
+def _RemoveMinMax(cell, values):
+ if len(values) < 3:
+ print('WARNING: Values count is less than 3, not ignoring min/max values')
+ print('WARNING: Cell name:', cell.name, 'Values:', values)
+ return values
+
+ values.remove(min(values))
+ values.remove(max(values))
+ return values
+
+
class TableGenerator(object):
"""Creates a table from a list of list of dicts.
@@ -529,7 +540,13 @@ class StringMeanResult(Result):
class AmeanResult(StringMeanResult):
"""Arithmetic mean."""
+ def __init__(self, ignore_min_max=False):
+ super(AmeanResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
cell.value = numpy.mean(values)
@@ -579,14 +596,26 @@ class NumericalResult(Result):
class StdResult(NumericalResult):
"""Standard deviation."""
+ def __init__(self, ignore_min_max=False):
+ super(StdResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
cell.value = numpy.std(values)
class CoeffVarResult(NumericalResult):
"""Standard deviation / Mean"""
+ def __init__(self, ignore_min_max=False):
+ super(CoeffVarResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
if numpy.mean(values) != 0.0:
noise = numpy.abs(numpy.std(values) / numpy.mean(values))
else:
@@ -619,7 +648,14 @@ class ComparisonResult(Result):
class PValueResult(ComparisonResult):
"""P-value."""
+ def __init__(self, ignore_min_max=False):
+ super(PValueResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
+ baseline_values = _RemoveMinMax(cell, baseline_values)
if len(values) < 2 or len(baseline_values) < 2:
cell.value = float('nan')
return
@@ -674,7 +710,14 @@ class KeyAwareComparisonResult(ComparisonResult):
class AmeanRatioResult(KeyAwareComparisonResult):
"""Ratio of arithmetic means of values vs. baseline values."""
+ def __init__(self, ignore_min_max=False):
+ super(AmeanRatioResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
+ baseline_values = _RemoveMinMax(cell, baseline_values)
if numpy.mean(baseline_values) != 0:
cell.value = numpy.mean(values) / numpy.mean(baseline_values)
elif numpy.mean(values) != 0:
@@ -688,7 +731,14 @@ class AmeanRatioResult(KeyAwareComparisonResult):
class GmeanRatioResult(KeyAwareComparisonResult):
"""Ratio of geometric means of values vs. baseline values."""
+ def __init__(self, ignore_min_max=False):
+ super(GmeanRatioResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
+ baseline_values = _RemoveMinMax(cell, baseline_values)
if self._GetGmean(baseline_values) != 0:
cell.value = self._GetGmean(values) / self._GetGmean(baseline_values)
elif self._GetGmean(values) != 0:
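To make the new trimming behavior concrete, here is a small standalone Python sketch (an illustration, not code from the patch) that mirrors `_RemoveMinMax` and reproduces the expectations in testIgnoreMinMax below:

    import numpy

    def remove_min_max(values):
      # Mirrors _RemoveMinMax above: with fewer than 3 values there is
      # nothing sensible to trim, so the list is returned unchanged.
      if len(values) < 3:
        return values
      values.remove(min(values))  # drops a single occurrence of the min
      values.remove(max(values))  # drops a single occurrence of the max
      return values

    print(numpy.mean([1, 2]))                     # 1.5 (too few values to trim)
    print(numpy.mean(remove_min_max([1, 2, 8])))  # 2.0 (1 and 8 are dropped)

Note that, like the original, the sketch mutates the list it is given; callers that need the untrimmed values should pass a copy.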
diff --git a/cros_utils/tabulator_test.py b/cros_utils/tabulator_test.py
index 943d9349..33c8da25 100644
--- a/cros_utils/tabulator_test.py
+++ b/cros_utils/tabulator_test.py
@@ -73,20 +73,22 @@ class TabulatorTest(unittest.TestCase):
b = tabulator.Result()._GetGmean(a)
self.assertTrue(b >= 0.99e+308 and b <= 1.01e+308)
+ def testIgnoreMinMax(self):
+ amr = tabulator.AmeanResult(ignore_min_max=True)
+ cell = tabulator.Cell()
+ values = [1, 2]
+ amr.Compute(cell, values, None)
+ self.assertTrue(cell.value == 1.5)
+ values = [1, 2, 8]
+ amr.Compute(cell, values, None)
+ self.assertTrue(cell.value == 2)
+
def testTableGenerator(self):
- runs = [[{
- 'k1': '10',
- 'k2': '12'
- }, {
- 'k1': '13',
- 'k2': '14',
- 'k3': '15'
- }], [{
- 'k1': '50',
- 'k2': '51',
- 'k3': '52',
- 'k4': '53'
- }]]
+ # yapf: disable
+ runs = [[{'k1': '10', 'k2': '12'},
+ {'k1': '13', 'k2': '14', 'k3': '15'}],
+ [{'k1': '50', 'k2': '51', 'k3': '52', 'k4': '53'}]]
+ # yapf: enable
labels = ['vanilla', 'modified']
tg = tabulator.TableGenerator(runs, labels)
table = tg.GetTable()
@@ -113,24 +115,14 @@ class TabulatorTest(unittest.TestCase):
self.assertTrue(table)
def testSamplesTableGenerator(self):
+ # yapf: disable
keyvals = {
- 'bench1': [[{
- 'samples': 1
- }, {
- 'samples': 2
- }], [{
- 'samples': 3
- }, {
- 'samples': 4
- }]],
- 'bench2': [[{
- 'samples': 5
- }, {}], [{
- 'samples': 6
- }, {
- 'samples': 7
- }]]
+ 'bench1': [[{'samples': 1}, {'samples': 2}],
+ [{'samples': 3}, {'samples': 4}]],
+ 'bench2': [[{'samples': 5}, {}],
+ [{'samples': 6}, {'samples': 7}]]
}
+ # yapf: enable
weights = {'bench1': 0.2, 'bench2': 0.7}
iter_counts = {'bench1': 2, 'bench2': 2}
labels = ['vanilla', 'modified']
@@ -152,22 +144,19 @@ class TabulatorTest(unittest.TestCase):
header = table.pop(0)
self.assertTrue(header == ['Benchmarks', 'Weights', 'vanilla', 'modified'])
row = table.pop(0)
- self.assertTrue(row == [
- 'bench1', 0.2, ((2, 0), [1 * 0.2, 2 * 0.2]), ((2, 0),
- [3 * 0.2, 4 * 0.2])
- ])
+ # yapf: disable
+ self.assertTrue(row == ['bench1', 0.2,
+ ((2, 0), [1 * 0.2, 2 * 0.2]),
+ ((2, 0), [3 * 0.2, 4 * 0.2])])
row = table.pop(0)
- self.assertTrue(row == [
- 'bench2', 0.7, ((1, 1), [5 * 0.7, None]), ((2, 0), [6 * 0.7, 7 * 0.7])
- ])
+ self.assertTrue(row == ['bench2', 0.7,
+ ((1, 1), [5 * 0.7, None]),
+ ((2, 0), [6 * 0.7, 7 * 0.7])])
row = table.pop(0)
- self.assertTrue(row == [
- 'Composite Benchmark (samples)', 'N/A',
- ((1, 1),
- [1 * 0.2 +
- 5 * 0.7, None]), ((2, 0), [3 * 0.2 + 6 * 0.7, 4 * 0.2 + 7 * 0.7])
- ])
-
+ self.assertTrue(row == ['Composite Benchmark (samples)', 'N/A',
+ ((1, 1), [1 * 0.2 + 5 * 0.7, None]),
+ ((2, 0), [3 * 0.2 + 6 * 0.7, 4 * 0.2 + 7 * 0.7])])
+ # yapf: enable
self.assertTrue('Composite Benchmark' in new_keyvals.keys())
self.assertTrue('Composite Benchmark' in new_iter_counts.keys())
diff --git a/crosperf/crosperf_unittest.py b/crosperf/crosperf_unittest.py
index bfa61391..4fb58793 100755
--- a/crosperf/crosperf_unittest.py
+++ b/crosperf/crosperf_unittest.py
@@ -58,7 +58,7 @@ class CrosperfTest(unittest.TestCase):
settings = crosperf.ConvertOptionsToSettings(options)
self.assertIsNotNone(settings)
self.assertIsInstance(settings, settings_factory.GlobalSettings)
- self.assertEqual(len(settings.fields), 27)
+ self.assertEqual(len(settings.fields), 28)
self.assertTrue(settings.GetField('rerun'))
argv = ['crosperf/crosperf.py', 'temp.exp']
options, _ = parser.parse_known_args(argv)
diff --git a/crosperf/experiment.py b/crosperf/experiment.py
index d5770d46..63e6a104 100644
--- a/crosperf/experiment.py
+++ b/crosperf/experiment.py
@@ -28,7 +28,8 @@ class Experiment(object):
def __init__(self, name, remote, working_directory, chromeos_root,
cache_conditions, labels, benchmarks, experiment_file, email_to,
acquire_timeout, log_dir, log_level, share_cache,
- results_directory, locks_directory, cwp_dso, enable_aslr):
+ results_directory, locks_directory, cwp_dso, enable_aslr,
+ ignore_min_max):
self.name = name
self.working_directory = working_directory
self.remote = remote
@@ -56,6 +57,7 @@ class Experiment(object):
self.locked_machines = []
self.cwp_dso = cwp_dso
self.enable_aslr = enable_aslr
+ self.ignore_min_max = ignore_min_max
if not remote:
raise RuntimeError('No remote hosts specified')
diff --git a/crosperf/experiment_factory.py b/crosperf/experiment_factory.py
index 8fc4ea33..b1e12be9 100644
--- a/crosperf/experiment_factory.py
+++ b/crosperf/experiment_factory.py
@@ -148,6 +148,7 @@ class ExperimentFactory(object):
if cwp_dso and not cwp_dso in dso_list:
raise RuntimeError('The DSO specified is not supported')
enable_aslr = global_settings.GetField('enable_aslr')
+ ignore_min_max = global_settings.GetField('ignore_min_max')
# Default cache hit conditions. The image checksum in the cache and the
# computed checksum of the image must match. Also a cache file must exist.
@@ -372,7 +373,8 @@ class ExperimentFactory(object):
chromeos_root, cache_conditions, labels, benchmarks,
experiment_file.Canonicalize(), email,
acquire_timeout, log_dir, log_level, share_cache,
- results_dir, locks_dir, cwp_dso, enable_aslr)
+ results_dir, locks_dir, cwp_dso, enable_aslr,
+ ignore_min_max)
return experiment
diff --git a/crosperf/results_report.py b/crosperf/results_report.py
index ba4ccd88..90312a33 100644
--- a/crosperf/results_report.py
+++ b/crosperf/results_report.py
@@ -273,36 +273,31 @@ class ResultsReport(object):
return ret
def GetFullTables(self, perf=False):
+ ignore_min_max = self.benchmark_results.ignore_min_max
columns = [
Column(RawResult(), Format()),
Column(MinResult(), Format()),
Column(MaxResult(), Format()),
- Column(AmeanResult(), Format()),
- Column(StdResult(), Format(), 'StdDev'),
- Column(CoeffVarResult(), CoeffVarFormat(), 'StdDev/Mean'),
- Column(GmeanRatioResult(), RatioFormat(), 'GmeanSpeedup'),
- Column(PValueResult(), PValueFormat(), 'p-value')
+ Column(AmeanResult(ignore_min_max), Format()),
+ Column(StdResult(ignore_min_max), Format(), 'StdDev'),
+ Column(CoeffVarResult(ignore_min_max), CoeffVarFormat(), 'StdDev/Mean'),
+ Column(GmeanRatioResult(ignore_min_max), RatioFormat(), 'GmeanSpeedup'),
+ Column(PValueResult(ignore_min_max), PValueFormat(), 'p-value')
]
return self._GetTablesWithColumns(columns, 'full', perf)
def GetSummaryTables(self, summary_type=''):
+ ignore_min_max = self.benchmark_results.ignore_min_max
+ columns = []
if summary_type == 'samples':
- columns = [
- Column(IterationResult(), Format(), 'Iterations [Pass:Fail]'),
- Column(AmeanResult(), Format(), 'Weighted Samples Amean'),
- Column(StdResult(), Format(), 'StdDev'),
- Column(CoeffVarResult(), CoeffVarFormat(), 'StdDev/Mean'),
- Column(GmeanRatioResult(), RatioFormat(), 'GmeanSpeedup'),
- Column(PValueResult(), PValueFormat(), 'p-value')
- ]
- else:
- columns = [
- Column(AmeanResult(), Format()),
- Column(StdResult(), Format(), 'StdDev'),
- Column(CoeffVarResult(), CoeffVarFormat(), 'StdDev/Mean'),
- Column(GmeanRatioResult(), RatioFormat(), 'GmeanSpeedup'),
- Column(PValueResult(), PValueFormat(), 'p-value')
- ]
+ columns += [Column(IterationResult(), Format(), 'Iterations [Pass:Fail]')]
+ columns += [
+ Column(AmeanResult(ignore_min_max), Format()),
+ Column(StdResult(ignore_min_max), Format(), 'StdDev'),
+ Column(CoeffVarResult(ignore_min_max), CoeffVarFormat(), 'StdDev/Mean'),
+ Column(GmeanRatioResult(ignore_min_max), RatioFormat(), 'GmeanSpeedup'),
+ Column(PValueResult(ignore_min_max), PValueFormat(), 'p-value')
+ ]
return self._GetTablesWithColumns(columns, 'summary', summary_type)
@@ -396,9 +391,9 @@ class TextResultsReport(ResultsReport):
table = _PrintTable(self.GetStatusTable(), output_type)
sections.append(self._MakeSection('Benchmark Run Status', table))
- perf_table = _PrintTable(
- self.GetSummaryTables(summary_type='perf'), output_type)
- if perf_table and not self.benchmark_results.cwp_dso:
+ if not self.benchmark_results.cwp_dso:
+ perf_table = _PrintTable(
+ self.GetSummaryTables(summary_type='perf'), output_type)
sections.append(self._MakeSection('Perf Data', perf_table))
if experiment is not None:
@@ -600,6 +595,7 @@ class BenchmarkResults(object):
label_names,
benchmark_names_and_iterations,
run_keyvals,
+ ignore_min_max=False,
read_perf_report=None,
cwp_dso=None,
weights=None):
@@ -614,6 +610,7 @@ class BenchmarkResults(object):
self.benchmark_names_and_iterations = benchmark_names_and_iterations
self.iter_counts = dict(benchmark_names_and_iterations)
self.run_keyvals = run_keyvals
+ self.ignore_min_max = ignore_min_max
self.read_perf_report = read_perf_report
self.cwp_dso = cwp_dso
self.weights = dict(weights) if weights else None
@@ -624,13 +621,15 @@ class BenchmarkResults(object):
benchmark_names_and_iterations = [(benchmark.name, benchmark.iterations)
for benchmark in experiment.benchmarks]
run_keyvals = _ExperimentToKeyvals(experiment, for_json_report)
+ ignore_min_max = experiment.ignore_min_max
read_perf_report = functools.partial(_ReadExperimentPerfReport,
experiment.results_directory)
cwp_dso = experiment.cwp_dso
weights = [(benchmark.name, benchmark.weight)
for benchmark in experiment.benchmarks]
return BenchmarkResults(label_names, benchmark_names_and_iterations,
- run_keyvals, read_perf_report, cwp_dso, weights)
+ run_keyvals, ignore_min_max, read_perf_report,
+ cwp_dso, weights)
def _GetElemByName(name, from_list):
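One consequence of the signature change above: `ignore_min_max` now sits before `read_perf_report` in `BenchmarkResults.__init__`, so callers passing arguments positionally (as `FromExperiment` does) must thread the new value through. A minimal hypothetical call, with placeholder variables matching the parameter names in the diff:

    # Hypothetical caller sketch; label_names, benchmark_names_and_iterations
    # and run_keyvals are placeholders, not defined in this patch.
    results = BenchmarkResults(label_names,
                               benchmark_names_and_iterations,
                               run_keyvals,
                               ignore_min_max=True)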
diff --git a/crosperf/settings_factory.py b/crosperf/settings_factory.py
index 8bc52a45..82956501 100644
--- a/crosperf/settings_factory.py
+++ b/crosperf/settings_factory.py
@@ -310,6 +310,13 @@ class GlobalSettings(Settings):
'benchmarks. ASLR is disabled by default',
required=False,
default=False))
+ self.AddField(
+ BooleanField(
+ 'ignore_min_max',
+ description='When doing math for the raw results, '
+ 'ignore min and max values to reduce noise.',
+ required=False,
+ default=False))
class SettingsFactory(object):
diff --git a/crosperf/settings_factory_unittest.py b/crosperf/settings_factory_unittest.py
index 43af7713..729a8d06 100755
--- a/crosperf/settings_factory_unittest.py
+++ b/crosperf/settings_factory_unittest.py
@@ -49,7 +49,7 @@ class GlobalSettingsTest(unittest.TestCase):
def test_init(self):
res = settings_factory.GlobalSettings('g_settings')
self.assertIsNotNone(res)
- self.assertEqual(len(res.fields), 27)
+ self.assertEqual(len(res.fields), 28)
self.assertEqual(res.GetField('name'), '')
self.assertEqual(res.GetField('board'), '')
self.assertEqual(res.GetField('remote'), None)
@@ -73,6 +73,7 @@ class GlobalSettingsTest(unittest.TestCase):
self.assertEqual(res.GetField('chrome_src'), '')
self.assertEqual(res.GetField('cwp_dso'), '')
self.assertEqual(res.GetField('enable_aslr'), False)
+ self.assertEqual(res.GetField('ignore_min_max'), False)
class SettingsFactoryTest(unittest.TestCase):
@@ -95,7 +96,7 @@ class SettingsFactoryTest(unittest.TestCase):
g_settings = settings_factory.SettingsFactory().GetSettings(
'global', 'global')
self.assertIsInstance(g_settings, settings_factory.GlobalSettings)
- self.assertEqual(len(g_settings.fields), 27)
+ self.assertEqual(len(g_settings.fields), 28)
if __name__ == '__main__':