author     Zhizhou Yang <zhizhouy@google.com>      2019-03-14 13:25:06 -0700
committer  chrome-bot <chrome-bot@chromium.org>    2019-03-15 16:48:42 -0700
commit     1a5a3163b7fe75e053282e7d67148c5f4e409414 (patch)
tree       f2c94f394dcc3ec171a41b492ba533f4121a1be0
parent     6512f0baf7a540267b735accac77cf6903a63306 (diff)
crosperf: Feature to ignore min and max value in results
This patch provides an option for the user to ignore the min and max
values in the results when generating a report. The user can enable
this feature by specifying `ignore_min_max: True` (the default is
False) in the experiment file. When the number of values in a single
test is smaller than 3, we automatically ignore this option for that
test and print a warning.

BUG=chromium:938758
TEST=Tested with examples in general/cwp mode report generation.
Passed all unit tests.

Change-Id: I36a4c4d99836c201cdd2f2f9f2a4b1a4ffdaa47d
Reviewed-on: https://chromium-review.googlesource.com/1521054
Commit-Ready: Zhizhou Yang <zhizhouy@google.com>
Tested-by: Zhizhou Yang <zhizhouy@google.com>
Reviewed-by: Zhizhou Yang <zhizhouy@google.com>
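For reference, a minimal experiment-file sketch enabling the new option. Only the `ignore_min_max` field comes from this patch; the overall layout and the board/remote/benchmark values are assumptions based on typical crosperf experiment files:

    # Hypothetical crosperf experiment file; everything except
    # ignore_min_max is placeholder content, not part of this patch.
    board: samus
    remote: 192.168.0.1
    ignore_min_max: True

    benchmark: octane {
      iterations: 5
    }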
-rw-r--r--  cros_utils/tabulator.py                |  50
-rw-r--r--  cros_utils/tabulator_test.py           |  75
-rwxr-xr-x  crosperf/crosperf_unittest.py          |   2
-rw-r--r--  crosperf/experiment.py                 |   4
-rw-r--r--  crosperf/experiment_factory.py         |   4
-rw-r--r--  crosperf/results_report.py             |  49
-rw-r--r--  crosperf/settings_factory.py           |   7
-rwxr-xr-x  crosperf/settings_factory_unittest.py  |   5
8 files changed, 123 insertions(+), 73 deletions(-)
diff --git a/cros_utils/tabulator.py b/cros_utils/tabulator.py
index 94c49424..ed93de7a 100644
--- a/cros_utils/tabulator.py
+++ b/cros_utils/tabulator.py
@@ -87,6 +87,17 @@ def _StripNone(results):
return res
+def _RemoveMinMax(cell, values):
+ if len(values) < 3:
+ print('WARNING: Values count is less than 3, not ignoring min/max values')
+ print('WARNING: Cell name:', cell.name, 'Values:', values)
+ return values
+
+ values.remove(min(values))
+ values.remove(max(values))
+ return values
+
+
class TableGenerator(object):
"""Creates a table from a list of list of dicts.
@@ -529,7 +540,13 @@ class StringMeanResult(Result):
class AmeanResult(StringMeanResult):
"""Arithmetic mean."""
+ def __init__(self, ignore_min_max=False):
+ super(AmeanResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
cell.value = numpy.mean(values)
@@ -579,14 +596,26 @@ class NumericalResult(Result):
class StdResult(NumericalResult):
"""Standard deviation."""
+ def __init__(self, ignore_min_max=False):
+ super(StdResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
cell.value = numpy.std(values)
class CoeffVarResult(NumericalResult):
"""Standard deviation / Mean"""
+ def __init__(self, ignore_min_max=False):
+ super(CoeffVarResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
if numpy.mean(values) != 0.0:
noise = numpy.abs(numpy.std(values) / numpy.mean(values))
else:
@@ -619,7 +648,14 @@ class ComparisonResult(Result):
class PValueResult(ComparisonResult):
"""P-value."""
+ def __init__(self, ignore_min_max=False):
+ super(PValueResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
+ baseline_values = _RemoveMinMax(cell, baseline_values)
if len(values) < 2 or len(baseline_values) < 2:
cell.value = float('nan')
return
@@ -674,7 +710,14 @@ class KeyAwareComparisonResult(ComparisonResult):
class AmeanRatioResult(KeyAwareComparisonResult):
"""Ratio of arithmetic means of values vs. baseline values."""
+ def __init__(self, ignore_min_max=False):
+ super(AmeanRatioResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
+ baseline_values = _RemoveMinMax(cell, baseline_values)
if numpy.mean(baseline_values) != 0:
cell.value = numpy.mean(values) / numpy.mean(baseline_values)
elif numpy.mean(values) != 0:
@@ -688,7 +731,14 @@ class AmeanRatioResult(KeyAwareComparisonResult):
class GmeanRatioResult(KeyAwareComparisonResult):
"""Ratio of geometric means of values vs. baseline values."""
+ def __init__(self, ignore_min_max=False):
+ super(GmeanRatioResult, self).__init__()
+ self.ignore_min_max = ignore_min_max
+
def _ComputeFloat(self, cell, values, baseline_values):
+ if self.ignore_min_max:
+ values = _RemoveMinMax(cell, values)
+ baseline_values = _RemoveMinMax(cell, baseline_values)
if self._GetGmean(baseline_values) != 0:
cell.value = self._GetGmean(values) / self._GetGmean(baseline_values)
elif self._GetGmean(values) != 0:
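To make the new trimming behavior concrete, here is a small standalone Python sketch (an illustration, not code from the patch) that mirrors `_RemoveMinMax` and reproduces the expectations in testIgnoreMinMax below:

    import numpy

    def remove_min_max(values):
      # Mirrors _RemoveMinMax above: with fewer than 3 values there is
      # nothing sensible to trim, so the list is returned unchanged.
      if len(values) < 3:
        return values
      values.remove(min(values))  # drops a single occurrence of the min
      values.remove(max(values))  # drops a single occurrence of the max
      return values

    print(numpy.mean([1, 2]))                     # 1.5 (too few values to trim)
    print(numpy.mean(remove_min_max([1, 2, 8])))  # 2.0 (1 and 8 are dropped)

Note that, like the original, the sketch mutates the list it is given; callers that need the untrimmed values should pass a copy.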
diff --git a/cros_utils/tabulator_test.py b/cros_utils/tabulator_test.py
index 943d9349..33c8da25 100644
--- a/cros_utils/tabulator_test.py
+++ b/cros_utils/tabulator_test.py
@@ -73,20 +73,22 @@ class TabulatorTest(unittest.TestCase):
b = tabulator.Result()._GetGmean(a)
self.assertTrue(b >= 0.99e+308 and b <= 1.01e+308)
+ def testIgnoreMinMax(self):
+ amr = tabulator.AmeanResult(ignore_min_max=True)
+ cell = tabulator.Cell()
+ values = [1, 2]
+ amr.Compute(cell, values, None)
+ self.assertTrue(cell.value == 1.5)
+ values = [1, 2, 8]
+ amr.Compute(cell, values, None)
+ self.assertTrue(cell.value == 2)
+
def testTableGenerator(self):
- runs = [[{
- 'k1': '10',
- 'k2': '12'
- }, {
- 'k1': '13',
- 'k2': '14',
- 'k3': '15'
- }], [{
- 'k1': '50',
- 'k2': '51',
- 'k3': '52',
- 'k4': '53'
- }]]
+ # yapf: disable
+ runs = [[{'k1': '10', 'k2': '12'},
+ {'k1': '13', 'k2': '14', 'k3': '15'}],
+ [{'k1': '50', 'k2': '51', 'k3': '52', 'k4': '53'}]]
+ # yapf: enable
labels = ['vanilla', 'modified']
tg = tabulator.TableGenerator(runs, labels)
table = tg.GetTable()
@@ -113,24 +115,14 @@ class TabulatorTest(unittest.TestCase):
self.assertTrue(table)
def testSamplesTableGenerator(self):
+ # yapf: disable
keyvals = {
- 'bench1': [[{
- 'samples': 1
- }, {
- 'samples': 2
- }], [{
- 'samples': 3
- }, {
- 'samples': 4
- }]],
- 'bench2': [[{
- 'samples': 5
- }, {}], [{
- 'samples': 6
- }, {
- 'samples': 7
- }]]
+ 'bench1': [[{'samples': 1}, {'samples': 2}],
+ [{'samples': 3}, {'samples': 4}]],
+ 'bench2': [[{'samples': 5}, {}],
+ [{'samples': 6}, {'samples': 7}]]
}
+ # yapf: enable
weights = {'bench1': 0.2, 'bench2': 0.7}
iter_counts = {'bench1': 2, 'bench2': 2}
labels = ['vanilla', 'modified']
@@ -152,22 +144,19 @@ class TabulatorTest(unittest.TestCase):
header = table.pop(0)
self.assertTrue(header == ['Benchmarks', 'Weights', 'vanilla', 'modified'])
row = table.pop(0)
- self.assertTrue(row == [
- 'bench1', 0.2, ((2, 0), [1 * 0.2, 2 * 0.2]), ((2, 0),
- [3 * 0.2, 4 * 0.2])
- ])
+ # yapf: disable
+ self.assertTrue(row == ['bench1', 0.2,
+ ((2, 0), [1 * 0.2, 2 * 0.2]),
+ ((2, 0), [3 * 0.2, 4 * 0.2])])
row = table.pop(0)
- self.assertTrue(row == [
- 'bench2', 0.7, ((1, 1), [5 * 0.7, None]), ((2, 0), [6 * 0.7, 7 * 0.7])
- ])
+ self.assertTrue(row == ['bench2', 0.7,
+ ((1, 1), [5 * 0.7, None]),
+ ((2, 0), [6 * 0.7, 7 * 0.7])])
row = table.pop(0)
- self.assertTrue(row == [
- 'Composite Benchmark (samples)', 'N/A',
- ((1, 1),
- [1 * 0.2 +
- 5 * 0.7, None]), ((2, 0), [3 * 0.2 + 6 * 0.7, 4 * 0.2 + 7 * 0.7])
- ])
-
+ self.assertTrue(row == ['Composite Benchmark (samples)', 'N/A',
+ ((1, 1), [1 * 0.2 + 5 * 0.7, None]),
+ ((2, 0), [3 * 0.2 + 6 * 0.7, 4 * 0.2 + 7 * 0.7])])
+ # yapf: enable
self.assertTrue('Composite Benchmark' in new_keyvals.keys())
self.assertTrue('Composite Benchmark' in new_iter_counts.keys())
diff --git a/crosperf/crosperf_unittest.py b/crosperf/crosperf_unittest.py
index bfa61391..4fb58793 100755
--- a/crosperf/crosperf_unittest.py
+++ b/crosperf/crosperf_unittest.py
@@ -58,7 +58,7 @@ class CrosperfTest(unittest.TestCase):
settings = crosperf.ConvertOptionsToSettings(options)
self.assertIsNotNone(settings)
self.assertIsInstance(settings, settings_factory.GlobalSettings)
- self.assertEqual(len(settings.fields), 27)
+ self.assertEqual(len(settings.fields), 28)
self.assertTrue(settings.GetField('rerun'))
argv = ['crosperf/crosperf.py', 'temp.exp']
options, _ = parser.parse_known_args(argv)
diff --git a/crosperf/experiment.py b/crosperf/experiment.py
index d5770d46..63e6a104 100644
--- a/crosperf/experiment.py
+++ b/crosperf/experiment.py
@@ -28,7 +28,8 @@ class Experiment(object):
def __init__(self, name, remote, working_directory, chromeos_root,
cache_conditions, labels, benchmarks, experiment_file, email_to,
acquire_timeout, log_dir, log_level, share_cache,
- results_directory, locks_directory, cwp_dso, enable_aslr):
+ results_directory, locks_directory, cwp_dso, enable_aslr,
+ ignore_min_max):
self.name = name
self.working_directory = working_directory
self.remote = remote
@@ -56,6 +57,7 @@ class Experiment(object):
self.locked_machines = []
self.cwp_dso = cwp_dso
self.enable_aslr = enable_aslr
+ self.ignore_min_max = ignore_min_max
if not remote:
raise RuntimeError('No remote hosts specified')
diff --git a/crosperf/experiment_factory.py b/crosperf/experiment_factory.py
index 8fc4ea33..b1e12be9 100644
--- a/crosperf/experiment_factory.py
+++ b/crosperf/experiment_factory.py
@@ -148,6 +148,7 @@ class ExperimentFactory(object):
if cwp_dso and not cwp_dso in dso_list:
raise RuntimeError('The DSO specified is not supported')
enable_aslr = global_settings.GetField('enable_aslr')
+ ignore_min_max = global_settings.GetField('ignore_min_max')
# Default cache hit conditions. The image checksum in the cache and the
# computed checksum of the image must match. Also a cache file must exist.
@@ -372,7 +373,8 @@ class ExperimentFactory(object):
chromeos_root, cache_conditions, labels, benchmarks,
experiment_file.Canonicalize(), email,
acquire_timeout, log_dir, log_level, share_cache,
- results_dir, locks_dir, cwp_dso, enable_aslr)
+ results_dir, locks_dir, cwp_dso, enable_aslr,
+ ignore_min_max)
return experiment
diff --git a/crosperf/results_report.py b/crosperf/results_report.py
index ba4ccd88..90312a33 100644
--- a/crosperf/results_report.py
+++ b/crosperf/results_report.py
@@ -273,36 +273,31 @@ class ResultsReport(object):
return ret
def GetFullTables(self, perf=False):
+ ignore_min_max = self.benchmark_results.ignore_min_max
columns = [
Column(RawResult(), Format()),
Column(MinResult(), Format()),
Column(MaxResult(), Format()),
- Column(AmeanResult(), Format()),
- Column(StdResult(), Format(), 'StdDev'),
- Column(CoeffVarResult(), CoeffVarFormat(), 'StdDev/Mean'),
- Column(GmeanRatioResult(), RatioFormat(), 'GmeanSpeedup'),
- Column(PValueResult(), PValueFormat(), 'p-value')
+ Column(AmeanResult(ignore_min_max), Format()),
+ Column(StdResult(ignore_min_max), Format(), 'StdDev'),
+ Column(CoeffVarResult(ignore_min_max), CoeffVarFormat(), 'StdDev/Mean'),
+ Column(GmeanRatioResult(ignore_min_max), RatioFormat(), 'GmeanSpeedup'),
+ Column(PValueResult(ignore_min_max), PValueFormat(), 'p-value')
]
return self._GetTablesWithColumns(columns, 'full', perf)
def GetSummaryTables(self, summary_type=''):
+ ignore_min_max = self.benchmark_results.ignore_min_max
+ columns = []
if summary_type == 'samples':
- columns = [
- Column(IterationResult(), Format(), 'Iterations [Pass:Fail]'),
- Column(AmeanResult(), Format(), 'Weighted Samples Amean'),
- Column(StdResult(), Format(), 'StdDev'),
- Column(CoeffVarResult(), CoeffVarFormat(), 'StdDev/Mean'),
- Column(GmeanRatioResult(), RatioFormat(), 'GmeanSpeedup'),
- Column(PValueResult(), PValueFormat(), 'p-value')
- ]
- else:
- columns = [
- Column(AmeanResult(), Format()),
- Column(StdResult(), Format(), 'StdDev'),
- Column(CoeffVarResult(), CoeffVarFormat(), 'StdDev/Mean'),
- Column(GmeanRatioResult(), RatioFormat(), 'GmeanSpeedup'),
- Column(PValueResult(), PValueFormat(), 'p-value')
- ]
+ columns += [Column(IterationResult(), Format(), 'Iterations [Pass:Fail]')]
+ columns += [
+ Column(AmeanResult(ignore_min_max), Format()),
+ Column(StdResult(ignore_min_max), Format(), 'StdDev'),
+ Column(CoeffVarResult(ignore_min_max), CoeffVarFormat(), 'StdDev/Mean'),
+ Column(GmeanRatioResult(ignore_min_max), RatioFormat(), 'GmeanSpeedup'),
+ Column(PValueResult(ignore_min_max), PValueFormat(), 'p-value')
+ ]
return self._GetTablesWithColumns(columns, 'summary', summary_type)
@@ -396,9 +391,9 @@ class TextResultsReport(ResultsReport):
table = _PrintTable(self.GetStatusTable(), output_type)
sections.append(self._MakeSection('Benchmark Run Status', table))
- perf_table = _PrintTable(
- self.GetSummaryTables(summary_type='perf'), output_type)
- if perf_table and not self.benchmark_results.cwp_dso:
+ if not self.benchmark_results.cwp_dso:
+ perf_table = _PrintTable(
+ self.GetSummaryTables(summary_type='perf'), output_type)
sections.append(self._MakeSection('Perf Data', perf_table))
if experiment is not None:
@@ -600,6 +595,7 @@ class BenchmarkResults(object):
label_names,
benchmark_names_and_iterations,
run_keyvals,
+ ignore_min_max=False,
read_perf_report=None,
cwp_dso=None,
weights=None):
@@ -614,6 +610,7 @@ class BenchmarkResults(object):
self.benchmark_names_and_iterations = benchmark_names_and_iterations
self.iter_counts = dict(benchmark_names_and_iterations)
self.run_keyvals = run_keyvals
+ self.ignore_min_max = ignore_min_max
self.read_perf_report = read_perf_report
self.cwp_dso = cwp_dso
self.weights = dict(weights) if weights else None
@@ -624,13 +621,15 @@ class BenchmarkResults(object):
benchmark_names_and_iterations = [(benchmark.name, benchmark.iterations)
for benchmark in experiment.benchmarks]
run_keyvals = _ExperimentToKeyvals(experiment, for_json_report)
+ ignore_min_max = experiment.ignore_min_max
read_perf_report = functools.partial(_ReadExperimentPerfReport,
experiment.results_directory)
cwp_dso = experiment.cwp_dso
weights = [(benchmark.name, benchmark.weight)
for benchmark in experiment.benchmarks]
return BenchmarkResults(label_names, benchmark_names_and_iterations,
- run_keyvals, read_perf_report, cwp_dso, weights)
+ run_keyvals, ignore_min_max, read_perf_report,
+ cwp_dso, weights)
def _GetElemByName(name, from_list):
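One consequence of the signature change above: `ignore_min_max` now sits before `read_perf_report` in `BenchmarkResults.__init__`, so callers passing arguments positionally (as `FromExperiment` does) must thread the new value through. A minimal hypothetical call, with placeholder variables matching the parameter names in the diff:

    # Hypothetical caller sketch; label_names, benchmark_names_and_iterations
    # and run_keyvals are placeholders, not defined in this patch.
    results = BenchmarkResults(label_names,
                               benchmark_names_and_iterations,
                               run_keyvals,
                               ignore_min_max=True)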
diff --git a/crosperf/settings_factory.py b/crosperf/settings_factory.py
index 8bc52a45..82956501 100644
--- a/crosperf/settings_factory.py
+++ b/crosperf/settings_factory.py
@@ -310,6 +310,13 @@ class GlobalSettings(Settings):
'benchmarks. ASLR is disabled by default',
required=False,
default=False))
+ self.AddField(
+ BooleanField(
+ 'ignore_min_max',
+ description='When doing math for the raw results, '
+ 'ignore min and max values to reduce noise.',
+ required=False,
+ default=False))
class SettingsFactory(object):
diff --git a/crosperf/settings_factory_unittest.py b/crosperf/settings_factory_unittest.py
index 43af7713..729a8d06 100755
--- a/crosperf/settings_factory_unittest.py
+++ b/crosperf/settings_factory_unittest.py
@@ -49,7 +49,7 @@ class GlobalSettingsTest(unittest.TestCase):
def test_init(self):
res = settings_factory.GlobalSettings('g_settings')
self.assertIsNotNone(res)
- self.assertEqual(len(res.fields), 27)
+ self.assertEqual(len(res.fields), 28)
self.assertEqual(res.GetField('name'), '')
self.assertEqual(res.GetField('board'), '')
self.assertEqual(res.GetField('remote'), None)
@@ -73,6 +73,7 @@ class GlobalSettingsTest(unittest.TestCase):
self.assertEqual(res.GetField('chrome_src'), '')
self.assertEqual(res.GetField('cwp_dso'), '')
self.assertEqual(res.GetField('enable_aslr'), False)
+ self.assertEqual(res.GetField('ignore_min_max'), False)
class SettingsFactoryTest(unittest.TestCase):
@@ -95,7 +96,7 @@ class SettingsFactoryTest(unittest.TestCase):
g_settings = settings_factory.SettingsFactory().GetSettings(
'global', 'global')
self.assertIsInstance(g_settings, settings_factory.GlobalSettings)
- self.assertEqual(len(g_settings.fields), 27)
+ self.assertEqual(len(g_settings.fields), 28)
if __name__ == '__main__':