about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAlexandre Rames <alexandre.rames@linaro.org>2016-05-23 13:28:29 +0100
committerAlexandre Rames <alexandre.rames@linaro.org>2016-05-23 14:04:12 +0100
commit0af84bec28fe5c4dda123142b163c16e90336c22 (patch)
treeff50bcfbe57d63adc3385fbe2a5c859e0a184113
parent2e38daa21106bd3b0c0ce828401c8e744694f7d8 (diff)
downloadart-testing-0af84bec28fe5c4dda123142b163c16e90336c22.tar.gz
Print geomean in `compare.py`.
Change-Id: I01dfa8fc6e33b2a771264a4e52f6b81378b86a1e
-rwxr-xr-xcompare.py6
-rwxr-xr-xtest/test.py16
-rwxr-xr-xtools/benchmarks/compare.py10
-rw-r--r--tools/utils.py14
-rw-r--r--tools/utils_stats.py95
5 files changed, 42 insertions, 99 deletions
diff --git a/compare.py b/compare.py
index d6a1f65..7ef36a9 100755
--- a/compare.py
+++ b/compare.py
@@ -117,3 +117,9 @@ if __name__ == "__main__":
args.ttest_p_threshold)
PrintDiff(res_1, res_2, print_extended=args.print_extended)
+ if utils.HaveSameKeys(res_1, res_2):
+ utils_stats.ComputeAndPrintRelationGeomean(utils.Unflatten(res_1),
+ utils.Unflatten(res_2))
+ else:
+ utils.Info("Not comparing the geomeans because the two result sets "
+ "have different keys.")
diff --git a/test/test.py b/test/test.py
index 93e460d..5e24eaf 100755
--- a/test/test.py
+++ b/test/test.py
@@ -107,21 +107,6 @@ def TestBenchmarksOnTarget(target):
return TestBenchmarksCommon(target)
-def TestBenchmarksCompareScript():
- rc = 0
- run_py = os.path.join(".", "tools", "benchmarks", "run.py")
- compare_py = os.path.join(".", "tools", "benchmarks", "compare.py")
- benchmarks_filter = ["--filter", "benchmarks/algorithm/*"]
- rc |= TestCommand([run_py, "--output-json=/tmp/res1"] + benchmarks_filter, _cwd=utils.dir_root)
- rc |= TestCommand([run_py, "--output-json=/tmp/res2"] + benchmarks_filter, _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "--significant-changes", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "--order-by-diff", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "--filter", "benchmarks/algorithm/Crypto*", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "--filter-out", "benchmarks/algorithm/Crypto*", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- return rc
-
-
def TestBenchmarkPackages():
benchmark_files = []
# TODO: Automatically test that each benchmark has the correct package.
@@ -180,7 +165,6 @@ if __name__ == "__main__":
rc = 0
if not args.no_host_tests:
rc |= TestBenchmarksOnHost()
- rc |= TestBenchmarksCompareScript()
rc |= TestBenchmarkPackages()
rc |= TestLint(args.jobs)
rc |= TestTopLevelWrapperScripts()
diff --git a/tools/benchmarks/compare.py b/tools/benchmarks/compare.py
index 9aebb3f..8df2f68 100755
--- a/tools/benchmarks/compare.py
+++ b/tools/benchmarks/compare.py
@@ -119,13 +119,9 @@ if __name__ == "__main__":
FilterSignificantChanges(res_1, res_2,
args.significant_diff_threshold,
args.significant_deviation_threshold)
- if args.order_by_diff:
- regressions, improvements = OrderResultsByDifference(res_1, res_2)
- utils_stats.PrintDiff(regressions[0], regressions[1], "REGRESSIONS")
- print("")
- utils_stats.PrintDiff(improvements[0], improvements[1], "IMPROVEMENTS")
- else:
- utils_stats.PrintDiff(res_1, res_2)
+
+ utils.Error('This script is deprecated. Use the top-level `compare.py` '
+ 'script instead.')
file_1.close()
file_2.close()
diff --git a/tools/utils.py b/tools/utils.py
index c8a26cf..6eb5f71 100644
--- a/tools/utils.py
+++ b/tools/utils.py
@@ -404,3 +404,17 @@ def Unflatten(data, separator='/'):
for k in data:
UnflattenHelper(res, k.split(separator), Unflatten(data[k], separator))
return res
+
+
+def HaveSameKeys(data_1, data_2):
+ if IsDictionary(data_1) and IsDictionary(data_2):
+ diff = set(data_1.keys()) ^ set(data_2.keys())
+ if diff:
+ return False
+ for k in data_1:
+ if not HaveSameKeys(data_1[k], data_2[k]):
+ return False
+ return True
+ elif type(data_1) == type(data_2):
+ return True
+ return False
diff --git a/tools/utils_stats.py b/tools/utils_stats.py
index b5c4b68..ec2a895 100644
--- a/tools/utils_stats.py
+++ b/tools/utils_stats.py
@@ -90,9 +90,6 @@ def ComputeStatsTests(list1, list2):
pass
return wilcoxon_p, ttest_p
-def GetSuiteName(benchmark):
- return benchmark.split("/", 2)[1]
-
def ComputeGeomeanHelper(data, res, current_key, compute_leaf_geomean):
if isinstance(data, dict) or isinstance(data, OrderedDict):
means = []
@@ -130,76 +127,22 @@ def ComputeAndPrintGeomeanWithRelativeDiff(data, key='OVERALL', compute_leaf_geo
res = list(map(lambda x: [x[0], x[1], GetRatio(x[2], x[1])], res))
utils_print.PrintTable(['', 'geomean', 'geomean error (%)'], res)
-# Print a table showing the difference between two runs of benchmarks.
-def PrintDiff(res_1, res_2, title = ''):
- # Only print results for benchmarks present in both sets of results.
- # Pay attention to maintain the order of the keys.
- benchmarks = [b for b in res_1.keys() if b in res_2.keys()]
- if not benchmarks: return
- headers = [title, 'mean1', 'stdev1 (% of mean1)', 'mean2',
- 'stdev2 (% of mean2)', '(mean2 - mean1) / mean1 * 100']
- results = []
- stats_dict = {}
- # collecting data
- for bench in benchmarks:
- suite_name = GetSuiteName(bench)
-
- if (suite_name not in stats_dict):
- stats_dict[suite_name] = {}
-
- stats_dict[suite_name][bench] = []
- data1 = m1, M1, median1, mad1, madp1, ave1, d1, dp1 = ComputeStats(res_1[bench])
- data2 = m2, M2, median2, mad2, madp2, ave2, d2, dp2 = ComputeStats(res_2[bench])
-
- stats_dict[suite_name][bench].append(data1)
- stats_dict[suite_name][bench].append(data2)
- diff = GetRelativeDiff(ave1, ave2)
- results.append([bench, ave1, dp1, ave2, dp2, diff])
-
- utils_print.PrintTable(headers, results)
-
- # overall and per suite geomeans calculations
- print("\nGEOMEANS:")
- mean_list1 = []
- mean_list2 = []
- stdev_list1 = []
- stdev_list2 = []
- headers = ['suite', 'geomean', 'error', 'error (% of geomean)']
- results = []
-
- for suite_name in stats_dict:
- suite_mean_list1 = []
- suite_mean_list2 = []
- suite_stdev_list1 = []
- suite_stdev_list2 = []
-
- for benchmark in stats_dict[suite_name]:
- bench_mean1 = stats_dict[suite_name][benchmark][0][5]
- bench_mean2 = stats_dict[suite_name][benchmark][1][5]
- bench_stdev1 = stats_dict[suite_name][benchmark][0][6]
- bench_stdev2 = stats_dict[suite_name][benchmark][1][6]
-
- suite_mean_list1.append(bench_mean1)
- suite_mean_list2.append(bench_mean2)
- suite_stdev_list1.append(bench_stdev1)
- suite_stdev_list2.append(bench_stdev2)
-
- mean_list1.append(bench_mean1)
- mean_list2.append(bench_mean2)
-
- stdev_list1.append(bench_stdev1)
- stdev_list2.append(bench_stdev2)
-
- suite_geomean = CalcGeomean(suite_mean_list2) / CalcGeomean(suite_mean_list1)
- suite_geomean_err = CalcGeomeanRelationError(suite_mean_list1, suite_mean_list2,
- suite_stdev_list1, suite_stdev_list2, suite_geomean)
- results.append([suite_name, suite_geomean, suite_geomean_err,
- GetRatio(suite_geomean_err, suite_geomean)])
-
- geomean = CalcGeomean(mean_list2) / CalcGeomean(mean_list1)
- geomean_err = CalcGeomeanRelationError(mean_list1, mean_list2,
- stdev_list1, stdev_list2, geomean)
-
- results.append(['OVERALL', geomean, geomean_err,
- GetRatio(geomean_err, geomean)])
- utils_print.PrintTable(headers, results)
+def ComputeAndPrintRelationGeomean(data_1, data_2):
+ if not data_1 or not data_2:
+ return
+ geomeans_1 = ComputeGeomean(data_1)
+ geomeans_2 = ComputeGeomean(data_2)
+ assert(len(geomeans_1) == len(geomeans_2))
+ res = []
+ for i in range(len(geomeans_1)):
+ g1 = geomeans_1[i]
+ g2 = geomeans_2[i]
+ assert(g1[0] == g2[0])
+ res.append([g1[0], # Name.
+ GetRatio(g1[1], g2[1]), # Diff.
+ GetRatio(g1[2], g1[1]), GetRatio(g2[2], g2[1]), # Errors.
+ g1[1], g2[1]]) # Values.
+
+ utils_print.PrintTable(['', 'geomean diff (%)',
+ 'geomean error 1 (%)', 'geomean error 2 (%)',
+ 'geomean 1', 'geomean 2',], res)