about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAlexandre Rames <alexandre.rames@linaro.org>2016-05-23 13:28:29 +0100
committerAlexandre Rames <alexandre.rames@linaro.org>2016-05-23 14:04:12 +0100
commit0af84bec28fe5c4dda123142b163c16e90336c22 (patch)
treeff50bcfbe57d63adc3385fbe2a5c859e0a184113
parent2e38daa21106bd3b0c0ce828401c8e744694f7d8 (diff)
downloadart-testing-0af84bec28fe5c4dda123142b163c16e90336c22.tar.gz
Print geomean in `compare.py`.
Change-Id: I01dfa8fc6e33b2a771264a4e52f6b81378b86a1e
-rwxr-xr-xcompare.py6
-rwxr-xr-xtest/test.py16
-rwxr-xr-xtools/benchmarks/compare.py10
-rw-r--r--tools/utils.py14
-rw-r--r--tools/utils_stats.py95
5 files changed, 42 insertions, 99 deletions
diff --git a/compare.py b/compare.py
index d6a1f65..7ef36a9 100755
--- a/compare.py
+++ b/compare.py
@@ -117,3 +117,9 @@ if __name__ == "__main__":
args.ttest_p_threshold)
PrintDiff(res_1, res_2, print_extended=args.print_extended)
+ if utils.HaveSameKeys(res_1, res_2):
+ utils_stats.ComputeAndPrintRelationGeomean(utils.Unflatten(res_1),
+ utils.Unflatten(res_2))
+ else:
+ utils.Info("Not comparing the geomeans because the two result sets "
+ "have different keys.")
diff --git a/test/test.py b/test/test.py
index 93e460d..5e24eaf 100755
--- a/test/test.py
+++ b/test/test.py
@@ -107,21 +107,6 @@ def TestBenchmarksOnTarget(target):
return TestBenchmarksCommon(target)
-def TestBenchmarksCompareScript():
- rc = 0
- run_py = os.path.join(".", "tools", "benchmarks", "run.py")
- compare_py = os.path.join(".", "tools", "benchmarks", "compare.py")
- benchmarks_filter = ["--filter", "benchmarks/algorithm/*"]
- rc |= TestCommand([run_py, "--output-json=/tmp/res1"] + benchmarks_filter, _cwd=utils.dir_root)
- rc |= TestCommand([run_py, "--output-json=/tmp/res2"] + benchmarks_filter, _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "--significant-changes", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "--order-by-diff", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "--filter", "benchmarks/algorithm/Crypto*", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- rc |= TestCommand([compare_py, "--filter-out", "benchmarks/algorithm/Crypto*", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
- return rc
-
-
def TestBenchmarkPackages():
benchmark_files = []
# TODO: Automatically test that each benchmark has the correct package.
@@ -180,7 +165,6 @@ if __name__ == "__main__":
rc = 0
if not args.no_host_tests:
rc |= TestBenchmarksOnHost()
- rc |= TestBenchmarksCompareScript()
rc |= TestBenchmarkPackages()
rc |= TestLint(args.jobs)
rc |= TestTopLevelWrapperScripts()
diff --git a/tools/benchmarks/compare.py b/tools/benchmarks/compare.py
index 9aebb3f..8df2f68 100755
--- a/tools/benchmarks/compare.py
+++ b/tools/benchmarks/compare.py
@@ -119,13 +119,9 @@ if __name__ == "__main__":
FilterSignificantChanges(res_1, res_2,
args.significant_diff_threshold,
args.significant_deviation_threshold)
- if args.order_by_diff:
- regressions, improvements = OrderResultsByDifference(res_1, res_2)
- utils_stats.PrintDiff(regressions[0], regressions[1], "REGRESSIONS")
- print("")
- utils_stats.PrintDiff(improvements[0], improvements[1], "IMPROVEMENTS")
- else:
- utils_stats.PrintDiff(res_1, res_2)
+
+ utils.Error('This script is deprecated. Use the top-level `compare.py` '
+ 'script instead.')
file_1.close()
file_2.close()
diff --git a/tools/utils.py b/tools/utils.py
index c8a26cf..6eb5f71 100644
--- a/tools/utils.py
+++ b/tools/utils.py
@@ -404,3 +404,17 @@ def Unflatten(data, separator='/'):
for k in data:
UnflattenHelper(res, k.split(separator), Unflatten(data[k], separator))
return res
+
+
+def HaveSameKeys(data_1, data_2):
+ if IsDictionary(data_1) and IsDictionary(data_2):
+ diff = set(data_1.keys()) ^ set(data_2.keys())
+ if diff:
+ return False
+ for k in data_1:
+ if not HaveSameKeys(data_1[k], data_2[k]):
+ return False
+ return True
+ elif type(data_1) == type(data_2):
+ return True
+ return False
diff --git a/tools/utils_stats.py b/tools/utils_stats.py
index b5c4b68..ec2a895 100644
--- a/tools/utils_stats.py
+++ b/tools/utils_stats.py
@@ -90,9 +90,6 @@ def ComputeStatsTests(list1, list2):
pass
return wilcoxon_p, ttest_p
-def GetSuiteName(benchmark):
- return benchmark.split("/", 2)[1]
-
def ComputeGeomeanHelper(data, res, current_key, compute_leaf_geomean):
if isinstance(data, dict) or isinstance(data, OrderedDict):
means = []
@@ -130,76 +127,22 @@ def ComputeAndPrintGeomeanWithRelativeDiff(data, key='OVERALL', compute_leaf_geo
res = list(map(lambda x: [x[0], x[1], GetRatio(x[2], x[1])], res))
utils_print.PrintTable(['', 'geomean', 'geomean error (%)'], res)
-# Print a table showing the difference between two runs of benchmarks.
-def PrintDiff(res_1, res_2, title = ''):
- # Only print results for benchmarks present in both sets of results.
- # Pay attention to maintain the order of the keys.
- benchmarks = [b for b in res_1.keys() if b in res_2.keys()]
- if not benchmarks: return
- headers = [title, 'mean1', 'stdev1 (% of mean1)', 'mean2',
- 'stdev2 (% of mean2)', '(mean2 - mean1) / mean1 * 100']
- results = []
- stats_dict = {}
- # collecting data
- for bench in benchmarks:
- suite_name = GetSuiteName(bench)
-
- if (suite_name not in stats_dict):
- stats_dict[suite_name] = {}
-
- stats_dict[suite_name][bench] = []
- data1 = m1, M1, median1, mad1, madp1, ave1, d1, dp1 = ComputeStats(res_1[bench])
- data2 = m2, M2, median2, mad2, madp2, ave2, d2, dp2 = ComputeStats(res_2[bench])
-
- stats_dict[suite_name][bench].append(data1)
- stats_dict[suite_name][bench].append(data2)
- diff = GetRelativeDiff(ave1, ave2)
- results.append([bench, ave1, dp1, ave2, dp2, diff])
-
- utils_print.PrintTable(headers, results)
-
- # overall and per suite geomeans calculations
- print("\nGEOMEANS:")
- mean_list1 = []
- mean_list2 = []
- stdev_list1 = []
- stdev_list2 = []
- headers = ['suite', 'geomean', 'error', 'error (% of geomean)']
- results = []
-
- for suite_name in stats_dict:
- suite_mean_list1 = []
- suite_mean_list2 = []
- suite_stdev_list1 = []
- suite_stdev_list2 = []
-
- for benchmark in stats_dict[suite_name]:
- bench_mean1 = stats_dict[suite_name][benchmark][0][5]
- bench_mean2 = stats_dict[suite_name][benchmark][1][5]
- bench_stdev1 = stats_dict[suite_name][benchmark][0][6]
- bench_stdev2 = stats_dict[suite_name][benchmark][1][6]
-
- suite_mean_list1.append(bench_mean1)
- suite_mean_list2.append(bench_mean2)
- suite_stdev_list1.append(bench_stdev1)
- suite_stdev_list2.append(bench_stdev2)
-
- mean_list1.append(bench_mean1)
- mean_list2.append(bench_mean2)
-
- stdev_list1.append(bench_stdev1)
- stdev_list2.append(bench_stdev2)
-
- suite_geomean = CalcGeomean(suite_mean_list2) / CalcGeomean(suite_mean_list1)
- suite_geomean_err = CalcGeomeanRelationError(suite_mean_list1, suite_mean_list2,
- suite_stdev_list1, suite_stdev_list2, suite_geomean)
- results.append([suite_name, suite_geomean, suite_geomean_err,
- GetRatio(suite_geomean_err, suite_geomean)])
-
- geomean = CalcGeomean(mean_list2) / CalcGeomean(mean_list1)
- geomean_err = CalcGeomeanRelationError(mean_list1, mean_list2,
- stdev_list1, stdev_list2, geomean)
-
- results.append(['OVERALL', geomean, geomean_err,
- GetRatio(geomean_err, geomean)])
- utils_print.PrintTable(headers, results)
+def ComputeAndPrintRelationGeomean(data_1, data_2):
+ if not data_1 or not data_2:
+ return
+ geomeans_1 = ComputeGeomean(data_1)
+ geomeans_2 = ComputeGeomean(data_2)
+ assert(len(geomeans_1) == len(geomeans_2))
+ res = []
+ for i in range(len(geomeans_1)):
+ g1 = geomeans_1[i]
+ g2 = geomeans_2[i]
+ assert(g1[0] == g2[0])
+ res.append([g1[0], # Name.
+ GetRatio(g1[1], g2[1]), # Diff.
+ GetRatio(g1[2], g1[1]), GetRatio(g2[2], g2[1]), # Errors.
+ g1[1], g2[1]]) # Values.
+
+ utils_print.PrintTable(['', 'geomean diff (%)',
+ 'geomean error 1 (%)', 'geomean error 2 (%)',
+ 'geomean 1', 'geomean 2',], res)