| author    | Alexandre Rames <alexandre.rames@linaro.org> | 2016-05-23 13:28:29 +0100 |
|-----------|----------------------------------------------|---------------------------|
| committer | Alexandre Rames <alexandre.rames@linaro.org> | 2016-05-23 14:04:12 +0100 |
| commit    | 0af84bec28fe5c4dda123142b163c16e90336c22 (patch) |  |
| tree      | ff50bcfbe57d63adc3385fbe2a5c859e0a184113 |  |
| parent    | 2e38daa21106bd3b0c0ce828401c8e744694f7d8 (diff) |  |
| download  | art-testing-0af84bec28fe5c4dda123142b163c16e90336c22.tar.gz |  |
Print geomean in `compare.py`.
Change-Id: I01dfa8fc6e33b2a771264a4e52f6b81378b86a1e
| mode       | file                        | lines changed |
|------------|-----------------------------|---------------|
| -rwxr-xr-x | compare.py                  | 6             |
| -rwxr-xr-x | test/test.py                | 16            |
| -rwxr-xr-x | tools/benchmarks/compare.py | 10            |
| -rw-r--r-- | tools/utils.py              | 14            |
| -rw-r--r-- | tools/utils_stats.py        | 95            |

5 files changed, 42 insertions, 99 deletions
```diff
diff --git a/compare.py b/compare.py
--- a/compare.py
+++ b/compare.py
@@ -117,3 +117,9 @@ if __name__ == "__main__":
                                   args.ttest_p_threshold)
     PrintDiff(res_1, res_2, print_extended=args.print_extended)
+    if utils.HaveSameKeys(res_1, res_2):
+        utils_stats.ComputeAndPrintRelationGeomean(utils.Unflatten(res_1),
+                                                   utils.Unflatten(res_2))
+    else:
+        utils.Info("Not comparing the geomeans because the two result sets "
+                   "have different keys.")
 
diff --git a/test/test.py b/test/test.py
index 93e460d..5e24eaf 100755
--- a/test/test.py
+++ b/test/test.py
@@ -107,21 +107,6 @@ def TestBenchmarksOnTarget(target):
     return TestBenchmarksCommon(target)
 
 
-def TestBenchmarksCompareScript():
-    rc = 0
-    run_py = os.path.join(".", "tools", "benchmarks", "run.py")
-    compare_py = os.path.join(".", "tools", "benchmarks", "compare.py")
-    benchmarks_filter = ["--filter", "benchmarks/algorithm/*"]
-    rc |= TestCommand([run_py, "--output-json=/tmp/res1"] + benchmarks_filter, _cwd=utils.dir_root)
-    rc |= TestCommand([run_py, "--output-json=/tmp/res2"] + benchmarks_filter, _cwd=utils.dir_root)
-    rc |= TestCommand([compare_py, "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
-    rc |= TestCommand([compare_py, "--significant-changes", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
-    rc |= TestCommand([compare_py, "--order-by-diff", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
-    rc |= TestCommand([compare_py, "--filter", "benchmarks/algorithm/Crypto*", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
-    rc |= TestCommand([compare_py, "--filter-out", "benchmarks/algorithm/Crypto*", "/tmp/res1", "/tmp/res2"], _cwd=utils.dir_root)
-    return rc
-
-
 def TestBenchmarkPackages():
     benchmark_files = []
     # TODO: Automatically test that each benchmark has the correct package.
@@ -180,7 +165,6 @@ if __name__ == "__main__":
     rc = 0
     if not args.no_host_tests:
         rc |= TestBenchmarksOnHost()
-        rc |= TestBenchmarksCompareScript()
     rc |= TestBenchmarkPackages()
     rc |= TestLint(args.jobs)
     rc |= TestTopLevelWrapperScripts()
diff --git a/tools/benchmarks/compare.py b/tools/benchmarks/compare.py
index 9aebb3f..8df2f68 100755
--- a/tools/benchmarks/compare.py
+++ b/tools/benchmarks/compare.py
@@ -119,13 +119,9 @@ if __name__ == "__main__":
         FilterSignificantChanges(res_1, res_2,
                                  args.significant_diff_threshold,
                                  args.significant_deviation_threshold)
-    if args.order_by_diff:
-        regressions, improvements = OrderResultsByDifference(res_1, res_2)
-        utils_stats.PrintDiff(regressions[0], regressions[1], "REGRESSIONS")
-        print("")
-        utils_stats.PrintDiff(improvements[0], improvements[1], "IMPROVEMENTS")
-    else:
-        utils_stats.PrintDiff(res_1, res_2)
+
+    utils.Error('This script is deprecated. Use the top-level `compare.py` '
+                'script instead.')
 
     file_1.close()
     file_2.close()
diff --git a/tools/utils.py b/tools/utils.py
index c8a26cf..6eb5f71 100644
--- a/tools/utils.py
+++ b/tools/utils.py
@@ -404,3 +404,17 @@ def Unflatten(data, separator='/'):
     for k in data:
         UnflattenHelper(res, k.split(separator), Unflatten(data[k], separator))
     return res
+
+
+def HaveSameKeys(data_1, data_2):
+    if IsDictionary(data_1) and IsDictionary(data_2):
+        diff = set(data_1.keys()) ^ set(data_2.keys())
+        if diff:
+            return False
+        for k in data_1:
+            if not HaveSameKeys(data_1[k], data_2[k]):
+                return False
+        return True
+    elif type(data_1) == type(data_2):
+        return True
+    return False
diff --git a/tools/utils_stats.py b/tools/utils_stats.py
index b5c4b68..ec2a895 100644
--- a/tools/utils_stats.py
+++ b/tools/utils_stats.py
@@ -90,9 +90,6 @@ def ComputeStatsTests(list1, list2):
         pass
     return wilcoxon_p, ttest_p
 
-def GetSuiteName(benchmark):
-    return benchmark.split("/", 2)[1]
-
 def ComputeGeomeanHelper(data, res, current_key, compute_leaf_geomean):
     if isinstance(data, dict) or isinstance(data, OrderedDict):
         means = []
@@ -130,76 +127,22 @@ def ComputeAndPrintGeomeanWithRelativeDiff(data, key='OVERALL', compute_leaf_geo
     res = list(map(lambda x: [x[0], x[1], GetRatio(x[2], x[1])], res))
     utils_print.PrintTable(['', 'geomean', 'geomean error (%)'], res)
 
-# Print a table showing the difference between two runs of benchmarks.
-def PrintDiff(res_1, res_2, title = ''):
-    # Only print results for benchmarks present in both sets of results.
-    # Pay attention to maintain the order of the keys.
-    benchmarks = [b for b in res_1.keys() if b in res_2.keys()]
-    if not benchmarks: return
-    headers = [title, 'mean1', 'stdev1 (% of mean1)', 'mean2',
-               'stdev2 (% of mean2)', '(mean2 - mean1) / mean1 * 100']
-    results = []
-    stats_dict = {}
-    # collecting data
-    for bench in benchmarks:
-        suite_name = GetSuiteName(bench)
-
-        if (suite_name not in stats_dict):
-            stats_dict[suite_name] = {}
-
-        stats_dict[suite_name][bench] = []
-        data1 = m1, M1, median1, mad1, madp1, ave1, d1, dp1 = ComputeStats(res_1[bench])
-        data2 = m2, M2, median2, mad2, madp2, ave2, d2, dp2 = ComputeStats(res_2[bench])
-
-        stats_dict[suite_name][bench].append(data1)
-        stats_dict[suite_name][bench].append(data2)
-        diff = GetRelativeDiff(ave1, ave2)
-        results.append([bench, ave1, dp1, ave2, dp2, diff])
-
-    utils_print.PrintTable(headers, results)
-
-    # overall and per suite geomeans calculations
-    print("\nGEOMEANS:")
-    mean_list1 = []
-    mean_list2 = []
-    stdev_list1 = []
-    stdev_list2 = []
-    headers = ['suite', 'geomean', 'error', 'error (% of geomean)']
-    results = []
-
-    for suite_name in stats_dict:
-        suite_mean_list1 = []
-        suite_mean_list2 = []
-        suite_stdev_list1 = []
-        suite_stdev_list2 = []
-
-        for benchmark in stats_dict[suite_name]:
-            bench_mean1 = stats_dict[suite_name][benchmark][0][5]
-            bench_mean2 = stats_dict[suite_name][benchmark][1][5]
-            bench_stdev1 = stats_dict[suite_name][benchmark][0][6]
-            bench_stdev2 = stats_dict[suite_name][benchmark][1][6]
-
-            suite_mean_list1.append(bench_mean1)
-            suite_mean_list2.append(bench_mean2)
-            suite_stdev_list1.append(bench_stdev1)
-            suite_stdev_list2.append(bench_stdev2)
-
-            mean_list1.append(bench_mean1)
-            mean_list2.append(bench_mean2)
-
-            stdev_list1.append(bench_stdev1)
-            stdev_list2.append(bench_stdev2)
-
-        suite_geomean = CalcGeomean(suite_mean_list2) / CalcGeomean(suite_mean_list1)
-        suite_geomean_err = CalcGeomeanRelationError(suite_mean_list1, suite_mean_list2,
-                                                     suite_stdev_list1, suite_stdev_list2, suite_geomean)
-        results.append([suite_name, suite_geomean, suite_geomean_err,
-                        GetRatio(suite_geomean_err, suite_geomean)])
-
-    geomean = CalcGeomean(mean_list2) / CalcGeomean(mean_list1)
-    geomean_err = CalcGeomeanRelationError(mean_list1, mean_list2,
-                                           stdev_list1, stdev_list2, geomean)
-
-    results.append(['OVERALL', geomean, geomean_err,
-                    GetRatio(geomean_err, geomean)])
-    utils_print.PrintTable(headers, results)
+def ComputeAndPrintRelationGeomean(data_1, data_2):
+    if not data_1 or not data_2:
+        return
+    geomeans_1 = ComputeGeomean(data_1)
+    geomeans_2 = ComputeGeomean(data_2)
+    assert(len(geomeans_1) == len(geomeans_2))
+    res = []
+    for i in range(len(geomeans_1)):
+        g1 = geomeans_1[i]
+        g2 = geomeans_2[i]
+        assert(g1[0] == g2[0])
+        res.append([g1[0],  # Name.
+                    GetRatio(g1[1], g2[1]),  # Diff.
+                    GetRatio(g1[2], g1[1]), GetRatio(g2[2], g2[1]),  # Errors.
+                    g1[1], g2[1]])  # Values.
+
+    utils_print.PrintTable(['', 'geomean diff (%)',
+                            'geomean error 1 (%)', 'geomean error 2 (%)',
+                            'geomean 1', 'geomean 2',], res)
```
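
To make the intent of the patch easier to follow, here is a minimal, self-contained sketch of the two ideas it combines: a recursive key-structure check in the spirit of `HaveSameKeys`, and the ratio-of-geomeans summary that `ComputeAndPrintRelationGeomean` tabulates. Everything below (`have_same_keys`, `geomean`, the sample data) is a hypothetical illustration, not the repository's actual `utils`/`utils_stats` code.

```python
import math


def have_same_keys(a, b):
    """Recursively check that two nested dicts share the same key structure.

    Mirrors the intent of HaveSameKeys: dicts must have identical key sets
    at every level; non-dict leaves only need matching types.
    """
    if isinstance(a, dict) and isinstance(b, dict):
        if set(a) != set(b):
            return False
        return all(have_same_keys(a[k], b[k]) for k in a)
    return type(a) == type(b)


def geomean(values):
    # Geometric mean: the n-th root of the product of n positive values,
    # computed in log space for numerical stability.
    return math.exp(sum(math.log(v) for v in values) / len(values))


# Two hypothetical result sets: benchmark -> list of run times.
res_1 = {"suite": {"BenchA": [10.0, 10.2], "BenchB": [40.0, 41.0]}}
res_2 = {"suite": {"BenchA": [9.0, 9.1], "BenchB": [44.0, 43.5]}}

# Only compare geomeans when both sets cover the same benchmarks,
# as the patched top-level compare.py does.
if have_same_keys(res_1, res_2):
    means_1 = [sum(v) / len(v) for v in res_1["suite"].values()]
    means_2 = [sum(v) / len(v) for v in res_2["suite"].values()]
    print("geomean ratio:", geomean(means_2) / geomean(means_1))
```

The ratio of geomeans equals the geomean of the per-benchmark ratios, which is why it works as a scale-free overall summary of the change between two runs; the real helper additionally prints per-set geomean error estimates, which this sketch omits.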