author | Eric <eric@efcs.ca> | 2016-08-09 12:33:57 -0600
committer | GitHub <noreply@github.com> | 2016-08-09 12:33:57 -0600
commit | 5eac66249ce28f6baae80a2565d8d53e1a3f3945 (patch)
tree | 351eeb39761672e8830cbb916871793aa44d8dae /tools
parent | de4ead7a53a3ee1128e0d86150fc8ef598d7ffbd (diff)
download | google-benchmark-5eac66249ce28f6baae80a2565d8d53e1a3f3945.tar.gz
Add a "compare_bench.py" tooling script. (#266)
This patch adds the compare_bench.py utility, which can be used to compare the results of two benchmark runs.
The program is invoked like:
$ compare_bench.py <old-benchmark> <new-benchmark> [benchmark options]...
Where <old-benchmark> and <new-benchmark> each specify either a benchmark executable or a JSON output file. The type of each input file is detected automatically: if a benchmark executable is specified, the benchmark is run to obtain the results; otherwise the results are simply loaded from the JSON output file.
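The script itself is a thin wrapper around the new gbench package added in this patch; a condensed sketch of its flow (argument parsing and error handling omitted, the full 30-line script appears in the diff below):

```python
import sys
from gbench import util, report

# Each positional argument may be a benchmark executable or a JSON output
# file; run_or_load_benchmark() runs the former and loads the latter.
json1 = util.run_or_load_benchmark(sys.argv[1], sys.argv[3:])
json2 = util.run_or_load_benchmark(sys.argv[2], sys.argv[3:])

# Print the per-benchmark change in real time and CPU time.
for line in report.generate_difference_report(json1, json2):
    print(line)
```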
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/compare_bench.py | 30
-rw-r--r-- | tools/gbench/Inputs/test1_run1.json | 46
-rw-r--r-- | tools/gbench/Inputs/test1_run2.json | 46
-rw-r--r-- | tools/gbench/__init__.py | 8
-rw-r--r-- | tools/gbench/report.py | 136
-rw-r--r-- | tools/gbench/util.py | 130
6 files changed, 396 insertions, 0 deletions
diff --git a/tools/compare_bench.py b/tools/compare_bench.py
new file mode 100755
index 0000000..ed0f133
--- /dev/null
+++ b/tools/compare_bench.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+"""
+compare_bench.py - Compare two benchmarks or their results and report the
+                   difference.
+"""
+import sys
+import gbench
+from gbench import util, report
+
+def main():
+    # Parse the command line flags
+    def usage():
+        print('compare_bench.py <test1> <test2> [benchmark options]...')
+        sys.exit(1)
+    if '--help' in sys.argv or len(sys.argv) < 3:
+        usage()
+    tests = sys.argv[1:3]
+    bench_opts = sys.argv[3:]
+    bench_opts = list(bench_opts)
+    # Run the benchmarks and report the results
+    json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
+    json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
+    output_lines = gbench.report.generate_difference_report(json1, json2)
+    print('Comparing %s to %s' % (tests[0], tests[1]))
+    for ln in output_lines:
+        print(ln)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/gbench/Inputs/test1_run1.json b/tools/gbench/Inputs/test1_run1.json
new file mode 100644
index 0000000..da9425e
--- /dev/null
+++ b/tools/gbench/Inputs/test1_run1.json
@@ -0,0 +1,46 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_SameTimes",
+      "iterations": 1000,
+      "real_time": 10,
+      "cpu_time": 10,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_2xFaster",
+      "iterations": 1000,
+      "real_time": 50,
+      "cpu_time": 50,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_2xSlower",
+      "iterations": 1000,
+      "real_time": 50,
+      "cpu_time": 50,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentFaster",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentSlower",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tools/gbench/Inputs/test1_run2.json b/tools/gbench/Inputs/test1_run2.json
new file mode 100644
index 0000000..d8bc72d
--- /dev/null
+++ b/tools/gbench/Inputs/test1_run2.json
@@ -0,0 +1,46 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_SameTimes",
+      "iterations": 1000,
+      "real_time": 10,
+      "cpu_time": 10,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_2xFaster",
+      "iterations": 1000,
+      "real_time": 25,
+      "cpu_time": 25,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_2xSlower",
+      "iterations": 20833333,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentFaster",
+      "iterations": 1000,
+      "real_time": 90,
+      "cpu_time": 90,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentSlower",
+      "iterations": 1000,
+      "real_time": 110,
+      "cpu_time": 110,
+      "time_unit": "ns"
+    }
+  ]
+}
\ No newline at end of file
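The two input files are constructed so that the expected deltas follow from their names. The change reported by calculate_change in tools/gbench/report.py (added below) is (new - old) / |old|; a small illustrative check against the real_time values above, matching the expectations in the unit test (the helper is re-declared here only so the snippet is self-contained):

```python
# Fractional change as computed by gbench.report.calculate_change
# (see the report.py diff below): (new - old) / abs(old).
def calculate_change(old_val, new_val):
    return float(new_val - old_val) / abs(old_val)

# real_time values (ns) taken from test1_run1.json and test1_run2.json above.
print(calculate_change(10, 10))    # BM_SameTimes       -> +0.00
print(calculate_change(50, 25))    # BM_2xFaster        -> -0.50
print(calculate_change(50, 100))   # BM_2xSlower        -> +1.00
print(calculate_change(100, 90))   # BM_10PercentFaster -> -0.10
print(calculate_change(100, 110))  # BM_10PercentSlower -> +0.10
```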
diff --git a/tools/gbench/__init__.py b/tools/gbench/__init__.py
new file mode 100644
index 0000000..fce1a1a
--- /dev/null
+++ b/tools/gbench/__init__.py
@@ -0,0 +1,8 @@
+"""Google Benchmark tooling"""
+
+__author__ = 'Eric Fiselier'
+__email__ = 'eric@efcs.ca'
+__versioninfo__ = (0, 5, 0)
+__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
+
+__all__ = []
diff --git a/tools/gbench/report.py b/tools/gbench/report.py
new file mode 100644
index 0000000..44fa4a5
--- /dev/null
+++ b/tools/gbench/report.py
@@ -0,0 +1,136 @@
+"""report.py - Utilities for reporting statistics about benchmark results
+"""
+import os
+
+class BenchmarkColor(object):
+    def __init__(self, name, code):
+        self.name = name
+        self.code = code
+
+    def __repr__(self):
+        return '%s%r' % (self.__class__.__name__,
+                         (self.name, self.code))
+
+    def __format__(self, format):
+        return self.code
+
+# Benchmark Colors Enumeration
+BC_NONE = BenchmarkColor('NONE', '')
+BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
+BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
+BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
+BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
+BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
+BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
+BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
+BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
+BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
+BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
+
+def color_format(use_color, fmt_str, *args, **kwargs):
+    """
+    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
+    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
+    is False then all color codes in 'args' and 'kwargs' are replaced with
+    the empty string.
+    """
+    assert use_color is True or use_color is False
+    if not use_color:
+        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
+                for arg in args]
+        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
+                  for key, arg in kwargs.items()}
+    return fmt_str.format(*args, **kwargs)
+
+
+def find_longest_name(benchmark_list):
+    """
+    Return the length of the longest benchmark name in a given list of
+    benchmark JSON objects.
+    """
+    longest_name = 1
+    for bc in benchmark_list:
+        if len(bc['name']) > longest_name:
+            longest_name = len(bc['name'])
+    return longest_name
+
+
+def calculate_change(old_val, new_val):
+    """
+    Return a float representing the decimal change between old_val and new_val.
+    """
+    return float(new_val - old_val) / abs(old_val)
+
+
+def generate_difference_report(json1, json2, use_color=True):
+    """
+    Calculate and report the difference between each test of two benchmark
+    runs specified as 'json1' and 'json2'.
+    """
+    first_col_width = find_longest_name(json1['benchmarks']) + 5
+    def find_test(name):
+        for b in json2['benchmarks']:
+            if b['name'] == name:
+                return b
+        return None
+    first_line = "{:<{}s} Time CPU".format(
+        'Benchmark', first_col_width)
+    output_strs = [first_line, '-' * len(first_line)]
+    for bn in json1['benchmarks']:
+        other_bench = find_test(bn['name'])
+        if not other_bench:
+            continue
+
+        def get_color(res):
+            if res > 0.05:
+                return BC_FAIL
+            elif res > -0.07:
+                return BC_WHITE
+            else:
+                return BC_CYAN
+        fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}"
+        tres = calculate_change(bn['real_time'], other_bench['real_time'])
+        cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
+        output_strs += [color_format(use_color, fmt_str,
+                                     BC_HEADER, bn['name'], first_col_width,
+                                     get_color(tres), tres, get_color(cpures), cpures,
+                                     endc=BC_ENDC)]
+    return output_strs
+
+###############################################################################
+# Unit tests
+
+import unittest
+
+class TestReportDifference(unittest.TestCase):
+    def load_results(self):
+        import json
+        testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
+        testOutput1 = os.path.join(testInputs, 'test1_run1.json')
+        testOutput2 = os.path.join(testInputs, 'test1_run2.json')
+        with open(testOutput1, 'r') as f:
+            json1 = json.load(f)
+        with open(testOutput2, 'r') as f:
+            json2 = json.load(f)
+        return json1, json2
+
+    def test_basic(self):
+        expect_lines = [
+            ['BM_SameTimes', '+0.00', '+0.00'],
+            ['BM_2xFaster', '-0.50', '-0.50'],
+            ['BM_2xSlower', '+1.00', '+1.00'],
+            ['BM_10PercentFaster', '-0.10', '-0.10'],
+            ['BM_10PercentSlower', '+0.10', '+0.10']
+        ]
+        json1, json2 = self.load_results()
+        # Skip the header/separator lines that generate_difference_report adds.
+        output_lines = generate_difference_report(json1, json2, use_color=False)[2:]
+        self.assertEqual(len(output_lines), len(expect_lines))
+        for i in range(0, len(output_lines)):
+            parts = [x for x in output_lines[i].split(' ') if x]
+            self.assertEqual(len(parts), 3)
+            self.assertEqual(parts, expect_lines[i])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/gbench/util.py b/tools/gbench/util.py
new file mode 100644
index 0000000..169b71c
--- /dev/null
+++ b/tools/gbench/util.py
@@ -0,0 +1,130 @@
+"""util.py - General utilities for running, loading, and processing benchmarks
+"""
+import json
+import os
+import tempfile
+import subprocess
+import sys
+
+# Input file type enumeration
+IT_Invalid = 0
+IT_JSON = 1
+IT_Executable = 2
+
+_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
+def is_executable_file(filename):
+    """
+    Return 'True' if 'filename' names a valid file which is likely
+    an executable. A file is considered an executable if it starts with the
+    magic bytes for an EXE, Mach-O, or ELF file.
+    """
+    if not os.path.isfile(filename):
+        return False
+    with open(filename, 'rb') as f:
+        magic_bytes = f.read(_num_magic_bytes)
+    if sys.platform == 'darwin':
+        return magic_bytes in [
+            '\xfe\xed\xfa\xce',  # MH_MAGIC
+            '\xce\xfa\xed\xfe',  # MH_CIGAM
+            '\xfe\xed\xfa\xcf',  # MH_MAGIC_64
+            '\xcf\xfa\xed\xfe',  # MH_CIGAM_64
+            '\xca\xfe\xba\xbe',  # FAT_MAGIC
+            '\xbe\xba\xfe\xca'   # FAT_CIGAM
+        ]
+    elif sys.platform.startswith('win'):
+        return magic_bytes == 'MZ'
+    else:
+        return magic_bytes == '\x7FELF'
+
+
+def is_json_file(filename):
+    """
+    Returns 'True' if 'filename' names a valid JSON output file.
+    'False' otherwise.
+    """
+    try:
+        with open(filename, 'r') as f:
+            json.load(f)
+        return True
+    except:
+        pass
+    return False
+
+
+def classify_input_file(filename):
+    """
+    Return a tuple (type, msg) where 'type' specifies the classified type
+    of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human-readable
+    string representing the error.
+    """
+    ftype = IT_Invalid
+    err_msg = None
+    if not os.path.exists(filename):
+        err_msg = "'%s' does not exist" % filename
+    elif not os.path.isfile(filename):
+        err_msg = "'%s' does not name a file" % filename
+    elif is_executable_file(filename):
+        ftype = IT_Executable
+    elif is_json_file(filename):
+        ftype = IT_JSON
+    else:
+        err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename
+    return ftype, err_msg
+
+
+def check_input_file(filename):
+    """
+    Classify the file named by 'filename' and return the classification.
+    If the file is classified as 'IT_Invalid' print an error message and exit
+    the program.
+    """
+    ftype, msg = classify_input_file(filename)
+    if ftype == IT_Invalid:
+        print("Invalid input file: %s" % msg)
+        sys.exit(1)
+    return ftype
+
+
+def load_benchmark_results(fname):
+    """
+    Read benchmark output from a file and return the JSON object.
+    REQUIRES: 'fname' names a file containing JSON benchmark output.
+    """
+    with open(fname, 'r') as f:
+        return json.load(f)
+
+
+def run_benchmark(exe_name, benchmark_flags):
+    """
+    Run a benchmark specified by 'exe_name' with the specified
+    'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
+    real time console output.
+    RETURNS: A JSON object representing the benchmark output
+    """
+    thandle, tname = tempfile.mkstemp()
+    os.close(thandle)
+    cmd = [exe_name] + benchmark_flags
+    print("RUNNING: %s" % ' '.join(cmd))
+    exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
+    if exitCode != 0:
+        print('TEST FAILED...')
+        sys.exit(exitCode)
+    json_res = load_benchmark_results(tname)
+    os.unlink(tname)
+    return json_res
+
+
+def run_or_load_benchmark(filename, benchmark_flags):
+    """
+    Get the results for a specified benchmark. If 'filename' specifies
+    an executable benchmark then the results are generated by running the
+    benchmark. Otherwise 'filename' must name a valid JSON output file,
+    which is loaded and the result returned.
+    """
+    ftype = check_input_file(filename)
+    if ftype == IT_JSON:
+        return load_benchmark_results(filename)
+    elif ftype == IT_Executable:
+        return run_benchmark(filename, benchmark_flags)
+    else:
+        assert False  # This branch is unreachable
\ No newline at end of file
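As a rough illustration of the classification logic in tools/gbench/util.py, the following sketch shows how JSON results and invalid paths are told apart; it assumes it is run from the tools/ directory so that the gbench package and its Inputs/ files are reachable:

```python
from gbench import util

# A well-formed JSON results file is classified as IT_JSON with no error.
ftype, err = util.classify_input_file('gbench/Inputs/test1_run1.json')
assert ftype == util.IT_JSON and err is None

# A path that does not exist comes back as IT_Invalid together with a
# human-readable message, e.g. "'no/such/file' does not exist".
ftype, err = util.classify_input_file('no/such/file')
assert ftype == util.IT_Invalid
print(err)
```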