path: root/crosperf/generate_report.py
#!/usr/bin/env python2
#
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Given a specially-formatted JSON object, generates results report(s).

The JSON object should look like:
{"data": BenchmarkData, "platforms": BenchmarkPlatforms}

BenchmarkPlatforms is a [str], each of which names a platform the benchmark
  was run on (e.g. peppy, shamu, ...). Note that the order of this list
  corresponds to the order of the PlatformData lists in BenchmarkData.

BenchmarkData is a {str: [PlatformData]}. The str is the name of the benchmark,
and a PlatformData is a set of data for a given platform. There must be one
PlatformData for each benchmark, for each element in BenchmarkPlatforms.

A PlatformData is a [{str: float}], where each str names a metric we recorded,
and the float is the value for that metric. Each element is considered to be
the metrics collected from an independent run of this benchmark. NOTE: Each
PlatformData is expected to have a "retval" key, with the return value of
the benchmark. If the benchmark is successful, said return value should be 0.
Otherwise, this will break some of our JSON functionality.

Putting it all together, a JSON object will end up looking like:
  { "platforms": ["peppy", "peppy-new-crosstool"],
    "data": {
      "bench_draw_line": [
        [{"time (ms)": 1.321, "memory (mb)": 128.1, "retval": 0},
         {"time (ms)": 1.920, "memory (mb)": 128.4, "retval": 0}],
        [{"time (ms)": 1.221, "memory (mb)": 124.3, "retval": 0},
         {"time (ms)": 1.423, "memory (mb)": 123.9, "retval": 0}]
      ]
    }
  }

This says that we ran one benchmark, named bench_draw_line, on two platforms:
  peppy and peppy-new-crosstool. It was run twice on each platform.
Peppy's runs took 1.321ms and 1.920ms, while peppy-new-crosstool's took 1.221ms
  and 1.423ms. None of the runs failed to complete.
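
Typical invocation (illustrative; see _ParseArgs for the full flag list):
  generate_report.py -i results.json --json --text -o my_report
This reads results.json and writes my_report.json and my_report.txt.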
"""

from __future__ import division
from __future__ import print_function

import argparse
import functools
import json
import os
import sys
import traceback

from results_report import BenchmarkResults
from results_report import HTMLResultsReport
from results_report import JSONResultsReport
from results_report import TextResultsReport


def CountBenchmarks(benchmark_runs):
  """Counts the number of iterations for each benchmark in benchmark_runs."""

  # Example input for benchmark_runs:
  # {"bench": [[run1, run2, run3], [run1, run2, run3, run4]]}
  def _MaxLen(results):
    return 0 if not results else max(len(r) for r in results)

  return [(name, _MaxLen(results))
          for name, results in benchmark_runs.iteritems()]


def CutResultsInPlace(results, max_keys=50, complain_on_update=True):
  """Limits the given benchmark results to max_keys keys in-place.

  This takes the `data` field from the benchmark input, and mutates each
  benchmark run to contain `max_keys` elements (ignoring special elements, like
  "retval"). At the moment, it just selects the first `max_keys` keyvals,
  alphabetically.

  If complain_on_update is true, this will print a message noting that a
  truncation occurred.

  This returns the `results` object that was passed in, for convenience.

  e.g.
  >>> benchmark_data = {
  ...   "bench_draw_line": [
  ...     [{"time (ms)": 1.321, "memory (mb)": 128.1, "retval": 0},
  ...      {"time (ms)": 1.920, "memory (mb)": 128.4, "retval": 0}],
  ...     [{"time (ms)": 1.221, "memory (mb)": 124.3, "retval": 0},
  ...      {"time (ms)": 1.423, "memory (mb)": 123.9, "retval": 0}]
  ...   ]
  ... }
  >>> CutResultsInPlace(benchmark_data, max_keys=1, complain_on_update=False)
  {
    'bench_draw_line': [
      [{'memory (mb)': 128.1, 'retval': 0},
       {'memory (mb)': 128.4, 'retval': 0}],
      [{'memory (mb)': 124.3, 'retval': 0},
       {'memory (mb)': 123.9, 'retval': 0}]
    ]
  }
  """
  actually_updated = False
  for bench_results in results.itervalues():
    for platform_results in bench_results:
      for i, result in enumerate(platform_results):
        # Keep the keys that come earliest when sorted alphabetically.
        # Forcing alphabetical order is arbitrary, but necessary; otherwise,
        # the keyvals we'd emit would depend on our iteration order through a
        # map.
        removable_keys = sorted(k for k in result if k != 'retval')
        retained_keys = removable_keys[:max_keys]
        platform_results[i] = {k: result[k] for k in retained_keys}
        # retval needs to be passed through all of the time.
        retval = result.get('retval')
        if retval is not None:
          platform_results[i]['retval'] = retval
        actually_updated = actually_updated or \
          len(retained_keys) != len(removable_keys)

  if actually_updated and complain_on_update:
    print(
        'Warning: Some benchmark keyvals have been truncated.', file=sys.stderr)
  return results


def _ConvertToASCII(obj):
  """Convert an object loaded from JSON to ASCII; JSON gives us unicode."""

  # Using something like `object_hook` is insufficient, since it only fires on
  # actual JSON objects. `encoding` fails, too, since the default decoder always
  # uses unicode() to decode strings.
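  # e.g. (illustrative) _ConvertToASCII({u'a': [u'b', 1]}) == {'a': ['b', 1]}.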
  if isinstance(obj, unicode):
    return str(obj)
  if isinstance(obj, dict):
    return {_ConvertToASCII(k): _ConvertToASCII(v) for k, v in obj.iteritems()}
  if isinstance(obj, list):
    return [_ConvertToASCII(v) for v in obj]
  return obj


def _PositiveInt(s):
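  # Despite the name, 0 is accepted here; the --statistic-limit flag documents
  # 0 as "unlimited".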
  i = int(s)
  if i < 0:
    raise argparse.ArgumentTypeError('%d is not a positive integer.' % (i,))
  return i


def _AccumulateActions(args):
  """Given program arguments, determines what actions we want to run.

  Returns [(ResultsReportCtor, str)], where ResultsReportCtor can construct a
  ResultsReport, and the str is the file extension for the given report.
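
  For example (illustrative), passing --json and --text yields
  [(JSONResultsReport, 'json'), (TextResultsReport, 'txt')]; passing no format
  flags yields just [(HTMLResultsReport, 'html')].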
  """
  results = []
  # The order of these is arbitrary.
  if args.json:
    results.append((JSONResultsReport, 'json'))
  if args.text:
    results.append((TextResultsReport, 'txt'))
  if args.email:
    email_ctor = functools.partial(TextResultsReport, email=True)
    results.append((email_ctor, 'email'))
  # We emit HTML if nothing else was specified.
  if args.html or not results:
    results.append((HTMLResultsReport, 'html'))
  return results


# Note: get_contents is a function, because it may be expensive (generating some
# HTML reports takes O(seconds) on my machine, depending on the size of the
# input data).
def WriteFile(output_prefix, extension, get_contents, overwrite, verbose):
  """Writes `contents` to a file named "${output_prefix}.${extension}".

  get_contents should be a zero-args function that returns a string (of the
  contents to write).
  If output_prefix == '-', this writes to stdout.
  If overwrite is False, this will not overwrite files.
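
  e.g. (illustrative) WriteFile('report', 'html', make_html, True, False)
  writes make_html() to ./report.html, overwriting any existing file.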
  """
  if output_prefix == '-':
    if verbose:
      print('Writing %s report to stdout' % (extension,), file=sys.stderr)
    sys.stdout.write(get_contents())
    return

  file_name = '%s.%s' % (output_prefix, extension)
  if not overwrite and os.path.exists(file_name):
    raise IOError('Refusing to write %s -- it already exists' % (file_name,))

  with open(file_name, 'w') as out_file:
    if verbose:
      print('Writing %s report to %s' % (extension, file_name), file=sys.stderr)
    out_file.write(get_contents())


def RunActions(actions, benchmark_results, output_prefix, overwrite, verbose):
  """Runs `actions`, returning True if all succeeded."""
  failed = False

  report_ctor = None  # Make the linter happy
  for report_ctor, extension in actions:
    try:
      get_contents = lambda: report_ctor(benchmark_results).GetReport()
      WriteFile(output_prefix, extension, get_contents, overwrite, verbose)
    except Exception:
      # Complain and move along; we may have more actions that might complete
      # successfully.
      failed = True
      traceback.print_exc()
  return not failed


def PickInputFile(input_name):
  """Given program arguments, returns file to read for benchmark input."""
  return sys.stdin if input_name == '-' else open(input_name)


def _NoPerfReport(_label_name, _benchmark_name, _benchmark_iteration):
  return {}


def _ParseArgs(argv):
  parser = argparse.ArgumentParser(description='Turns JSON into results '
                                   'report(s).')
  parser.add_argument(
      '-v',
      '--verbose',
      action='store_true',
      help='Be a tiny bit more verbose.')
  parser.add_argument(
      '-f',
      '--force',
      action='store_true',
      help='Overwrite existing results files.')
  parser.add_argument(
      '-o',
      '--output',
      default='report',
      type=str,
      help='Prefix of the output filename (default: report). '
      '- means stdout.')
  parser.add_argument(
      '-i',
      '--input',
      required=True,
      type=str,
      help='Where to read the JSON from. - means stdin.')
  parser.add_argument(
      '-l',
      '--statistic-limit',
      default=0,
      type=_PositiveInt,
      help='The maximum number of benchmark statistics to '
      'display from a single run. 0 implies unlimited.')
  parser.add_argument(
      '--json', action='store_true', help='Output a JSON report.')
  parser.add_argument(
      '--text', action='store_true', help='Output a text report.')
  parser.add_argument(
      '--email',
      action='store_true',
      help='Output a text report suitable for email.')
  parser.add_argument(
      '--html',
      action='store_true',
      help='Output an HTML report (this is the default if no '
      'other output format is specified).')
  return parser.parse_args(argv)


def Main(argv):
  args = _ParseArgs(argv)
  # JSON likes to load UTF-8; our results reporter *really* doesn't like
  # UTF-8.
  with PickInputFile(args.input) as in_file:
    raw_results = _ConvertToASCII(json.load(in_file))

  platform_names = raw_results['platforms']
  results = raw_results['data']
  if args.statistic_limit:
    results = CutResultsInPlace(results, max_keys=args.statistic_limit)
  benches = CountBenchmarks(results)
  # In crosperf, a label is essentially a platform+configuration. So, a name of
  # a label and a name of a platform are equivalent for our purposes.
  bench_results = BenchmarkResults(
      label_names=platform_names,
      benchmark_names_and_iterations=benches,
      run_keyvals=results,
      read_perf_report=_NoPerfReport)
  actions = _AccumulateActions(args)
  ok = RunActions(actions, bench_results, args.output, args.force, args.verbose)
  return 0 if ok else 1


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))