android_bench_suite/fix_skia_results.py
#!/usr/bin/env python2
#
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# pylint: disable=cros-logging-import
"""Transforms skia benchmark results to ones that crosperf can understand."""

from __future__ import print_function

import itertools
import json
import logging
import sys

# Set the logging level to INFO before importing other autotest
# code, to avoid having failed-import logging messages confuse the
# test_droid user.
logging.basicConfig(level=logging.INFO)

# All of the results we care about, by name.
# Each of these *must* end in _ns, _us, _ms, or _s, since all the metrics we
# collect (so far) are related to time, and we alter the results based on the
# suffix of these strings (so we don't have 0.000421ms per sample, for example)
_RESULT_RENAMES = {
    'memset32_100000_640_480_nonrendering': 'memset_time_ms',
    'path_equality_50%_640_480_nonrendering': 'path_equality_ns',
    'sort_qsort_backward_640_480_nonrendering': 'qsort_us'
}


def _GetFamiliarName(name):
  # Use .get() so benchmarks without a rename fall back to their raw name.
  r = _RESULT_RENAMES.get(name)
  return r if r else name


def _IsResultInteresting(name):
  return name in _RESULT_RENAMES


def _GetTimeMultiplier(label_name):
  """Given a time (in milliseconds), normalize it to what label_name expects.

  "What label_name expects" meaning "we pattern match against the last few
  non-space chars in label_name."

  This expects the time unit to be separated from anything else by '_'.
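
  For example:

  >>> _GetTimeMultiplier('qsort_us')
  1000.0
  >>> _GetTimeMultiplier('memset_time_ms')
  1.0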
  """
  ms_mul = 1000 * 1000.
  endings = [('_ns', 1), ('_us', 1000), ('_ms', ms_mul), ('_s', ms_mul * 1000)]
  for end, mul in endings:
    if label_name.endswith(end):
      return ms_mul / mul
  raise ValueError('Unknown ending in "%s"; expecting one of %s' %
                   (label_name, [end for end, _ in endings]))


def _GetTimeDenom(ms):
  """Given a list of times (in milliseconds), find a sane time unit for them.

  Returns the unit name, and `ms` normalized to that time unit.

  >>> _GetTimeDenom([1, 2, 3])
  ('ms', [1.0, 2.0, 3.0])
  >>> _GetTimeDenom([.1, .2, .3])
  ('us', [100.0, 200.0, 300.0])
  """

  ms_mul = 1000 * 1000
  units = [('us', 1000), ('ms', ms_mul), ('s', ms_mul * 1000)]
  for name, mul in reversed(units):
    normalized = [float(t) * ms_mul / mul for t in ms]
    average = sum(normalized) / len(normalized)
    if all(n > 0.1 for n in normalized) and average >= 1:
      return name, normalized

  normalized = [float(t) * ms_mul for t in ms]
  return 'ns', normalized


def _TransformBenchmarks(raw_benchmarks):
  # We get {"results": {"bench_name": Results}}
  # where
  #   Results = {"config_name": {"samples": [float], etc.}}
  #
  # We want {"data": {"skia": [[BenchmarkData]]},
  #          "platforms": ["platform1, ..."]}
  # where
  #   BenchmarkData = {"bench_name": bench_samples[N], ..., "retval": 0}
  #
  # Note that retval is awkward -- crosperf's JSON reporter reports the result
  # as a failure if it's not there. Everything else treats it like a
  # statistic...
  benchmarks = raw_benchmarks['results']
  results = []
  for bench_name, bench_result in benchmarks.iteritems():
    try:
      for cfg_name, keyvals in bench_result.iteritems():
        # Some benchmarks won't have timing data (either it won't exist at all,
        # or it'll be empty); skip them.
        samples = keyvals.get('samples')
        if not samples:
          continue

        # Build the full name in a fresh variable; reassigning bench_name here
        # would corrupt the names of any configs visited later in this loop.
        full_name = '%s_%s' % (bench_name, cfg_name)
        if not _IsResultInteresting(full_name):
          continue

        friendly_name = _GetFamiliarName(full_name)
        if len(results) < len(samples):
          results.extend({
              'retval': 0
          } for _ in xrange(len(samples) - len(results)))

        time_mul = _GetTimeMultiplier(friendly_name)
        for sample, app in itertools.izip(samples, results):
          assert friendly_name not in app
          app[friendly_name] = sample * time_mul
    except (KeyError, ValueError) as e:
      logging.error('While converting "%s" (key: %s): %s',
                    bench_result, bench_name, e)
      raise

  # Realistically, `results` should hold multiple result lists, where each
  # entry is the result list for a different label. Because we only deal with
  # one label at the moment, we wrap the single list in a list of its own.
  return [results]
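
# A minimal sketch of the transformation (input shaped like the 'results'
# blob that Skia's nanobench emits; dict key order may vary):
#
#   _TransformBenchmarks({'results': {'sort_qsort_backward': {
#       '640_480_nonrendering': {'samples': [0.002, 0.004]}}}})
#   => [[{'qsort_us': 2.0, 'retval': 0}, {'qsort_us': 4.0, 'retval': 0}]]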


if __name__ == '__main__':

  def _GetUserFile(argv):
    if not argv or argv[0] == '-':
      return sys.stdin
    return open(argv[0])

  def _Main():
    with _GetUserFile(sys.argv[1:]) as in_file:
      obj = json.load(in_file)
    output = _TransformBenchmarks(obj)
    json.dump(output, sys.stdout)

  _Main()