path: root/crosperf/results_organizer.py
# -*- coding: utf-8 -*-
# Copyright 2013 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Parse data from benchmark_runs for tabulator."""


import errno
import json
import os
import re
import sys

from cros_utils import misc


_TELEMETRY_RESULT_DEFAULTS_FILE = "default-telemetry-results.json"
_DUP_KEY_REGEX = re.compile(r"(\w+)\{(\d+)\}")
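# For example, _DUP_KEY_REGEX.match("foo{2}") yields groups ("foo", "2");
# keys without a "{i}" suffix do not match.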


def _AdjustIteration(benchmarks, max_dup, bench):
    """Adjust the interation numbers if they have keys like ABCD{i}."""
    for benchmark in benchmarks:
        if benchmark.name != bench or benchmark.iteration_adjusted:
            continue
        benchmark.iteration_adjusted = True
        benchmark.iterations *= max_dup + 1


def _GetMaxDup(data):
    """Find the maximum i inside ABCD{i}.

    data should be a [[[Key]]], where Key is a string that may look like
    ABCD{i}.
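
    For example, _GetMaxDup([[["foo", "bar{1}"], ["baz{3}"]]]) returns 3.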
    """
    max_dup = 0
    for label in data:
        for run in label:
            for key in run:
                match = _DUP_KEY_REGEX.match(key)
                if match:
                    max_dup = max(max_dup, int(match.group(2)))
    return max_dup


def _Repeat(func, times):
    """Returns the result of running func() n times."""
    return [func() for _ in range(times)]


def _DictWithReturnValues(retval, pass_fail):
    """Create a new dictionary pre-populated with success/fail values."""
    new_dict = {}
    # Note: 0 is a valid retval; test to make sure it's not None.
    if retval is not None:
        new_dict["retval"] = retval
    if pass_fail:
        new_dict[""] = pass_fail
    return new_dict


def _GetNonDupLabel(max_dup, runs):
    """Create new list for the runs of the same label.

    Specifically, this will split out keys like foo{0}, foo{1} from one run into
    their own runs. For example, given a run like:
      {"foo": 1, "bar{0}": 2, "baz": 3, "qux{1}": 4, "pirate{0}": 5}

    You'll get:
      [{"foo": 1, "baz": 3}, {"bar": 2, "pirate": 5}, {"qux": 4}]

    Hands back the lists of transformed runs, all concatenated together.
    """
    new_runs = []
    for run in runs:
        run_retval = run.get("retval", None)
        run_pass_fail = run.get("", None)
        new_run = {}
        # pylint: disable=cell-var-from-loop
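        # Pre-populate the split-out runs with this run's retval and pass/fail
        # status so each stays a well-formed result dict.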
        added_runs = _Repeat(
            lambda: _DictWithReturnValues(run_retval, run_pass_fail), max_dup
        )
        for key, value in run.items():
            match = _DUP_KEY_REGEX.match(key)
            if not match:
                new_run[key] = value
            else:
                new_key, index_str = match.groups()
                added_runs[int(index_str) - 1][new_key] = str(value)
        new_runs.append(new_run)
        new_runs += added_runs
    return new_runs


def _DuplicatePass(result, benchmarks):
    """Properly expands keys like `foo{1}` in `result`."""
    for bench, data in result.items():
        max_dup = _GetMaxDup(data)
        # If there's nothing to expand, there's nothing to do.
        if not max_dup:
            continue
        for i, runs in enumerate(data):
            data[i] = _GetNonDupLabel(max_dup, runs)
        _AdjustIteration(benchmarks, max_dup, bench)


def _ReadSummaryFile(filename):
    """Reads the summary file at filename."""
    dirname, _ = misc.GetRoot(filename)
    fullname = os.path.join(dirname, _TELEMETRY_RESULT_DEFAULTS_FILE)
    try:
        # Slurp the summary file into a dictionary. The keys in the dictionary are
        # the benchmark names. The value for a key is a list containing the names
        # of all the result fields that should be returned in a 'default' report.
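        # Shape (illustrative): {"benchmark_name": ["result_field_1", "result_field_2"]}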
        with open(fullname) as in_file:
            return json.load(in_file)
    except IOError as e:
        # ENOENT means "no such file or directory"
        if e.errno == errno.ENOENT:
            return {}
        raise


def _MakeOrganizeResultOutline(benchmark_runs, labels):
    """Creates the "outline" of the OrganizeResults result for a set of runs.

    Report generation returns lists of different sizes, depending on the input
    data. Depending on the order in which we iterate through said input data, we
    may populate the Nth index of a list, then the N-1st, then the N+Mth, ...

    It's cleaner to figure out the "skeleton"/"outline" ahead of time, so we don't
    have to worry about resizing while computing results.
    """
    # Count how many iterations exist for each benchmark run.
    # We can't simply count up, since we may be given an incomplete set of
    # iterations (e.g. [r.iteration for r in benchmark_runs] == [1, 3])
    iteration_count = {}
    for run in benchmark_runs:
        name = run.benchmark.name
        old_iterations = iteration_count.get(name, -1)
        # N.B. run.iteration starts at 1, not 0.
        iteration_count[name] = max(old_iterations, run.iteration)

    # Result structure: {benchmark_name: [[{key: val}]]}
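    # e.g. with two labels and three iterations per benchmark, the outline is
    # {"some_benchmark": [[{}, {}, {}], [{}, {}, {}]]} (names are illustrative).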
    result = {}
    for run in benchmark_runs:
        name = run.benchmark.name
        num_iterations = iteration_count[name]
        # Binding num_iterations as a default argument captures its value at
        # definition time and keeps cros lint quiet about closing over a loop
        # variable.
        make_dicts = lambda n=num_iterations: _Repeat(dict, n)
        result[name] = _Repeat(make_dicts, len(labels))
    return result


def OrganizeResults(benchmark_runs, labels, benchmarks=None, json_report=False):
    """Create a dict from benchmark_runs.

    The structure of the output dict is as follows:
    {"benchmark_1":[
      [{"key1":"v1", "key2":"v2"},{"key1":"v1", "key2","v2"}]
      #one label
      []
      #the other label
      ]
     "benchmark_2":
      [
      ]}.
    """
    result = _MakeOrganizeResultOutline(benchmark_runs, labels)
    label_names = [label.name for label in labels]
    label_indices = {name: i for i, name in enumerate(label_names)}
    summary_file = _ReadSummaryFile(sys.argv[0])

    if benchmarks is None:
        benchmarks = []

    for benchmark_run in benchmark_runs:
        if not benchmark_run.result:
            continue
        benchmark = benchmark_run.benchmark
        label_index = label_indices[benchmark_run.label.name]
        cur_label_list = result[benchmark.name][label_index]
        cur_dict = cur_label_list[benchmark_run.iteration - 1]

        show_all_results = json_report or benchmark.show_all_results
        if not show_all_results:
            summary_list = summary_file.get(benchmark.name)
            if summary_list:
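                # Always surface retval and CPU stats keys, even when the
                # default report lists only a subset of result fields.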
                for key in benchmark_run.result.keyvals.keys():
                    if any(
                        key.startswith(added_key)
                        for added_key in ["retval", "cpufreq", "cputemp"]
                    ):
                        summary_list.append(key)
            else:
                # Did not find test_name in json file; show everything.
                show_all_results = True
        if benchmark_run.result.cwp_dso:
            # If we are in cwp approximation mode, we only care about samples
            if "samples" in benchmark_run.result.keyvals:
                cur_dict["samples"] = benchmark_run.result.keyvals["samples"]
            cur_dict["retval"] = benchmark_run.result.keyvals["retval"]
            for key, value in benchmark_run.result.keyvals.items():
                if any(
                    key.startswith(cpustat_keyword)
                    for cpustat_keyword in ["cpufreq", "cputemp"]
                ):
                    cur_dict[key] = value
        else:
            for test_key in benchmark_run.result.keyvals:
                if show_all_results or test_key in summary_list:
                    cur_dict[test_key] = benchmark_run.result.keyvals[test_key]
        # Occasionally Telemetry tests will not fail but they will not return a
        # result, either.  Look for those cases, and force them to be a fail.
        # (This can happen if, for example, the test has been disabled.)
        if len(cur_dict) == 1 and cur_dict["retval"] == 0:
            cur_dict["retval"] = 1
            benchmark_run.result.keyvals["retval"] = 1
            # TODO: This output should be sent via logger.
            print(
                "WARNING: Test '%s' appears to have succeeded but returned"
                " no results." % benchmark.name,
                file=sys.stderr,
            )
        if json_report and benchmark_run.machine:
            cur_dict["machine"] = benchmark_run.machine.name
            cur_dict["machine_checksum"] = benchmark_run.machine.checksum
            cur_dict["machine_string"] = benchmark_run.machine.checksum_string
    _DuplicatePass(result, benchmarks)
    return result