user_activity_benchmarks/benchmark_metrics_experiment.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307

#!/usr/bin/python2
#
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Runs an experiment with the benchmark metrics on a pair of CWP data sets.

A data set should contain the files with the pairwise inclusive and the
inclusive statistics. The pairwise inclusive file contains pairs of
parent and child functions with their inclusive count fractions out of the
total amount of inclusive count values and the files of the child functions.
The inclusive file contains the functions with their inclusive count fraction
out of the total amount of inclusive count values and the file name of the
function. The input data should be collected using the scripts
collect_experiment_data.sh or collect_experiment_data_odd_even_session.sh

For every function, this script computes the distance and the score values.
The output is stored in the file given by --cwp_function_statistics_file.

For every Chrome OS component, this script computes a set of metrics consisting
of the number of functions matching the group and the cumulative and average
distance and score of those functions. The output is stored in the file given
by --cwp_function_groups_statistics_file.
"""

import argparse
from collections import defaultdict
import csv
import os
import sys
import benchmark_metrics


class MetricsExperiment(object):
  """Runs an experiment with the benchmark metrics on a pair of data sets.

  The experiment compares a test data set against a reference data set. Each
  data set is made of an inclusive statistics CSV file and a pairwise inclusive
  statistics CSV file. The results are written to two CSV files: one with the
  metrics of every function and one with the metrics of every function group.
  """

  def __init__(self, cwp_pairwise_inclusive_reference,
               cwp_pairwise_inclusive_test, cwp_inclusive_reference,
               cwp_inclusive_test, cwp_function_groups_file,
               cwp_function_groups_statistics_file,
               cwp_function_statistics_file):
    """Initializes the MetricsExperiment class.

    Only the file names are stored here; no file is opened until
    PerformComputation is called.

    Args:
      cwp_pairwise_inclusive_reference: The CSV file containing the pairwise
        inclusive values from the reference data set.
      cwp_pairwise_inclusive_test: The CSV file containing the pairwise
        inclusive values from the test data set.
      cwp_inclusive_reference: The CSV file containing the inclusive values
        from the reference data set.
      cwp_inclusive_test: The CSV file containing the inclusive values from
        the test data set.
      cwp_function_groups_file: The file containing the groups of functions.
        Every line is split on whitespace.
      cwp_function_groups_statistics_file: The output CSV file that will
        contain the metrics for the function groups.
      cwp_function_statistics_file: The output CSV file that will contain the
        metrics for the CWP functions.
    """
    self._cwp_pairwise_inclusive_reference = cwp_pairwise_inclusive_reference
    self._cwp_pairwise_inclusive_test = cwp_pairwise_inclusive_test
    self._cwp_inclusive_reference = cwp_inclusive_reference
    self._cwp_inclusive_test = cwp_inclusive_test
    self._cwp_function_groups_file = cwp_function_groups_file
    self._cwp_function_groups_statistics_file = (
        cwp_function_groups_statistics_file)
    self._cwp_function_statistics_file = cwp_function_statistics_file

  @staticmethod
  def _SplitFunctionKey(function_key):
    """Splits a 'function,file' key into the function and the file name.

    The key is split on its last ',' because the function name may itself
    contain commas (e.g. a template instantiation or a signature with several
    parameters). File names are assumed not to contain a ','.

    Args:
      function_key: A key built by ParseInclusiveStatisticsFile.

    Returns:
      A (function_name, file_name) tuple.

    Raises:
      ValueError: If the key does not contain any ','.
    """
    function_name, file_name = function_key.rsplit(',', 1)
    return function_name, file_name

  @staticmethod
  def _FormatCsvLine(fields):
    """Joins the fields into a single CSV line.

    A field is wrapped in double quotes (with the embedded double quotes
    doubled) only when it contains a ',', a '"' or a line break, so that the
    lines of comma-free values are identical to a plain ','.join.

    Args:
      fields: The values of the line. Every value is converted with str().

    Returns:
      The CSV line, without a line terminator.
    """
    formatted_fields = []
    for field in fields:
      text = str(field)
      if any(special in text for special in (',', '"', '\n', '\r')):
        text = '"%s"' % text.replace('"', '""')
      formatted_fields.append(text)
    return ','.join(formatted_fields)

  @staticmethod
  def ParsePairwiseInclusiveStatisticsFile(file_name):
    """Parses the pairwise inclusive statistics files.

    A line of the file should contain a pair of a parent and a child function,
    concatenated by a ;;, the name of the file where the child function is
    defined and the inclusive count fractions of the pair of functions out of
    the total amount of inclusive count values. The columns are read by name:
    parent_child_functions, child_function_file and inclusive_count_fraction.

    Rows with an empty parent name, an empty child name or a fraction equal to
    zero are skipped.

    Args:
      file_name: The file containing the pairwise inclusive statistics of the
        CWP functions.

    Returns:
      A dict containing the statistics of the parent functions and each of
      their child functions. The key of the dict is the name of the parent
      function. The value is a dict having as a key the name of the child
      function with its file name separated by a ',' and as a value the
      inclusive count fraction of the child function.

    Raises:
      ValueError: If the parent_child_functions value does not consist of
        exactly two parts separated by ';;' or the fraction is not a number.
    """
    pairwise_inclusive_statistics = defaultdict(lambda: defaultdict(float))

    with open(file_name) as pairwise_inclusive_statistics_file:
      statistics_reader = csv.DictReader(
          pairwise_inclusive_statistics_file, delimiter=',')

      for statistic in statistics_reader:
        parent_function_name, child_function_name = (
            statistic['parent_child_functions'].split(';;'))
        child_function_file_name = os.path.normpath(
            statistic['child_function_file'])
        inclusive_count_fraction = float(statistic['inclusive_count_fraction'])

        if not (parent_function_name and child_function_name and
                inclusive_count_fraction):
          continue

        # There might be situations where a child function appears in
        # multiple files or objects. Such situations can occur when the
        # Chrome OS version and the name of the board are not specified in the
        # Dremel queries (i.e the files can belong to different kernel or
        # library versions), when the child function is a template function
        # that is declared in a header file or when there are name collisions
        # between multiple executable objects.
        # If a pair of child and parent functions appears multiple times, we
        # add their inclusive count values.
        child_function_key = ','.join(
            [child_function_name, child_function_file_name])
        children_statistics = pairwise_inclusive_statistics[parent_function_name]
        children_statistics[child_function_key] += inclusive_count_fraction

    return pairwise_inclusive_statistics

  @staticmethod
  def ParseInclusiveStatisticsFile(inclusive_statistics_file_name):
    """Parses the inclusive statistics files.

    The columns are read by name: function, file and
    inclusive_count_fraction. Rows with an empty function name or a fraction
    equal to zero are skipped.

    Args:
      inclusive_statistics_file_name: The file containing the inclusive
        statistics of the CWP functions.

    Returns:
      A dict having as a key the function name and file where the function is
      defined separated by a ',' and as a value the inclusive count fraction.

    Raises:
      ValueError: If an inclusive count fraction is not a number.
    """
    inclusive_statistics = defaultdict(float)

    with open(inclusive_statistics_file_name) as inclusive_statistics_file:
      statistics_reader = csv.DictReader(
          inclusive_statistics_file, delimiter=',')

      for statistic in statistics_reader:
        function_name = statistic['function']
        file_name = os.path.normpath(statistic['file'])
        inclusive_count_fraction = float(statistic['inclusive_count_fraction'])

        # NOTE(review): os.path.normpath() turns an empty path into '.', so
        # file_name is never empty here and only the function name and the
        # fraction actually filter rows. Kept as is to preserve the results.
        if not (function_name and file_name and inclusive_count_fraction):
          continue

        # There might be situations where a function appears in multiple files
        # or objects. Such situations can occur when the Chrome OS version and
        # the name of the board are not specified in the Dremel queries (i.e
        # the files can belong to different kernel or library versions). The
        # fractions of the repeated entries are added.
        parent_function_key = ','.join([function_name, file_name])
        inclusive_statistics[parent_function_key] += inclusive_count_fraction

    return inclusive_statistics

  def PerformComputation(self):
    """Does the benchmark metrics experimental computation.

    For every function of the test data set, a distance is computed by
    benchmark_metrics.ComputeDistanceForFunction from the child function
    statistics of the test and the reference data sets. Afterwards, a score is
    computed by benchmark_metrics.ComputeScoreForFunction from the distance
    and the inclusive count fractions. The statistics for all the functions
    are written in the file self._cwp_function_statistics_file.

    The metrics of the function groups are computed by
    benchmark_metrics.ComputeMetricsForComponents and written in the file
    self._cwp_function_groups_statistics_file, one line per group with the
    columns group, file_path, function_count, distance_cum, distance_avg,
    score_cum and score_avg.

    Both output files have a header line and no trailing newline.
    """
    inclusive_statistics_reference = self.ParseInclusiveStatisticsFile(
        self._cwp_inclusive_reference)
    inclusive_statistics_test = self.ParseInclusiveStatisticsFile(
        self._cwp_inclusive_test)
    pairwise_inclusive_statistics_reference = (
        self.ParsePairwiseInclusiveStatisticsFile(
            self._cwp_pairwise_inclusive_reference))
    pairwise_inclusive_statistics_test = (
        self.ParsePairwiseInclusiveStatisticsFile(
            self._cwp_pairwise_inclusive_test))
    parent_function_statistics = {}

    with open(self._cwp_function_groups_file, 'r') as input_file:
      cwp_function_groups = [line.split() for line in input_file]

    for parent_function_key, parent_function_fraction_test in (
        inclusive_statistics_test.items()):
      # The pairwise statistics are keyed by the bare function name, so the
      # file part of the key is dropped. The key is split on its last ','
      # because the function name may contain commas.
      parent_function_name, _ = self._SplitFunctionKey(parent_function_key)

      # A function that is missing from the reference data set gets a zero
      # fraction and no child functions.
      parent_function_fraction_reference = inclusive_statistics_reference.get(
          parent_function_key, 0.0)
      child_functions_statistics_test = (
          pairwise_inclusive_statistics_test.get(parent_function_name, {}))
      child_functions_statistics_reference = (
          pairwise_inclusive_statistics_reference.get(parent_function_name, {}))

      distance = benchmark_metrics.ComputeDistanceForFunction(
          child_functions_statistics_test, child_functions_statistics_reference)
      parent_function_score_test = benchmark_metrics.ComputeScoreForFunction(
          distance, parent_function_fraction_test,
          parent_function_fraction_reference)

      parent_function_statistics[parent_function_key] = (
          distance, parent_function_score_test)

    with open(self._cwp_function_statistics_file, 'w') as output_file:
      statistics_lines = ['function,file,distance,score']
      for parent_function_key, (distance, score) in (
          parent_function_statistics.items()):
        function_name, function_file_name = self._SplitFunctionKey(
            parent_function_key)
        statistics_lines.append(
            self._FormatCsvLine(
                [function_name, function_file_name, distance, score]))
      output_file.write('\n'.join(statistics_lines))

    cwp_groups_statistics_test = benchmark_metrics.ComputeMetricsForComponents(
        cwp_function_groups, parent_function_statistics)

    with open(self._cwp_function_groups_statistics_file, 'w') as output_file:
      group_statistics_lines = [
          'group,file_path,function_count,distance_cum,distance_avg,score_cum,'
          'score_avg'
      ]
      for group_name, statistic in cwp_groups_statistics_test.items():
        # The first six values of the group statistic fill the columns that
        # follow the group name in the header.
        group_fields = [group_name]
        group_fields.extend(statistic[index] for index in range(6))
        group_statistics_lines.append(self._FormatCsvLine(group_fields))
      output_file.write('\n'.join(group_statistics_lines))


def ParseArguments(arguments):
  """Parses the command line arguments of the experiment.

  All the options are required.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace holding the value of every option, under the name
    of its long flag.
  """
  parser = argparse.ArgumentParser(
      description='Runs an experiment with the benchmark metrics on a pair of '
      'CWP data sets.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_reference',
      required=True,
      help='The reference CSV file that will contain a pair of parent and '
      'child functions with their inclusive count fractions out of the total '
      'amount of inclusive count values.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_test',
      required=True,
      help='The test CSV file that will contain a pair of parent and '
      'child functions with their inclusive count fractions out of the total '
      'amount of inclusive count values.')
  parser.add_argument(
      '--cwp_inclusive_reference',
      required=True,
      help='The reference CSV file that will contain a function with its '
      'inclusive count fraction out of the total amount of inclusive count '
      'values.')
  parser.add_argument(
      '--cwp_inclusive_test',
      required=True,
      help='The test CSV file that will contain a function with its '
      'inclusive count fraction out of the total amount of inclusive count '
      'values.')
  parser.add_argument(
      '-g',
      '--cwp_function_groups_file',
      required=True,
      help='The file that will contain the CWP function groups. '
      'A line consists of the group name and a file path. A group must '
      'represent a ChromeOS component.')
  parser.add_argument(
      '-s',
      '--cwp_function_groups_statistics_file',
      required=True,
      help='The output file that will contain the metric statistics for the '
      'CWP function groups in CSV format. A line consists of the group name, '
      'file path, number of functions matching the group, cumulative '
      'distance, average distance, cumulative score and average score values.')
  parser.add_argument(
      '-f',
      '--cwp_function_statistics_file',
      required=True,
      help='The output file that will contain the metric statistics for the '
      'CWP functions in CSV format. A line consists of the function name, file '
      'name, distance and score values.')

  return parser.parse_args(arguments)


def Main(argv):
  """Parses the arguments and runs the benchmark metrics experiment.

  Args:
    argv: The list of command line arguments, without the program name.
  """
  options = ParseArguments(argv)
  MetricsExperiment(
      cwp_pairwise_inclusive_reference=options.cwp_pairwise_inclusive_reference,
      cwp_pairwise_inclusive_test=options.cwp_pairwise_inclusive_test,
      cwp_inclusive_reference=options.cwp_inclusive_reference,
      cwp_inclusive_test=options.cwp_inclusive_test,
      cwp_function_groups_file=options.cwp_function_groups_file,
      cwp_function_groups_statistics_file=(
          options.cwp_function_groups_statistics_file),
      cwp_function_statistics_file=options.cwp_function_statistics_file,
  ).PerformComputation()


# Run the experiment only when this file is executed as a script. The command
# line arguments, without the program name, are forwarded to Main.
if __name__ == '__main__':
  Main(sys.argv[1:])