aboutsummaryrefslogtreecommitdiff
path: root/afdo_tools/bisection/afdo_prof_analysis.py
blob: ce8afd647f5a190559ba045a151f5feebe71aa0e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""AFDO Profile analysis tool.

This script takes a good AFDO profile, a bad AFDO profile, and an external
script which deems particular AFDO profiles as GOOD/BAD/SKIP, and an output
file as arguments. Given these pieces of information, it analyzes the profiles
to try and determine what exactly is bad about the bad profile. It does this
with three main techniques: bisecting search, range search, and rough diff-ing.

The external script communicates the 'goodness' of an AFDO profile through its
exit code. The codes known to this script are:
  - 0: the AFDO profile produced a good binary
  - 1: the AFDO profile produced a bad binary
  - 125: no result could be determined; just try another profile
  - >127: quit immediately
"""

from __future__ import division, print_function

import argparse
import json
import logging
import os
import random
import subprocess
import time
from datetime import date
from enum import IntEnum
from tempfile import mkstemp


class StatusEnum(IntEnum):
  """Enum of valid statuses returned by profile decider.

  Values mirror the exit codes described in the module docstring. IntEnum is
  used so members compare equal to the raw integer return codes from
  subprocess.call.
  """
  GOOD_STATUS = 0  # profile produced a good binary
  BAD_STATUS = 1  # profile produced a bad binary
  SKIP_STATUS = 125  # no result could be determined; try another profile
  PROBLEM_STATUS = 127  # decider failure; DeciderState.run raises on this


# All known decider exit codes. Because StatusEnum is an IntEnum, raw integer
# return codes compare (and hash) equal to its members, so `code in statuses`
# works for plain ints; a frozenset makes that membership test O(1) instead of
# a linear scan over a members view.
statuses = frozenset(StatusEnum)

# How many times range_search() repeats its shuffle-and-narrow algorithm.
_NUM_RUNS_RANGE_SEARCH = 20


def json_to_text(json_prof):
  """Renders a JSON-based AFDO profile back into text form.

  Each key of json_prof is a function name and each value is that function's
  profile body (which already begins with ':'), so concatenating name + body
  for every entry in insertion order reproduces the text profile.
  """
  return ''.join(name + body for name, body in json_prof.items())


def text_to_json(f):
  """Performs basic parsing of an AFDO text-based profile.

  This parsing expects an input file object (or any iterable of lines) with
  contents of the form generated by bin/llvm-profdata (within an LLVM build):
  an unindented 'name:...' header line per function followed by indented body
  lines. Returns a dict mapping function name -> body text, where each body
  keeps its leading ':'.
  """
  profiles = {}
  func_name = None
  body_lines = []

  def flush():
    # Record the function collected so far (skipped for the initial state
    # and for headers whose name strips to the empty string).
    if func_name:
      profiles[func_name] = ''.join(body_lines)

  for line in f:
    if line.startswith(' '):
      # Indented continuation: part of the current function's body.
      body_lines.append(line)
    else:
      flush()
      name, rest = line.split(':', 1)
      func_name = name.strip()
      body_lines = [':' + rest]

  flush()
  return profiles


def prof_to_tmp(prof):
  """Creates (and returns) temp filename for given JSON-based AFDO profile.

  The caller is responsible for deleting the returned file.
  """
  fd, temp_path = mkstemp()
  # Wrap the descriptor mkstemp already opened rather than opening the path a
  # second time; the `with` block guarantees the descriptor is closed even if
  # the write raises (the original closed it only after a successful write).
  with os.fdopen(fd, 'w') as f:
    f.write(json_to_text(prof))
  return temp_path


class DeciderState(object):
  """Wraps the external decider script and persists its verdict history.

  Records every verdict the external decider returns and saves
  (seed, accumulated_results) to a JSON state file, so that an interrupted
  analysis can be resumed later by replaying the saved verdicts in order.
  """

  def __init__(self, state_file, external_decider, seed):
    """Args:
      state_file: path of the JSON file used to persist/restore state.
      external_decider: path of the executable that judges a profile.
      seed: RNG seed to record; defaults to the current time when None.
    """
    self.accumulated_results = []  # over this run of the script
    self.external_decider = external_decider
    self.saved_results = []  # imported from a previous run of this script
    self.state_file = state_file
    self.seed = seed if seed is not None else time.time()

  def load_state(self):
    """Restores seed and saved verdicts from self.state_file, if it exists."""
    if not os.path.exists(self.state_file):
      logging.info('State file %s is empty, starting from beginning',
                   self.state_file)
      return

    with open(self.state_file, encoding='utf-8') as f:
      try:
        data = json.load(f)
      except ValueError as e:
        # json.JSONDecodeError subclasses ValueError; catching that narrowly
        # (instead of a bare except) avoids swallowing KeyboardInterrupt and
        # SystemExit, and chains the original parse error for debugging.
        raise ValueError('Provided state file %s to resume from does not'
                         ' contain a valid JSON.' % self.state_file) from e

    if 'seed' not in data or 'accumulated_results' not in data:
      raise ValueError('Provided state file %s to resume from does not contain'
                       ' the correct information' % self.state_file)

    self.seed = data['seed']
    self.saved_results = data['accumulated_results']
    logging.info('Restored state from %s...', self.state_file)

  def save_state(self):
    """Writes seed and accumulated verdicts to self.state_file atomically."""
    state = {'seed': self.seed, 'accumulated_results': self.accumulated_results}
    # Write to a sibling temp file first, then rename over the target, so a
    # crash mid-write cannot leave a corrupt state file behind.
    tmp_file = self.state_file + '.new'
    with open(tmp_file, 'w', encoding='utf-8') as f:
      json.dump(state, f, indent=2)
    os.rename(tmp_file, self.state_file)
    logging.info('Logged state to %s...', self.state_file)

  def run(self, prof, save_run=True):
    """Run the external deciding script on the given profile.

    When resuming from a prior run, replays the next saved verdict instead of
    invoking the decider; otherwise writes the profile to a temp file and
    calls the external decider on it.

    Args:
      prof: JSON-based AFDO profile to judge.
      save_run: whether to record the verdict in accumulated_results.

    Returns:
      The StatusEnum member matching the decider's exit code.

    Raises:
      RuntimeError: the decider returned PROBLEM_STATUS.
      ValueError: the decider returned an unknown exit code.
    """
    if self.saved_results and save_run:
      # Resuming: replay the next verdict from the previous run.
      result = self.saved_results.pop(0)
      self.accumulated_results.append(result)
      self.save_state()
      return StatusEnum(result)

    filename = prof_to_tmp(prof)

    try:
      return_code = subprocess.call([self.external_decider, filename])
    finally:
      os.remove(filename)

    if return_code in statuses:
      status = StatusEnum(return_code)
      if status == StatusEnum.PROBLEM_STATUS:
        # Re-materialize the profile (the first temp copy was deleted above)
        # so the error message can point at the offending input.
        prof_file = prof_to_tmp(prof)
        raise RuntimeError('Provided decider script returned PROBLEM_STATUS '
                           'when run on profile stored at %s. AFDO Profile '
                           'analysis aborting' % prof_file)
      if save_run:
        self.accumulated_results.append(status.value)
      logging.info('Run %d of external script %s returned %s',
                   len(self.accumulated_results), self.external_decider,
                   status.name)
      self.save_state()
      return status
    raise ValueError(
        'Provided external script had unexpected return code %d' % return_code)


def bisect_profiles(decider, good, bad, common_funcs, lo, hi):
  """Recursive function which bisects good and bad profiles.

  Args:
    decider: object whose run() method, given a JSON-based AFDO profile,
      returns an element of 'statuses' based on the status of the profile
    good: JSON-based good AFDO profile
    bad: JSON-based bad AFDO profile
    common_funcs: the list of functions which have top-level profiles in both
      'good' and 'bad'
    lo: lower bound of range being bisected on
    hi: upper bound of range being bisected on

  Returns a dictionary with two keys: 'individuals' and 'ranges'.
  'individuals': a list of individual functions found to make the profile BAD
  'ranges': a list of lists of function names. Each list of functions is a
    list such that including all of those from the bad profile makes the good
    profile BAD. It may not be the smallest problematic combination, but
    definitely contains a problematic combination of profiles.
  """
  results = {'individuals': [], 'ranges': []}

  # Base case: the range has narrowed to a single function profile.
  if hi - lo <= 1:
    logging.info('Found %s as a problematic function profile', common_funcs[lo])
    results['individuals'].append(common_funcs[lo])
    return results

  mid = (lo + hi) // 2

  def hybrid_profile(start, end):
    # Good profile with the bad bodies substituted for funcs[start:end].
    prof = good.copy()
    prof.update((func, bad[func]) for func in common_funcs[start:end])
    return prof

  lo_mid_verdict = decider.run(hybrid_profile(lo, mid))
  mid_hi_verdict = decider.run(hybrid_profile(mid, hi))

  def recurse_into(new_lo, new_hi):
    # Bisect a sub-range and fold its findings into our results.
    sub = bisect_profiles(decider, good, bad, common_funcs, new_lo, new_hi)
    results['individuals'].extend(sub['individuals'])
    results['ranges'].extend(sub['ranges'])

  if lo_mid_verdict == StatusEnum.BAD_STATUS:
    recurse_into(lo, mid)
  if mid_hi_verdict == StatusEnum.BAD_STATUS:
    recurse_into(mid, hi)

  # Neither half is BAD on its own: the badness must come from several
  # profiles occurring in conjunction, and that combination crosses 'mid'.
  if lo_mid_verdict == mid_hi_verdict == StatusEnum.GOOD_STATUS:
    problem_range = range_search(decider, good, bad, common_funcs, lo, hi)
    if problem_range:
      logging.info('Found %s as a problematic combination of profiles',
                   str(problem_range))
      results['ranges'].append(problem_range)

  return results


def bisect_profiles_wrapper(decider, good, bad, perform_check=True):
  """Wrapper for recursive profile bisection."""
  # Sanity-check that the supplied profiles really are GOOD/BAD; otherwise
  # bisection would just report noise. Note that while decider is a random
  # mock, these assertions may fail.
  if perform_check:
    if decider.run(good, save_run=False) != StatusEnum.GOOD_STATUS:
      raise ValueError('Supplied good profile is not actually GOOD')
    if decider.run(bad, save_run=False) != StatusEnum.BAD_STATUS:
      raise ValueError('Supplied bad profile is not actually BAD')

  common_funcs = sorted(set(good).intersection(bad))
  if not common_funcs:
    return {'ranges': [], 'individuals': []}

  # The analysis is quite order-dependent but this list has no inherent
  # ordering; shuffling on every invocation increases the chances that
  # repeated runs surface new, potentially interesting results.
  random.shuffle(common_funcs)
  results = bisect_profiles(decider, good, bad, common_funcs, 0,
                            len(common_funcs))
  for key in ('ranges', 'individuals'):
    results[key].sort()
  return results


def range_search(decider, good, bad, common_funcs, lo, hi):
  """Searches for problematic range crossing mid border.

  The main inner algorithm is the following, which looks for the smallest
  possible ranges with problematic combinations. It starts the upper bound at
  the midpoint, and increments in halves until it gets a BAD profile.
  Then, it increments the lower bound (in halves) until the resultant profile
  is GOOD, and then we have a range that causes 'BAD'ness.

  It does this _NUM_RUNS_RANGE_SEARCH times, and shuffles the functions being
  looked at uniquely each time to try and get the smallest possible range
  of functions in a reasonable timeframe.

  Returns a sorted list of function names for the smallest problematic
  combination found.
  """

  # Integer midpoint of two indices (the // already floors the division).
  average = lambda x, y: int(round((x + y) // 2.0))

  def find_upper_border(good_copy, funcs, lo, hi, last_bad_val=None):
    """Finds the upper border of problematic range."""
    mid = average(lo, hi)
    if mid in (lo, hi):
      # NOTE(review): `or` treats last_bad_val == 0 as "never saw BAD";
      # only correct if 0 can never be a recorded bad upper bound — confirm.
      return last_bad_val or hi

    # Probe with bad bodies substituted for funcs[lo:mid].
    for func in funcs[lo:mid]:
      good_copy[func] = bad[func]
    verdict = decider.run(good_copy)

    # reset for next iteration
    # NOTE(review): this restores *every* func to its good body, including
    # the lo:mid bad substitutions the caller set up before the first call —
    # confirm later probes are meant to run without them.
    for func in funcs:
      good_copy[func] = good[func]

    if verdict == StatusEnum.BAD_STATUS:
      return find_upper_border(good_copy, funcs, lo, mid, mid)
    return find_upper_border(good_copy, funcs, mid, hi, last_bad_val)

  def find_lower_border(good_copy, funcs, lo, hi, last_bad_val=None):
    """Finds the lower border of problematic range."""
    mid = average(lo, hi)
    if mid in (lo, hi):
      # NOTE(review): same falsy-zero conflation as in find_upper_border.
      return last_bad_val or lo

    # Probe with good bodies restored for funcs[lo:mid] (shrinking the
    # bad region from the left).
    for func in funcs[lo:mid]:
      good_copy[func] = good[func]
    verdict = decider.run(good_copy)

    # reset for next iteration
    for func in funcs:
      good_copy[func] = bad[func]

    if verdict == StatusEnum.BAD_STATUS:
      return find_lower_border(good_copy, funcs, mid, hi, lo)
    return find_lower_border(good_copy, funcs, lo, mid, last_bad_val)

  lo_mid_funcs = []
  mid_hi_funcs = []
  min_range_funcs = []
  for _ in range(_NUM_RUNS_RANGE_SEARCH):

    if min_range_funcs:  # only examine range we've already narrowed to
      random.shuffle(lo_mid_funcs)
      random.shuffle(mid_hi_funcs)
    else:  # consider lo-mid and mid-hi separately bc must cross border
      mid = (lo + hi) // 2
      lo_mid_funcs = common_funcs[lo:mid]
      mid_hi_funcs = common_funcs[mid:hi]

    # From here on, indices are relative to the (possibly shuffled)
    # concatenation of the two halves, not to common_funcs.
    funcs = lo_mid_funcs + mid_hi_funcs
    hi = len(funcs)
    mid = len(lo_mid_funcs)
    lo = 0

    # because we need the problematic pair to pop up before we can narrow it
    prof = good.copy()
    for func in lo_mid_funcs:
      prof[func] = bad[func]

    upper_border = find_upper_border(prof, funcs, mid, hi)
    for func in lo_mid_funcs + funcs[mid:upper_border]:
      prof[func] = bad[func]

    lower_border = find_lower_border(prof, funcs, lo, mid)
    curr_range_funcs = funcs[lower_border:upper_border]

    if not min_range_funcs or len(curr_range_funcs) < len(min_range_funcs):
      # New smallest problematic range found; restrict future shuffles to
      # the sub-slices that contain it.
      min_range_funcs = curr_range_funcs
      lo_mid_funcs = lo_mid_funcs[lo_mid_funcs.index(min_range_funcs[0]):]
      mid_hi_funcs = mid_hi_funcs[:mid_hi_funcs.index(min_range_funcs[-1]) + 1]
      if len(min_range_funcs) == 2:
        min_range_funcs.sort()
        return min_range_funcs  # can't get any smaller

  min_range_funcs.sort()
  return min_range_funcs


def check_good_not_bad(decider, good, bad):
  """Check if bad prof becomes GOOD by adding funcs it lacks from good prof"""
  augmented_bad = dict(bad)
  # Fill in every function the bad profile is missing, keeping the bad
  # profile's own entries untouched.
  for func, body in good.items():
    augmented_bad.setdefault(func, body)
  return decider.run(augmented_bad) == StatusEnum.GOOD_STATUS


def check_bad_not_good(decider, good, bad):
  """Check if good prof BAD after adding funcs bad prof has that good doesnt"""
  augmented_good = dict(good)
  # Add only the functions exclusive to the bad profile; shared functions
  # keep their good bodies.
  augmented_good.update(
      (func, body) for func, body in bad.items() if func not in good)
  return decider.run(augmented_good) == StatusEnum.BAD_STATUS


def parse_args():
  """Builds the command-line parser for this script and parses sys.argv."""
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  add = parser.add_argument  # shorthand; every option below goes through it
  add('--good_prof',
      required=True,
      help='Text-based "Good" profile for analysis')
  add('--bad_prof', required=True, help='Text-based "Bad" profile for analysis')
  add('--external_decider',
      required=True,
      help='External script that, given an AFDO profile, returns '
      'GOOD/BAD/SKIP')
  add('--analysis_output_file',
      required=True,
      help='File to output JSON results to')
  add('--state_file',
      default='%s/afdo_analysis_state.json' % os.getcwd(),
      help='File path containing state to load from initially, and will be '
      'overwritten with new state on each iteration')
  add('--no_resume',
      action='store_true',
      help='If enabled, no initial state will be loaded and the program will '
      'run from the beginning')
  add('--remove_state_on_completion',
      action='store_true',
      help='If enabled, state file will be removed once profile analysis is '
      'completed')
  add('--seed', type=float, help='Float specifying seed for randomness')
  return parser.parse_args()


def main(flags):
  """Runs the full AFDO profile analysis and writes results to a JSON file.

  Args:
    flags: parsed argparse namespace from parse_args().

  Returns:
    The results dict that was also written to flags.analysis_output_file.

  Raises:
    RuntimeError: a seed was given on the command line while also resuming
      from saved state (the two sources of seed would conflict).
  """
  logging.getLogger().setLevel(logging.INFO)
  # Compare against None explicitly: a seed of 0.0 is a valid, deliberate
  # choice and must still be flagged as conflicting with a resumed state
  # (matches the `seed is not None` handling in DeciderState.__init__).
  if not flags.no_resume and flags.seed is not None:  # conflicting seeds
    raise RuntimeError('Ambiguous seed value; do not resume from existing '
                       'state and also specify seed by command line flag')

  decider = DeciderState(
      flags.state_file, flags.external_decider, seed=flags.seed)
  if not flags.no_resume:
    decider.load_state()
  random.seed(decider.seed)

  # Parse each text profile once into a {function: body} dict; all analysis
  # below works on these dicts. utf-8 is specified explicitly for consistency
  # with the other file accesses in this script.
  with open(flags.good_prof, encoding='utf-8') as good_f:
    good_items = text_to_json(good_f)
  with open(flags.bad_prof, encoding='utf-8') as bad_f:
    bad_items = text_to_json(bad_f)

  bisect_results = bisect_profiles_wrapper(decider, good_items, bad_items)
  gnb_result = check_good_not_bad(decider, good_items, bad_items)
  bng_result = check_bad_not_good(decider, good_items, bad_items)

  results = {
      'seed': decider.seed,
      'bisect_results': bisect_results,
      'good_only_functions': gnb_result,
      'bad_only_functions': bng_result
  }
  with open(flags.analysis_output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

  if flags.remove_state_on_completion:
    os.remove(flags.state_file)
    logging.info('Removed state file %s following completion of script...',
                 flags.state_file)
  else:
    # Keep the final state around under a dated name so a later inspection
    # can see exactly which verdicts this run accumulated.
    completed_state_file = '%s.completed.%s' % (flags.state_file,
                                                str(date.today()))
    os.rename(flags.state_file, completed_state_file)
    logging.info('Stored completed state file as %s...', completed_state_file)
  return results


# Script entry point: parse command-line flags and run the analysis.
if __name__ == '__main__':
  main(parse_args())