diff options
author | George Burgess IV <gbiv@google.com> | 2020-10-05 16:32:36 -0700 |
---|---|---|
committer | George Burgess <gbiv@chromium.org> | 2020-10-07 07:19:07 +0000 |
commit | 6d9e457d1841b6086c51719ee43a56f2ee550c7c (patch) | |
tree | 65503c9666e02d60bf1bd67f0ab0aa9ab1fcae2b | |
parent | 92d7005475c73345f1367892d4e28c618137ffc5 (diff) | |
download | toolchain-utils-6d9e457d1841b6086c51719ee43a56f2ee550c7c.tar.gz |
llvm_tools: add a lexan crash autouploader
This CL adds an autouploader for crashes that land in Lexan's bucket.
This autouploader uploads them to 4c, much like how
`bisect_clang_crashes.py` does. The intent is to run this regularly on
chrotomation.
BUG=None
TEST=Ran it
Change-Id: I7cfbe463d89994f6ed3f750c9e8277e1fad0738e
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/2451306
Reviewed-by: Jian Cai <jiancai@google.com>
Tested-by: George Burgess <gbiv@chromium.org>
-rw-r--r-- | llvm_tools/README.md | 25 | ||||
-rwxr-xr-x | llvm_tools/upload_lexan_crashes_to_forcey.py | 258 | ||||
-rwxr-xr-x | llvm_tools/upload_lexan_crashes_to_forcey_test.py | 122 |
3 files changed, 404 insertions, 1 deletions
diff --git a/llvm_tools/README.md b/llvm_tools/README.md
index d7c20164..783ec22d 100644
--- a/llvm_tools/README.md
+++ b/llvm_tools/README.md
@@ -555,7 +555,7 @@ PYTHONPATH=../ ./nightly_revert_checker.py \
 ### `bisect_clang_crashes.py`
 
 This script downloads clang crash diagnoses from
-gs://chromeos-toolchain-artifacts/clang-crash-diagnoses and send them to 4c for
+gs://chromeos-toolchain-artifacts/clang-crash-diagnoses and sends them to 4c for
 bisection.
 
 Usage example:
@@ -569,3 +569,26 @@ to 4c server for bisection. The summary of submitted jobs will be saved in
 output/state.json under the current path. The output directory will be created
 automatically if it does not exist yet. To get more information of the submitted
 jobs, please refer to go/4c-cli.
+
+### `upload_lexan_crashes_to_forcey.py`
+
+This script downloads clang crash diagnoses from Lexan's bucket and sends them
+to 4c for bisection.
+
+Usage example:
+
+```
+$ ./upload_lexan_crashes_to_forcey.py --4c 4c-cli \
+    --state_file ./output/state.json
+```
+
+The above command downloads the artifacts of clang crash diagnoses and sends them
+to 4c server for bisection. The summary of submitted jobs will be saved in
+output/state.json under the current path. The output directory will be created
+automatically if it does not exist yet. To get more information of the submitted
+jobs, please refer to go/4c-cli.
+
+Note that it's recommended to 'seed' the state file with a most recent upload
+date. This can be done by running this tool *once* with a `--last_date` flag.
+This flag has the script override whatever's in the state file (if anything) and
+start submitting all crashes uploaded starting at the given day.
diff --git a/llvm_tools/upload_lexan_crashes_to_forcey.py b/llvm_tools/upload_lexan_crashes_to_forcey.py
new file mode 100755
index 00000000..9cf0c086
--- /dev/null
+++ b/llvm_tools/upload_lexan_crashes_to_forcey.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Fetches and submits the latest test-cases from Lexan's crash bucket."""
+
+# pylint: disable=cros-logging-import
+
+import argparse
+import contextlib
+import datetime
+import json
+import logging
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from typing import Generator, Iterable, List, Tuple
+
+gsurl_base = 'gs://chrome-clang-crash-reports/v1'
+
+
+def gsutil_ls(loc: str) -> List[str]:
+  results = subprocess.run(['gsutil', 'ls', loc],
+                           stdout=subprocess.PIPE,
+                           check=True,
+                           encoding='utf-8')
+  return [l.strip() for l in results.stdout.splitlines()]
+
+
+def gsurl_ls_last_numbers(url: str) -> List[int]:
+  return sorted(int(x.rstrip('/').split('/')[-1]) for x in gsutil_ls(url))
+
+
+def get_available_year_numbers() -> List[int]:
+  return gsurl_ls_last_numbers(gsurl_base)
+
+
+def get_available_month_numbers(year: int) -> List[int]:
+  return gsurl_ls_last_numbers(f'{gsurl_base}/{year}')
+
+
+def get_available_day_numbers(year: int, month: int) -> List[int]:
+  return gsurl_ls_last_numbers(f'{gsurl_base}/{year}/{month:02d}')
+
+
+def get_available_test_case_urls(year: int, month: int, day: int) -> List[str]:
+  return gsutil_ls(f'{gsurl_base}/{year}/{month:02d}/{day:02d}')
+
+
+def test_cases_on_or_after(date: datetime.date) -> Generator[
+    Tuple[datetime.date, List[str]], None, None]:
+  """Yields (date, test-case URLs) for each day on or after the given date."""
+  for year in get_available_year_numbers():
+    if year < date.year:
+      continue
+
+    for month in get_available_month_numbers(year):
+      if year == date.year and month < date.month:
+        continue
+
+      for day in get_available_day_numbers(year, month):
+        when = datetime.date(year, month, day)
+        if when < date:
+          continue
+
+        yield when, get_available_test_case_urls(year, month, day)
+
+
+def to_ymd(date: datetime.date) -> str:
+  return date.strftime('%Y-%m-%d')
+
+
+def from_ymd(date_str: str) -> datetime.date:
+  return datetime.datetime.strptime(date_str, '%Y-%m-%d').date()
+
+
+def persist_state(seen_urls: Iterable[str], state_file: str,
+                  current_date: datetime.date):
+  tmp_state_file = state_file + '.tmp'
+  with open(tmp_state_file, 'w', encoding='utf-8') as f:
+    json.dump(
+        {
+            'already_seen': sorted(seen_urls),
+            'most_recent_date': to_ymd(current_date),
+        },
+        f,
+    )
+  os.rename(tmp_state_file, state_file)
+
+
+@contextlib.contextmanager
+def temp_dir() -> Generator[str, None, None]:
+  loc = tempfile.mkdtemp(prefix='lexan-autosubmit')
+  try:
+    yield loc
+  finally:
+    shutil.rmtree(loc)
+
+
+def submit_test_case(gs_url: str, cr_tool: str) -> None:
+  logging.info('Submitting %s', gs_url)
+  suffix = os.path.splitext(gs_url)[1]
+  with temp_dir() as tempdir:
+    target_name = 'test_case' + suffix
+    target = os.path.join(tempdir, target_name)
+    subprocess.run(['gsutil', 'cp', gs_url, target], check=True)
+    subprocess.run(['tar', 'xaf', target_name], check=True, cwd=tempdir)
+    os.unlink(target)
+
+    # Sometimes (e.g., in
+    # gs://chrome-clang-crash-reports/v1/2020/03/27/
+    # chromium.clang-ToTiOS-12754-GTXToolKit-2bfcde.tgz)
+    # we'll get `.crash` files. Unclear why, but let's filter them out anyway.
+    repro_files = [x for x in os.listdir(tempdir) if not x.endswith('.crash')]
+    assert len(repro_files) == 2, repro_files
+    if repro_files[0].endswith('.sh'):
+      sh_file, src_file = repro_files
+      assert not src_file.endswith('.sh'), repro_files
+    else:
+      src_file, sh_file = repro_files
+      assert sh_file.endswith('.sh'), repro_files
+
+    subprocess.run(
+        [
+            cr_tool,
+            'reduce',
+            '-stream=false',
+            '-wait=false',
+            '-note',
+            gs_url,
+            '-sh_file',
+            os.path.join(tempdir, sh_file),
+            '-src_file',
+            os.path.join(tempdir, src_file),
+        ],
+        check=True,
+    )
+
+
+def submit_new_test_cases(
+    last_seen_test_cases: Iterable[str],
+    earliest_date_to_check: datetime.date,
+    forcey: str,
+    state_file_path: str,
+) -> None:
+  """Submits new test-cases to forcey.
+
+  This will persist state after each test-case is submitted.
+
+  Args:
+    last_seen_test_cases: test-cases which have been submitted already, and
+      should be skipped if seen again.
+    earliest_date_to_check: the earliest date we should consider test-cases
+      from.
+    forcey: path to the forcey binary.
+    state_file_path: path to our state file.
+  """
+  # `all_test_cases_seen` is the union of all test-cases seen on this and prior
+  # invocations. It guarantees, in all cases we care about, that we won't
+  # submit the same test-case twice. `test_cases_seen_this_invocation` is
+  # persisted as "all of the test-cases we've seen on this and prior
+  # invocations" if we successfully submit _all_ test-cases.
+  #
+  # Since you can visualize the test-cases this script considers as a sliding
+  # window that only moves forward, if we saw a test-case on a prior iteration
+  # but no longer see it, we'll never see it again (since it fell out of our
+  # sliding window by being too old). Hence, keeping it around is
+  # pointless.
+  #
+  # We only persist this minimized set of test-cases if _everything_ succeeds,
+  # since if something fails below, there's a chance that we haven't revisited
+  # test-cases that we've already seen.
+  all_test_cases_seen = set(last_seen_test_cases)
+  test_cases_seen_this_invocation = []
+  most_recent_date = earliest_date_to_check
+  for date, candidates in test_cases_on_or_after(earliest_date_to_check):
+    most_recent_date = max(most_recent_date, date)
+
+    for url in candidates:
+      test_cases_seen_this_invocation.append(url)
+      if url in all_test_cases_seen:
+        continue
+
+      all_test_cases_seen.add(url)
+      submit_test_case(url, forcey)
+
+      # Persisting on each iteration of this loop isn't free, but it's the
+      # easiest way to not resubmit test-cases, and it's good to keep in mind
+      # that:
+      # - the state file will be small (<12KB, since it only keeps a few days
+      #   worth of test-cases after the first run)
+      # - in addition to this, we're downloading+unzipping+reuploading multiple
+      #   MB of test-case bytes.
+      #
+      # So comparatively, the overhead here probably isn't an issue.
+      persist_state(all_test_cases_seen, state_file_path, most_recent_date)
+
+  persist_state(test_cases_seen_this_invocation, state_file_path,
+                most_recent_date)
+
+
+def main(argv: List[str]):
+  logging.basicConfig(
+      format='>> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: '
+      '%(message)s',
+      level=logging.INFO,
+  )
+
+  my_dir = os.path.dirname(os.path.abspath(__file__))
+
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument(
+      '--state_file', default=os.path.join(my_dir, 'lexan-state.json'))
+  parser.add_argument(
+      '--last_date',
+      help='The earliest date that we care about. All test cases from here '
+      'on will be picked up. Format is YYYY-MM-DD.')
+  parser.add_argument(
+      '--4c', dest='forcey', required=True, help='Path to a 4c client binary')
+  opts = parser.parse_args(argv)
+
+  forcey = opts.forcey
+  state_file = os.path.abspath(opts.state_file)
+  last_date_str = opts.last_date
+
+  os.makedirs(os.path.dirname(state_file), 0o755, exist_ok=True)
+
+  if last_date_str is None:
+    with open(state_file, encoding='utf-8') as f:
+      data = json.load(f)
+    most_recent_date = from_ymd(data['most_recent_date'])
+    submit_new_test_cases(
+        last_seen_test_cases=data['already_seen'],
+        # Note that we always subtract one day from this to avoid a race:
+        # uploads may appear slightly out-of-order (or builders may lag, or
+        # ...), so the last test-case uploaded for 2020/01/01 might appear
+        # _after_ the first test-case for 2020/01/02. Assuming that builders
+        # won't lag behind for over a day, the easiest way to handle this is to
+        # always check the previous and current days.
+        earliest_date_to_check=most_recent_date - datetime.timedelta(days=1),
+        forcey=forcey,
+        state_file_path=state_file,
+    )
+  else:
+    submit_new_test_cases(
+        last_seen_test_cases=(),
+        earliest_date_to_check=from_ymd(last_date_str),
+        forcey=forcey,
+        state_file_path=state_file,
+    )
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))
diff --git a/llvm_tools/upload_lexan_crashes_to_forcey_test.py b/llvm_tools/upload_lexan_crashes_to_forcey_test.py
new file mode 100755
index 00000000..3c9c0d4b
--- /dev/null
+++ b/llvm_tools/upload_lexan_crashes_to_forcey_test.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for upload_lexan_crashes_to_forcey."""
+
+import datetime
+import unittest
+import unittest.mock
+
+import upload_lexan_crashes_to_forcey
+
+
+class Test(unittest.TestCase):
+  """Tests for upload_lexan_crashes_to_forcey."""
+
+  def test_date_parsing_functions(self):
+    self.assertEqual(
+        datetime.date(2020, 2, 1),
+        upload_lexan_crashes_to_forcey.from_ymd('2020-02-01'))
+
+  @unittest.mock.patch(
+      'upload_lexan_crashes_to_forcey.test_cases_on_or_after',
+      return_value=(
+          (
+              datetime.date(2020, 1, 1),
+              ('gs://test-case-1', 'gs://test-case-1.1'),
+          ),
+          (datetime.date(2020, 1, 2), ('gs://test-case-2',)),
+          (datetime.date(2020, 1, 1), ('gs://test-case-3',)),
+          (datetime.date(2020, 1, 4), ('gs://test-case-4',)),
+      ))
+  @unittest.mock.patch('upload_lexan_crashes_to_forcey.submit_test_case')
+  @unittest.mock.patch('upload_lexan_crashes_to_forcey.persist_state')
+  def test_new_test_case_submission_functions(self, persist_state_mock,
+                                              submit_test_case_mock,
+                                              test_cases_on_or_after_mock):
+    forcey_path = '/path/to/4c'
+    real_state_file_path = '/path/to/state/file'
+    earliest_date = datetime.date(2020, 1, 1)
+
+    persist_state_calls = []
+
+    # Since the set this gets is mutated, we need to copy it somehow.
+    def persist_state_side_effect(test_cases_to_persist, state_file_path,
+                                  most_recent_date):
+      self.assertEqual(state_file_path, real_state_file_path)
+      persist_state_calls.append(
+          (sorted(test_cases_to_persist), most_recent_date))
+
+    persist_state_mock.side_effect = persist_state_side_effect
+
+    upload_lexan_crashes_to_forcey.submit_new_test_cases(
+        last_seen_test_cases=(
+            'gs://test-case-0',
+            'gs://test-case-1',
+        ),
+        earliest_date_to_check=earliest_date,
+        forcey=forcey_path,
+        state_file_path=real_state_file_path,
+    )
+
+    test_cases_on_or_after_mock.assert_called_once_with(earliest_date)
+    self.assertEqual(submit_test_case_mock.call_args_list, [
+        unittest.mock.call('gs://test-case-1.1', forcey_path),
+        unittest.mock.call('gs://test-case-2', forcey_path),
+        unittest.mock.call('gs://test-case-3', forcey_path),
+        unittest.mock.call('gs://test-case-4', forcey_path),
+    ])
+
+    self.assertEqual(persist_state_calls, [
+        (
+            ['gs://test-case-0', 'gs://test-case-1', 'gs://test-case-1.1'],
+            datetime.date(2020, 1, 1),
+        ),
+        (
+            [
+                'gs://test-case-0',
+                'gs://test-case-1',
+                'gs://test-case-1.1',
+                'gs://test-case-2',
+            ],
+            datetime.date(2020, 1, 2),
+        ),
+        (
+            [
+                'gs://test-case-0',
+                'gs://test-case-1',
+                'gs://test-case-1.1',
+                'gs://test-case-2',
+                'gs://test-case-3',
+            ],
+            datetime.date(2020, 1, 2),
+        ),
+        (
+            [
+                'gs://test-case-0',
+                'gs://test-case-1',
+                'gs://test-case-1.1',
+                'gs://test-case-2',
+                'gs://test-case-3',
+                'gs://test-case-4',
+            ],
+            datetime.date(2020, 1, 4),
+        ),
+        (
+            [
+                'gs://test-case-1',
+                'gs://test-case-1.1',
+                'gs://test-case-2',
+                'gs://test-case-3',
+                'gs://test-case-4',
+            ],
+            datetime.date(2020, 1, 4),
+        ),
+    ])
+
+
+if __name__ == '__main__':
+  unittest.main()