diff options
author | Michael Benfield <mbenfield@google.com> | 2022-07-14 19:59:38 +0000 |
---|---|---|
committer | Chromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-08-15 21:29:05 +0000 |
commit | c54fcb81c239264388222af4fdf5e4127f064c94 (patch) | |
tree | 7dd5d32b8ddf5edf5da90ea3890bea381eaa44ac | |
parent | d7a612f02a7118897045e9664a1aa4d498b7f5ec (diff) | |
download | toolchain-utils-c54fcb81c239264388222af4fdf5e4127f064c94.tar.gz |
pgo_rust.py: add
This is a Python script to handle all aspects of creating and
benchmarking PGO profiles for the Rust compiler.
Note that this script is not usable without crrev.com/c/3762719, but as
no one else will need to use the script until the next Rust uprev, there
seems to be no need for Cq-Depend.
BUG=b:234046818
TEST=Used the script to enable PGO for Rust 1.62.1
Change-Id: I9a35cee3a2d6350a3a29b85c8d62ef450ce8fc22
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/3763629
Commit-Queue: Michael Benfield <mbenfield@google.com>
Tested-by: Michael Benfield <mbenfield@google.com>
Reviewed-by: George Burgess <gbiv@chromium.org>
-rwxr-xr-x | pgo_tools_rust/pgo_rust.py | 541 |
1 files changed, 541 insertions, 0 deletions
diff --git a/pgo_tools_rust/pgo_rust.py b/pgo_tools_rust/pgo_rust.py new file mode 100755 index 00000000..5e09c1c0 --- /dev/null +++ b/pgo_tools_rust/pgo_rust.py @@ -0,0 +1,541 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 The ChromiumOS Authors. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Handle most aspects of creating and benchmarking PGO profiles for Rust. + +This is meant to be done at Rust uprev time. Ultimately profdata files need +to be placed at + +gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-frontend.profdata.tz +and +gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-llvm.profdata.tz + +The intended flow is that you first get the new Rust version in a shape so that +it builds, for instance modifying or adding patches as necessary. Note that if +you need to generate manifests for dev-lang/rust and dev-lang/rust-host before +the profdata files are created, which will cause the `ebuild manifest` command +to fail. One way to handle this is to temporarily delete the lines of the +variable SRC_URI in cros-rustc.eclass which refer to profdata files. + +After you have a new working Rust version, you can run the following. + +``` +$ ./pgo_rust.py generate # generate profdata files +$ ./pgo_rust.py benchmark-pgo # benchmark with PGO +$ ./pgo_rust.py benchmark-nopgo # benchmark without PGO +$ ./pgo_rust.py upload-profdata # upload profdata to localmirror +``` + +The benchmark steps aren't strictly necessary, but are recommended and will +upload benchmark data to + +gs://chromeos-toolchain-artifacts/rust-pgo/benchmarks/{rust_version}/ + +Currently by default ripgrep 13.0.0 is used as both the crate to build using an +instrumented Rust while generating profdata, and the crate to build to +benchmark Rust. You may wish to experiment with other crates for either role. +In that case upload your crate to + +gs://chromeos-toolchain-artifacts/rust-pgo/crates/{name}-{version}.tar.xz + +and use `--crate-name` and `--crate-version` to indicate which crate to build +to generate profdata (or which crate's generated profdata to use), and +`--bench-crate-name` to indicate which crate to build in benchmarks. + +Notes on various local and GS locations follow. + +Note that currently we need to keep separate profdata files for the LLVM and +frontend components of Rust. This is because LLVM profdata is instrumented by +the system LLVM, but Rust's profdata is instrumented by its own LLVM, which +may have separate profdata. + +profdata files accessed by ebuilds must be stored in + +gs://chromeos-localmirror/distfiles + +Specifically, they go to + +gs://chromeos-localmirror/distfiles/rust-pgo-{rust-version}-llvm.profdata.xz + +gs://chromeos-localmirror/distfiles/ + rust-pgo-{rust-version}-frontend.profdata.xz + +But we can store other data elsewhere, like gs://chromeos-toolchain-artifacts. + +GS locations: + +{GS_BASE}/crates/ - store crates we may use for generating profiles or +benchmarking PGO optimized Rust compilers + +{GS_BASE}/benchmarks/{rust_version}/nopgo/ + {bench_crate_name}-{bench_crate_version}-{triple} + +{GS_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/ + {bench_crate_name}-{bench_crate_version}-{triple} + +Local locations: + +{LOCAL_BASE}/crates/ + +{LOCAL_BASE}/llvm-profraw/ + +{LOCAL_BASE}/frontend-profraw/ + +{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/llvm.profdata + +{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/frontend.profdata + +{LOCAL_BASE}/benchmarks/{rust_version}/nopgo/ + {bench_crate_name}-{bench_crate_version}-{triple} + +{LOCAL_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/ + {bench_crate_name}-{bench_crate_version}-{triple} + +{LOCAL_BASE}/llvm.profdata - must go here to be used by Rust ebuild +{LOCAL_BASE}/frontend.profdata - must go here to be used by Rust ebuild +""" + +import argparse +import contextlib +import logging +import os +from pathlib import Path +from pathlib import PurePosixPath +import re +import shutil +import subprocess +import sys +from typing import Dict, List, Optional + + +TARGET_TRIPLES = [ + 'x86_64-cros-linux-gnu', + 'x86_64-pc-linux-gnu', + 'armv7a-cros-linux-gnueabihf', + 'aarch64-cros-linux-gnu', +] + +LOCAL_BASE = Path('/tmp/rust-pgo') + +GS_BASE = PurePosixPath('/chromeos-toolchain-artifacts/rust-pgo') + +GS_DISTFILES = PurePosixPath('/chromeos-localmirror/distfiles') + +CRATE_NAME = 'ripgrep' + +CRATE_VERSION = '13.0.0' + + +@contextlib.contextmanager +def chdir(new_directory: Path): + initial_directory = Path.cwd() + os.chdir(new_directory) + try: + yield + finally: + os.chdir(initial_directory) + + +def run(args: List, + *, + indent: int = 4, + env: Optional[Dict[str, str]] = None, + capture_stdout: bool = False, + message: bool = True) -> Optional[str]: + args = [str(arg) for arg in args] + + if env is None: + new_env = os.environ + else: + new_env = os.environ.copy() + new_env.update(env) + + if message: + if env is None: + logging.info('Running %s', args) + else: + logging.info('Running %s in environment %s', args, env) + + result = subprocess.run(args, + env=new_env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding='utf-8', + check=False) + + stdout = result.stdout + stderr = result.stderr + if indent != 0: + stdout = re.sub('^', ' ' * indent, stdout, flags=re.MULTILINE) + stderr = re.sub('^', ' ' * indent, stderr, flags=re.MULTILINE) + + if capture_stdout: + ret = result.stdout + else: + logging.info('STDOUT:') + logging.info(stdout) + logging.info('STDERR:') + logging.info(stderr) + ret = None + + result.check_returncode() + + if message: + if env is None: + logging.info('Ran %s\n', args) + else: + logging.info('Ran %s in environment %s\n', args, env) + + return ret + + +def get_rust_version() -> str: + s = run(['rustc', '--version'], capture_stdout=True) + m = re.search(r'\d+\.\d+\.\d+', s) + assert m is not None, repr(s) + return m.group(0) + + +def download_unpack_crate(*, crate_name: str, crate_version: str): + filename_no_extension = f'{crate_name}-{crate_version}' + gs_path = GS_BASE / 'crates' / f'{filename_no_extension}.tar.xz' + local_path = LOCAL_BASE / 'crates' + shutil.rmtree(local_path / f'{crate_name}-{crate_version}', + ignore_errors=True) + with chdir(local_path): + run(['gsutil.py', 'cp', f'gs:/{gs_path}', '.']) + run(['xz', '-d', f'{filename_no_extension}.tar.xz']) + run(['tar', 'xvf', f'{filename_no_extension}.tar']) + + +def build_crate(*, + crate_name: str, + crate_version: str, + target_triple: str, + time_file: Optional[str] = None): + local_path = LOCAL_BASE / 'crates' / f'{crate_name}-{crate_version}' + with chdir(local_path): + Path('.cargo').mkdir(exist_ok=True) + with open('.cargo/config.toml', 'w') as f: + f.write('\n'.join(( + '[source.crates-io]', + 'replace-with = "vendored-sources"', + '', + '[source.vendored-sources]', + 'directory = "vendor"', + '', + f'[target.{target_triple}]', + f'linker = "{target_triple}-clang"', + '', + "[target.'cfg(all())']", + 'rustflags = [', + ' "-Clto=thin",', + ' "-Cembed-bitcode=yes",', + ']', + ))) + + run(['cargo', 'clean']) + + cargo_cmd = ['cargo', 'build', '--release', '--target', target_triple] + + if time_file is None: + run(cargo_cmd) + else: + time_cmd = [ + '/usr/bin/time', f'--output={time_file}', + '--format=wall time (s) %e\nuser time (s) %U\nmax RSS %M\n' + ] + run(time_cmd + cargo_cmd) + + +def build_rust(*, + generate_frontend_profile: bool = False, + generate_llvm_profile: bool = False, + use_frontend_profile: bool = False, + use_llvm_profile: bool = False): + + if use_frontend_profile or use_llvm_profile: + assert not generate_frontend_profile and not generate_llvm_profile, ( + "Can't build a compiler to both use profile information and generate it" + ) + + assert not generate_frontend_profile or not generate_llvm_profile, ( + "Can't generate both frontend and LLVM profile information") + + use = '-rust_profile_frontend_use -rust_profile_llvm_use ' + if generate_frontend_profile: + use += 'rust_profile_frontend_generate ' + if generate_llvm_profile: + use += 'rust_profile_llvm_generate ' + if use_frontend_profile: + use += 'rust_profile_frontend_use_local ' + if use_llvm_profile: + use += 'rust_profile_llvm_use_local ' + + # -E to preserve our USE environment variable. + run(['sudo', '-E', 'emerge', 'dev-lang/rust', 'dev-lang/rust-host'], + env={'USE': use}) + + +def merge_profdata(llvm_or_frontend, *, source_directory: Path, dest: Path): + assert llvm_or_frontend in ('llvm', 'frontend') + + # The two `llvm-profdata` programs come from different LLVM versions, and may + # support different versions of the profdata format, so make sure to use the + # right one. + llvm_profdata = ('/usr/bin/llvm-profdata' if llvm_or_frontend == 'llvm' else + '/usr/libexec/rust/llvm-profdata') + + dest.parent.mkdir(parents=True, exist_ok=True) + + files = list(source_directory.glob('*.profraw')) + run([llvm_profdata, 'merge', f'--output={dest}'] + files) + + +def do_upload_profdata(*, source: Path, dest: PurePosixPath): + new_path = source.parent / source.name / '.xz' + run(['xz', '--keep', '--compress', '--force', source]) + upload_file(source=new_path, dest=dest, public_read=True) + + +def upload_file(*, + source: Path, + dest: PurePosixPath, + public_read: bool = False): + if public_read: + run(['gsutil.py', 'cp', '-a', 'public-read', source, f'gs:/{dest}']) + else: + run(['gsutil.py', 'cp', source, f'gs:/{dest}']) + + +def maybe_download_crate(*, crate_name: str, crate_version: str): + directory = LOCAL_BASE / 'crates' / f'{crate_name}-{crate_version}' + if directory.is_dir(): + logging.info('Crate already downloaded') + else: + logging.info('Downloading crate') + download_unpack_crate(crate_name=crate_name, crate_version=crate_version) + + +def generate(args): + maybe_download_crate(crate_name=args.crate_name, + crate_version=args.crate_version) + + llvm_dir = LOCAL_BASE / 'llvm-profraw' + shutil.rmtree(llvm_dir, ignore_errors=True) + frontend_dir = LOCAL_BASE / 'frontend-profraw' + shutil.rmtree(frontend_dir, ignore_errors=True) + + logging.info('Building Rust instrumented for llvm') + build_rust(generate_llvm_profile=True) + + llvm_dir.mkdir(parents=True, exist_ok=True) + for triple in TARGET_TRIPLES: + logging.info('Building crate with LLVM instrumentation, for triple %s', + triple) + build_crate(crate_name=args.crate_name, + crate_version=args.crate_version, + target_triple=triple) + + logging.info('Merging LLVM profile data') + merge_profdata( + 'llvm', + source_directory=LOCAL_BASE / 'llvm-profraw', + dest=(LOCAL_BASE / 'profdata' / + f'{args.crate_name}-{args.crate_version}' / 'llvm.profdata')) + + logging.info('Building Rust instrumented for frontend') + build_rust(generate_frontend_profile=True) + + frontend_dir.mkdir(parents=True, exist_ok=True) + for triple in TARGET_TRIPLES: + logging.info('Building crate with frontend instrumentation, for triple %s', + triple) + build_crate(crate_name=args.crate_name, + crate_version=args.crate_version, + target_triple=triple) + + logging.info('Merging frontend profile data') + merge_profdata( + 'frontend', + source_directory=LOCAL_BASE / 'frontend-profraw', + dest=(LOCAL_BASE / 'profdata' / + f'{args.crate_name}-{args.crate_version}' / 'frontend.profdata')) + + +def benchmark_nopgo(args): + logging.info('Building Rust, no PGO') + build_rust() + + time_directory = LOCAL_BASE / 'benchmarks' / 'nopgo' + logging.info('Benchmarking crate build with no PGO') + time_directory.mkdir(parents=True, exist_ok=True) + for triple in TARGET_TRIPLES: + build_crate( + crate_name=args.bench_crate_name, + crate_version=args.bench_crate_version, + target_triple=triple, + time_file=( + time_directory / + f'{args.bench_crate_name}-{args.bench_crate_version}-{triple}')) + + rust_version = get_rust_version() + dest_directory = GS_BASE / 'benchmarks' / rust_version / 'nopgo' + logging.info('Uploading benchmark data') + for file in time_directory.iterdir(): + upload_file(source=time_directory / file.name, + dest=dest_directory / file.name) + + +def benchmark_pgo(args): + maybe_download_crate(crate_name=args.bench_crate_name, + crate_version=args.bench_crate_version) + + files_dir = Path('/mnt/host/source/src/third_party/chromiumos-overlay', + 'dev-lang/rust/files') + + logging.info('Copying profile data to be used in building Rust') + run([ + 'cp', + (LOCAL_BASE / 'profdata' / f'{args.crate_name}-{args.crate_version}' / + 'llvm.profdata'), files_dir + ]) + run([ + 'cp', + (LOCAL_BASE / 'profdata' / f'{args.crate_name}-{args.crate_version}' / + 'frontend.profdata'), files_dir + ]) + + logging.info('Building Rust with PGO') + build_rust(use_llvm_profile=True, use_frontend_profile=True) + + time_directory = (LOCAL_BASE / 'benchmarks' / + f'{args.crate_name}-{args.crate_version}') + time_directory.mkdir(parents=True, exist_ok=True) + logging.info('Benchmarking crate built with PGO') + for triple in TARGET_TRIPLES: + build_crate( + crate_name=args.bench_crate_name, + crate_version=args.bench_crate_version, + target_triple=triple, + time_file=( + time_directory / + f'{args.bench_crate_name}-{args.bench_crate_version}-{triple}')) + + rust_version = get_rust_version() + dest_directory = (GS_BASE / 'benchmarks' / rust_version / + f'{args.crate_name}-{args.crate_version}') + logging.info('Uploading benchmark data') + for file in time_directory.iterdir(): + upload_file(source=time_directory / file.name, + dest=dest_directory / file.name) + + +def upload_profdata(args): + directory = (LOCAL_BASE / 'profdata /' + f'{args.crate_name}-{args.crate_version}') + rust_version = get_rust_version() + + logging.info('Uploading LLVM profdata') + do_upload_profdata(source=directory / 'llvm.profdata', + dest=(GS_DISTFILES / + f'rust-pgo-{rust_version}-llvm.profdata.xz')) + + logging.info('Uploading frontend profdata') + do_upload_profdata(source=directory / 'frontend.profdata', + dest=(GS_DISTFILES / + f'rust-pgo-{rust_version}-frontend.profdata.xz')) + + +def main(): + logging.basicConfig(stream=sys.stdout, + level=logging.NOTSET, + format='%(message)s') + + parser = argparse.ArgumentParser( + prog=sys.argv[0], + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + subparsers = parser.add_subparsers(dest='command', help='') + subparsers.required = True + + parser_generate = subparsers.add_parser( + 'generate', + help='Generate LLVM and frontend profdata files by building ' + 'instrumented Rust compilers, and using them to build the ' + 'indicated crate (downloading the crate if necessary).') + parser_generate.set_defaults(func=generate) + parser_generate.add_argument('--crate-name', + default=CRATE_NAME, + help='Name of the crate to build') + parser_generate.add_argument('--crate-version', + default=CRATE_VERSION, + help='Version of the crate to build') + + parser_benchmark_nopgo = subparsers.add_parser( + 'benchmark-nopgo', + help='Build the Rust compiler without PGO, benchmark ' + 'the build of the indicated crate, and upload ' + 'the benchmark data.') + parser_benchmark_nopgo.set_defaults(func=benchmark_nopgo) + parser_benchmark_nopgo.add_argument( + '--bench-crate-name', + default=CRATE_NAME, + help='Name of the crate whose build to benchmark') + parser_benchmark_nopgo.add_argument( + '--bench-crate-version', + default=CRATE_VERSION, + help='Version of the crate whose benchmark to build') + + parser_benchmark_pgo = subparsers.add_parser( + 'benchmark-pgo', + help='Build the Rust compiler using PGO with the indicated ' + 'profdata files, benchmark the build of the indicated crate, ' + 'and upload the benchmark data.') + parser_benchmark_pgo.set_defaults(func=benchmark_pgo) + parser_benchmark_pgo.add_argument( + '--bench-crate-name', + default=CRATE_NAME, + help='Name of the crate whose build to benchmark') + parser_benchmark_pgo.add_argument( + '--bench-crate-version', + default=CRATE_VERSION, + help='Version of the crate whose benchmark to build') + parser_benchmark_pgo.add_argument( + '--crate-name', + default=CRATE_NAME, + help='Name of the crate whose profile to use') + parser_benchmark_pgo.add_argument( + '--crate-version', + default=CRATE_VERSION, + help='Version of the crate whose profile to use') + + parser_upload_profdata = subparsers.add_parser( + 'upload-profdata', help='Upload the profdata files') + parser_upload_profdata.set_defaults(func=upload_profdata) + parser_upload_profdata.add_argument( + '--crate-name', + default=CRATE_NAME, + help='Name of the crate whose profile to use') + parser_upload_profdata.add_argument( + '--crate-version', + default=CRATE_VERSION, + help='Version of the crate whose profile to use') + + args = parser.parse_args() + + (LOCAL_BASE / 'crates').mkdir(parents=True, exist_ok=True) + (LOCAL_BASE / 'llvm-profraw').mkdir(parents=True, exist_ok=True) + (LOCAL_BASE / 'frontend-profraw').mkdir(parents=True, exist_ok=True) + (LOCAL_BASE / 'benchmarks').mkdir(parents=True, exist_ok=True) + + args.func(args) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) |