aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Benfield <mbenfield@google.com>2022-07-14 19:59:38 +0000
committerChromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com>2022-08-15 21:29:05 +0000
commitc54fcb81c239264388222af4fdf5e4127f064c94 (patch)
tree7dd5d32b8ddf5edf5da90ea3890bea381eaa44ac
parentd7a612f02a7118897045e9664a1aa4d498b7f5ec (diff)
downloadtoolchain-utils-c54fcb81c239264388222af4fdf5e4127f064c94.tar.gz
pgo_rust.py: add
This is a Python script to handle all aspects of creating and benchmarking PGO profiles for the Rust compiler. Note that this script is not usable without crrev.com/c/3762719, but as no one else will need to use the script until the next Rust uprev, there seems to be no need for Cq-Depend. BUG=b:234046818 TEST=Used the script to enable PGO for Rust 1.62.1 Change-Id: I9a35cee3a2d6350a3a29b85c8d62ef450ce8fc22 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/3763629 Commit-Queue: Michael Benfield <mbenfield@google.com> Tested-by: Michael Benfield <mbenfield@google.com> Reviewed-by: George Burgess <gbiv@chromium.org>
-rwxr-xr-xpgo_tools_rust/pgo_rust.py541
1 files changed, 541 insertions, 0 deletions
diff --git a/pgo_tools_rust/pgo_rust.py b/pgo_tools_rust/pgo_rust.py
new file mode 100755
index 00000000..5e09c1c0
--- /dev/null
+++ b/pgo_tools_rust/pgo_rust.py
@@ -0,0 +1,541 @@
+#!/usr/bin/env python3
+#
+# Copyright 2022 The ChromiumOS Authors.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Handle most aspects of creating and benchmarking PGO profiles for Rust.
+
+This is meant to be done at Rust uprev time. Ultimately profdata files need
+to be placed at
+
+gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-frontend.profdata.tz
+and
+gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-llvm.profdata.tz
+
+The intended flow is that you first get the new Rust version in a shape so that
+it builds, for instance modifying or adding patches as necessary. Note that if
+you need to generate manifests for dev-lang/rust and dev-lang/rust-host before
+the profdata files are created, which will cause the `ebuild manifest` command
+to fail. One way to handle this is to temporarily delete the lines of the
+variable SRC_URI in cros-rustc.eclass which refer to profdata files.
+
+After you have a new working Rust version, you can run the following.
+
+```
+$ ./pgo_rust.py generate # generate profdata files
+$ ./pgo_rust.py benchmark-pgo # benchmark with PGO
+$ ./pgo_rust.py benchmark-nopgo # benchmark without PGO
+$ ./pgo_rust.py upload-profdata # upload profdata to localmirror
+```
+
+The benchmark steps aren't strictly necessary, but are recommended and will
+upload benchmark data to
+
+gs://chromeos-toolchain-artifacts/rust-pgo/benchmarks/{rust_version}/
+
+Currently by default ripgrep 13.0.0 is used as both the crate to build using an
+instrumented Rust while generating profdata, and the crate to build to
+benchmark Rust. You may wish to experiment with other crates for either role.
+In that case upload your crate to
+
+gs://chromeos-toolchain-artifacts/rust-pgo/crates/{name}-{version}.tar.xz
+
+and use `--crate-name` and `--crate-version` to indicate which crate to build
+to generate profdata (or which crate's generated profdata to use), and
+`--bench-crate-name` to indicate which crate to build in benchmarks.
+
+Notes on various local and GS locations follow.
+
+Note that currently we need to keep separate profdata files for the LLVM and
+frontend components of Rust. This is because LLVM profdata is instrumented by
+the system LLVM, but Rust's profdata is instrumented by its own LLVM, which
+may have separate profdata.
+
+profdata files accessed by ebuilds must be stored in
+
+gs://chromeos-localmirror/distfiles
+
+Specifically, they go to
+
+gs://chromeos-localmirror/distfiles/rust-pgo-{rust-version}-llvm.profdata.xz
+
+gs://chromeos-localmirror/distfiles/
+ rust-pgo-{rust-version}-frontend.profdata.xz
+
+But we can store other data elsewhere, like gs://chromeos-toolchain-artifacts.
+
+GS locations:
+
+{GS_BASE}/crates/ - store crates we may use for generating profiles or
+benchmarking PGO optimized Rust compilers
+
+{GS_BASE}/benchmarks/{rust_version}/nopgo/
+ {bench_crate_name}-{bench_crate_version}-{triple}
+
+{GS_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/
+ {bench_crate_name}-{bench_crate_version}-{triple}
+
+Local locations:
+
+{LOCAL_BASE}/crates/
+
+{LOCAL_BASE}/llvm-profraw/
+
+{LOCAL_BASE}/frontend-profraw/
+
+{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/llvm.profdata
+
+{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/frontend.profdata
+
+{LOCAL_BASE}/benchmarks/{rust_version}/nopgo/
+ {bench_crate_name}-{bench_crate_version}-{triple}
+
+{LOCAL_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/
+ {bench_crate_name}-{bench_crate_version}-{triple}
+
+{LOCAL_BASE}/llvm.profdata - must go here to be used by Rust ebuild
+{LOCAL_BASE}/frontend.profdata - must go here to be used by Rust ebuild
+"""
+
+import argparse
+import contextlib
+import logging
+import os
+from pathlib import Path
+from pathlib import PurePosixPath
+import re
+import shutil
+import subprocess
+import sys
+from typing import Dict, List, Optional
+
+
+TARGET_TRIPLES = [
+ 'x86_64-cros-linux-gnu',
+ 'x86_64-pc-linux-gnu',
+ 'armv7a-cros-linux-gnueabihf',
+ 'aarch64-cros-linux-gnu',
+]
+
+LOCAL_BASE = Path('/tmp/rust-pgo')
+
+GS_BASE = PurePosixPath('/chromeos-toolchain-artifacts/rust-pgo')
+
+GS_DISTFILES = PurePosixPath('/chromeos-localmirror/distfiles')
+
+CRATE_NAME = 'ripgrep'
+
+CRATE_VERSION = '13.0.0'
+
+
+@contextlib.contextmanager
+def chdir(new_directory: Path):
+ initial_directory = Path.cwd()
+ os.chdir(new_directory)
+ try:
+ yield
+ finally:
+ os.chdir(initial_directory)
+
+
+def run(args: List,
+ *,
+ indent: int = 4,
+ env: Optional[Dict[str, str]] = None,
+ capture_stdout: bool = False,
+ message: bool = True) -> Optional[str]:
+ args = [str(arg) for arg in args]
+
+ if env is None:
+ new_env = os.environ
+ else:
+ new_env = os.environ.copy()
+ new_env.update(env)
+
+ if message:
+ if env is None:
+ logging.info('Running %s', args)
+ else:
+ logging.info('Running %s in environment %s', args, env)
+
+ result = subprocess.run(args,
+ env=new_env,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ encoding='utf-8',
+ check=False)
+
+ stdout = result.stdout
+ stderr = result.stderr
+ if indent != 0:
+ stdout = re.sub('^', ' ' * indent, stdout, flags=re.MULTILINE)
+ stderr = re.sub('^', ' ' * indent, stderr, flags=re.MULTILINE)
+
+ if capture_stdout:
+ ret = result.stdout
+ else:
+ logging.info('STDOUT:')
+ logging.info(stdout)
+ logging.info('STDERR:')
+ logging.info(stderr)
+ ret = None
+
+ result.check_returncode()
+
+ if message:
+ if env is None:
+ logging.info('Ran %s\n', args)
+ else:
+ logging.info('Ran %s in environment %s\n', args, env)
+
+ return ret
+
+
+def get_rust_version() -> str:
+ s = run(['rustc', '--version'], capture_stdout=True)
+ m = re.search(r'\d+\.\d+\.\d+', s)
+ assert m is not None, repr(s)
+ return m.group(0)
+
+
+def download_unpack_crate(*, crate_name: str, crate_version: str):
+ filename_no_extension = f'{crate_name}-{crate_version}'
+ gs_path = GS_BASE / 'crates' / f'{filename_no_extension}.tar.xz'
+ local_path = LOCAL_BASE / 'crates'
+ shutil.rmtree(local_path / f'{crate_name}-{crate_version}',
+ ignore_errors=True)
+ with chdir(local_path):
+ run(['gsutil.py', 'cp', f'gs:/{gs_path}', '.'])
+ run(['xz', '-d', f'{filename_no_extension}.tar.xz'])
+ run(['tar', 'xvf', f'{filename_no_extension}.tar'])
+
+
+def build_crate(*,
+ crate_name: str,
+ crate_version: str,
+ target_triple: str,
+ time_file: Optional[str] = None):
+ local_path = LOCAL_BASE / 'crates' / f'{crate_name}-{crate_version}'
+ with chdir(local_path):
+ Path('.cargo').mkdir(exist_ok=True)
+ with open('.cargo/config.toml', 'w') as f:
+ f.write('\n'.join((
+ '[source.crates-io]',
+ 'replace-with = "vendored-sources"',
+ '',
+ '[source.vendored-sources]',
+ 'directory = "vendor"',
+ '',
+ f'[target.{target_triple}]',
+ f'linker = "{target_triple}-clang"',
+ '',
+ "[target.'cfg(all())']",
+ 'rustflags = [',
+ ' "-Clto=thin",',
+ ' "-Cembed-bitcode=yes",',
+ ']',
+ )))
+
+ run(['cargo', 'clean'])
+
+ cargo_cmd = ['cargo', 'build', '--release', '--target', target_triple]
+
+ if time_file is None:
+ run(cargo_cmd)
+ else:
+ time_cmd = [
+ '/usr/bin/time', f'--output={time_file}',
+ '--format=wall time (s) %e\nuser time (s) %U\nmax RSS %M\n'
+ ]
+ run(time_cmd + cargo_cmd)
+
+
+def build_rust(*,
+ generate_frontend_profile: bool = False,
+ generate_llvm_profile: bool = False,
+ use_frontend_profile: bool = False,
+ use_llvm_profile: bool = False):
+
+ if use_frontend_profile or use_llvm_profile:
+ assert not generate_frontend_profile and not generate_llvm_profile, (
+ "Can't build a compiler to both use profile information and generate it"
+ )
+
+ assert not generate_frontend_profile or not generate_llvm_profile, (
+ "Can't generate both frontend and LLVM profile information")
+
+ use = '-rust_profile_frontend_use -rust_profile_llvm_use '
+ if generate_frontend_profile:
+ use += 'rust_profile_frontend_generate '
+ if generate_llvm_profile:
+ use += 'rust_profile_llvm_generate '
+ if use_frontend_profile:
+ use += 'rust_profile_frontend_use_local '
+ if use_llvm_profile:
+ use += 'rust_profile_llvm_use_local '
+
+ # -E to preserve our USE environment variable.
+ run(['sudo', '-E', 'emerge', 'dev-lang/rust', 'dev-lang/rust-host'],
+ env={'USE': use})
+
+
+def merge_profdata(llvm_or_frontend, *, source_directory: Path, dest: Path):
+ assert llvm_or_frontend in ('llvm', 'frontend')
+
+ # The two `llvm-profdata` programs come from different LLVM versions, and may
+ # support different versions of the profdata format, so make sure to use the
+ # right one.
+ llvm_profdata = ('/usr/bin/llvm-profdata' if llvm_or_frontend == 'llvm' else
+ '/usr/libexec/rust/llvm-profdata')
+
+ dest.parent.mkdir(parents=True, exist_ok=True)
+
+ files = list(source_directory.glob('*.profraw'))
+ run([llvm_profdata, 'merge', f'--output={dest}'] + files)
+
+
+def do_upload_profdata(*, source: Path, dest: PurePosixPath):
+ new_path = source.parent / source.name / '.xz'
+ run(['xz', '--keep', '--compress', '--force', source])
+ upload_file(source=new_path, dest=dest, public_read=True)
+
+
+def upload_file(*,
+ source: Path,
+ dest: PurePosixPath,
+ public_read: bool = False):
+ if public_read:
+ run(['gsutil.py', 'cp', '-a', 'public-read', source, f'gs:/{dest}'])
+ else:
+ run(['gsutil.py', 'cp', source, f'gs:/{dest}'])
+
+
+def maybe_download_crate(*, crate_name: str, crate_version: str):
+ directory = LOCAL_BASE / 'crates' / f'{crate_name}-{crate_version}'
+ if directory.is_dir():
+ logging.info('Crate already downloaded')
+ else:
+ logging.info('Downloading crate')
+ download_unpack_crate(crate_name=crate_name, crate_version=crate_version)
+
+
+def generate(args):
+ maybe_download_crate(crate_name=args.crate_name,
+ crate_version=args.crate_version)
+
+ llvm_dir = LOCAL_BASE / 'llvm-profraw'
+ shutil.rmtree(llvm_dir, ignore_errors=True)
+ frontend_dir = LOCAL_BASE / 'frontend-profraw'
+ shutil.rmtree(frontend_dir, ignore_errors=True)
+
+ logging.info('Building Rust instrumented for llvm')
+ build_rust(generate_llvm_profile=True)
+
+ llvm_dir.mkdir(parents=True, exist_ok=True)
+ for triple in TARGET_TRIPLES:
+ logging.info('Building crate with LLVM instrumentation, for triple %s',
+ triple)
+ build_crate(crate_name=args.crate_name,
+ crate_version=args.crate_version,
+ target_triple=triple)
+
+ logging.info('Merging LLVM profile data')
+ merge_profdata(
+ 'llvm',
+ source_directory=LOCAL_BASE / 'llvm-profraw',
+ dest=(LOCAL_BASE / 'profdata' /
+ f'{args.crate_name}-{args.crate_version}' / 'llvm.profdata'))
+
+ logging.info('Building Rust instrumented for frontend')
+ build_rust(generate_frontend_profile=True)
+
+ frontend_dir.mkdir(parents=True, exist_ok=True)
+ for triple in TARGET_TRIPLES:
+ logging.info('Building crate with frontend instrumentation, for triple %s',
+ triple)
+ build_crate(crate_name=args.crate_name,
+ crate_version=args.crate_version,
+ target_triple=triple)
+
+ logging.info('Merging frontend profile data')
+ merge_profdata(
+ 'frontend',
+ source_directory=LOCAL_BASE / 'frontend-profraw',
+ dest=(LOCAL_BASE / 'profdata' /
+ f'{args.crate_name}-{args.crate_version}' / 'frontend.profdata'))
+
+
+def benchmark_nopgo(args):
+ logging.info('Building Rust, no PGO')
+ build_rust()
+
+ time_directory = LOCAL_BASE / 'benchmarks' / 'nopgo'
+ logging.info('Benchmarking crate build with no PGO')
+ time_directory.mkdir(parents=True, exist_ok=True)
+ for triple in TARGET_TRIPLES:
+ build_crate(
+ crate_name=args.bench_crate_name,
+ crate_version=args.bench_crate_version,
+ target_triple=triple,
+ time_file=(
+ time_directory /
+ f'{args.bench_crate_name}-{args.bench_crate_version}-{triple}'))
+
+ rust_version = get_rust_version()
+ dest_directory = GS_BASE / 'benchmarks' / rust_version / 'nopgo'
+ logging.info('Uploading benchmark data')
+ for file in time_directory.iterdir():
+ upload_file(source=time_directory / file.name,
+ dest=dest_directory / file.name)
+
+
+def benchmark_pgo(args):
+ maybe_download_crate(crate_name=args.bench_crate_name,
+ crate_version=args.bench_crate_version)
+
+ files_dir = Path('/mnt/host/source/src/third_party/chromiumos-overlay',
+ 'dev-lang/rust/files')
+
+ logging.info('Copying profile data to be used in building Rust')
+ run([
+ 'cp',
+ (LOCAL_BASE / 'profdata' / f'{args.crate_name}-{args.crate_version}' /
+ 'llvm.profdata'), files_dir
+ ])
+ run([
+ 'cp',
+ (LOCAL_BASE / 'profdata' / f'{args.crate_name}-{args.crate_version}' /
+ 'frontend.profdata'), files_dir
+ ])
+
+ logging.info('Building Rust with PGO')
+ build_rust(use_llvm_profile=True, use_frontend_profile=True)
+
+ time_directory = (LOCAL_BASE / 'benchmarks' /
+ f'{args.crate_name}-{args.crate_version}')
+ time_directory.mkdir(parents=True, exist_ok=True)
+ logging.info('Benchmarking crate built with PGO')
+ for triple in TARGET_TRIPLES:
+ build_crate(
+ crate_name=args.bench_crate_name,
+ crate_version=args.bench_crate_version,
+ target_triple=triple,
+ time_file=(
+ time_directory /
+ f'{args.bench_crate_name}-{args.bench_crate_version}-{triple}'))
+
+ rust_version = get_rust_version()
+ dest_directory = (GS_BASE / 'benchmarks' / rust_version /
+ f'{args.crate_name}-{args.crate_version}')
+ logging.info('Uploading benchmark data')
+ for file in time_directory.iterdir():
+ upload_file(source=time_directory / file.name,
+ dest=dest_directory / file.name)
+
+
+def upload_profdata(args):
+ directory = (LOCAL_BASE / 'profdata /'
+ f'{args.crate_name}-{args.crate_version}')
+ rust_version = get_rust_version()
+
+ logging.info('Uploading LLVM profdata')
+ do_upload_profdata(source=directory / 'llvm.profdata',
+ dest=(GS_DISTFILES /
+ f'rust-pgo-{rust_version}-llvm.profdata.xz'))
+
+ logging.info('Uploading frontend profdata')
+ do_upload_profdata(source=directory / 'frontend.profdata',
+ dest=(GS_DISTFILES /
+ f'rust-pgo-{rust_version}-frontend.profdata.xz'))
+
+
+def main():
+ logging.basicConfig(stream=sys.stdout,
+ level=logging.NOTSET,
+ format='%(message)s')
+
+ parser = argparse.ArgumentParser(
+ prog=sys.argv[0],
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ subparsers = parser.add_subparsers(dest='command', help='')
+ subparsers.required = True
+
+ parser_generate = subparsers.add_parser(
+ 'generate',
+ help='Generate LLVM and frontend profdata files by building '
+ 'instrumented Rust compilers, and using them to build the '
+ 'indicated crate (downloading the crate if necessary).')
+ parser_generate.set_defaults(func=generate)
+ parser_generate.add_argument('--crate-name',
+ default=CRATE_NAME,
+ help='Name of the crate to build')
+ parser_generate.add_argument('--crate-version',
+ default=CRATE_VERSION,
+ help='Version of the crate to build')
+
+ parser_benchmark_nopgo = subparsers.add_parser(
+ 'benchmark-nopgo',
+ help='Build the Rust compiler without PGO, benchmark '
+ 'the build of the indicated crate, and upload '
+ 'the benchmark data.')
+ parser_benchmark_nopgo.set_defaults(func=benchmark_nopgo)
+ parser_benchmark_nopgo.add_argument(
+ '--bench-crate-name',
+ default=CRATE_NAME,
+ help='Name of the crate whose build to benchmark')
+ parser_benchmark_nopgo.add_argument(
+ '--bench-crate-version',
+ default=CRATE_VERSION,
+ help='Version of the crate whose benchmark to build')
+
+ parser_benchmark_pgo = subparsers.add_parser(
+ 'benchmark-pgo',
+ help='Build the Rust compiler using PGO with the indicated '
+ 'profdata files, benchmark the build of the indicated crate, '
+ 'and upload the benchmark data.')
+ parser_benchmark_pgo.set_defaults(func=benchmark_pgo)
+ parser_benchmark_pgo.add_argument(
+ '--bench-crate-name',
+ default=CRATE_NAME,
+ help='Name of the crate whose build to benchmark')
+ parser_benchmark_pgo.add_argument(
+ '--bench-crate-version',
+ default=CRATE_VERSION,
+ help='Version of the crate whose benchmark to build')
+ parser_benchmark_pgo.add_argument(
+ '--crate-name',
+ default=CRATE_NAME,
+ help='Name of the crate whose profile to use')
+ parser_benchmark_pgo.add_argument(
+ '--crate-version',
+ default=CRATE_VERSION,
+ help='Version of the crate whose profile to use')
+
+ parser_upload_profdata = subparsers.add_parser(
+ 'upload-profdata', help='Upload the profdata files')
+ parser_upload_profdata.set_defaults(func=upload_profdata)
+ parser_upload_profdata.add_argument(
+ '--crate-name',
+ default=CRATE_NAME,
+ help='Name of the crate whose profile to use')
+ parser_upload_profdata.add_argument(
+ '--crate-version',
+ default=CRATE_VERSION,
+ help='Version of the crate whose profile to use')
+
+ args = parser.parse_args()
+
+ (LOCAL_BASE / 'crates').mkdir(parents=True, exist_ok=True)
+ (LOCAL_BASE / 'llvm-profraw').mkdir(parents=True, exist_ok=True)
+ (LOCAL_BASE / 'frontend-profraw').mkdir(parents=True, exist_ok=True)
+ (LOCAL_BASE / 'benchmarks').mkdir(parents=True, exist_ok=True)
+
+ args.func(args)
+
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())