aboutsummaryrefslogtreecommitdiff
path: root/pgo_tools_rust/pgo_rust.py
diff options
context:
space:
mode:
Diffstat (limited to 'pgo_tools_rust/pgo_rust.py')
-rwxr-xr-xpgo_tools_rust/pgo_rust.py652
1 files changed, 652 insertions, 0 deletions
diff --git a/pgo_tools_rust/pgo_rust.py b/pgo_tools_rust/pgo_rust.py
new file mode 100755
index 00000000..298c343f
--- /dev/null
+++ b/pgo_tools_rust/pgo_rust.py
@@ -0,0 +1,652 @@
+#!/usr/bin/env python3
+#
+# Copyright 2022 The ChromiumOS Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Handle most aspects of creating and benchmarking PGO profiles for Rust.
+
+This is meant to be done at Rust uprev time. Ultimately profdata files need
+to be placed at
+
+gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-frontend.profdata{s}.tz
+and
+gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-llvm.profdata{s}.tz
+
+Here {s} is an optional suffix to distinguish between profdata files on the same
+Rust version.
+
+The intended flow is that you first get the new Rust version in a shape so that
+it builds, for instance modifying or adding patches as necessary. Note that if
+you need to generate manifests for dev-lang/rust and dev-lang/rust-host before
+the profdata files are created, which will cause the `ebuild manifest` command
+to fail. One way to handle this is to temporarily delete the lines of the
+variable SRC_URI in cros-rustc.eclass which refer to profdata files.
+
+After you have a new working Rust version, you can run the following.
+
+```
+$ ./pgo_rust.py generate # generate profdata files
+$ ./pgo_rust.py benchmark-pgo # benchmark with PGO
+$ ./pgo_rust.py benchmark-nopgo # benchmark without PGO
+$ ./pgo_rust.py upload-profdata # upload profdata to localmirror
+```
+
+The benchmark steps aren't strictly necessary, but are recommended and will
+upload benchmark data to
+
+gs://chromeos-toolchain-artifacts/rust-pgo/benchmarks/{rust_version}/
+
+Currently by default ripgrep 13.0.0 is used as both the crate to build using an
+instrumented Rust while generating profdata, and the crate to build to
+benchmark Rust. You may wish to experiment with other crates for either role.
+In that case upload your crate to
+
+gs://chromeos-toolchain-artifacts/rust-pgo/crates/{name}-{version}.tar.xz
+
+and use `--crate-name` and `--crate-version` to indicate which crate to build
+to generate profdata (or which crate's generated profdata to use), and
+`--bench-crate-name` to indicate which crate to build in benchmarks.
+
+Notes on various local and GS locations follow.
+
+Note that currently we need to keep separate profdata files for the LLVM and
+frontend components of Rust. This is because LLVM profdata is instrumented by
+the system LLVM, but Rust's profdata is instrumented by its own LLVM, which
+may have separate profdata.
+
+profdata files accessed by ebuilds must be stored in
+
+gs://chromeos-localmirror/distfiles
+
+Specifically, they go to
+
+gs://chromeos-localmirror/distfiles/rust-pgo-{rust-version}-llvm.profdata.xz
+
+gs://chromeos-localmirror/distfiles/
+ rust-pgo-{rust-version}-frontend.profdata.xz
+
+But we can store other data elsewhere, like gs://chromeos-toolchain-artifacts.
+
+GS locations:
+
+{GS_BASE}/crates/ - store crates we may use for generating profiles or
+benchmarking PGO optimized Rust compilers
+
+{GS_BASE}/benchmarks/{rust_version}/nopgo/
+ {bench_crate_name}-{bench_crate_version}-{triple}
+
+{GS_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/
+ {bench_crate_name}-{bench_crate_version}-{triple}
+
+Local locations:
+
+{LOCAL_BASE}/crates/
+
+{LOCAL_BASE}/llvm-profraw/
+
+{LOCAL_BASE}/frontend-profraw/
+
+{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/llvm.profdata
+
+{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/frontend.profdata
+
+{LOCAL_BASE}/benchmarks/{rust_version}/nopgo/
+ {bench_crate_name}-{bench_crate_version}-{triple}
+
+{LOCAL_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/
+ {bench_crate_name}-{bench_crate_version}-{triple}
+
+{LOCAL_BASE}/llvm.profdata - must go here to be used by Rust ebuild
+{LOCAL_BASE}/frontend.profdata - must go here to be used by Rust ebuild
+"""
+
+import argparse
+import contextlib
+import logging
+import os
+from pathlib import Path
+from pathlib import PurePosixPath
+import re
+import shutil
+import subprocess
+import sys
+from typing import Dict, List, Optional
+
+
+TARGET_TRIPLES = [
+ "x86_64-cros-linux-gnu",
+ "x86_64-pc-linux-gnu",
+ "armv7a-cros-linux-gnueabihf",
+ "aarch64-cros-linux-gnu",
+]
+
+LOCAL_BASE = Path("/tmp/rust-pgo")
+
+GS_BASE = PurePosixPath("/chromeos-toolchain-artifacts/rust-pgo")
+
+GS_DISTFILES = PurePosixPath("/chromeos-localmirror/distfiles")
+
+CRATE_NAME = "ripgrep"
+
+CRATE_VERSION = "13.0.0"
+
+
+@contextlib.contextmanager
+def chdir(new_directory: Path):
+ initial_directory = Path.cwd()
+ os.chdir(new_directory)
+ try:
+ yield
+ finally:
+ os.chdir(initial_directory)
+
+
+def run(
+ args: List,
+ *,
+ indent: int = 4,
+ env: Optional[Dict[str, str]] = None,
+ capture_stdout: bool = False,
+ message: bool = True,
+) -> Optional[str]:
+ args = [str(arg) for arg in args]
+
+ if env is None:
+ new_env = os.environ
+ else:
+ new_env = os.environ.copy()
+ new_env.update(env)
+
+ if message:
+ if env is None:
+ logging.info("Running %s", args)
+ else:
+ logging.info("Running %s in environment %s", args, env)
+
+ result = subprocess.run(
+ args,
+ env=new_env,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ encoding="utf-8",
+ check=False,
+ )
+
+ stdout = result.stdout
+ stderr = result.stderr
+ if indent != 0:
+ stdout = re.sub("^", " " * indent, stdout, flags=re.MULTILINE)
+ stderr = re.sub("^", " " * indent, stderr, flags=re.MULTILINE)
+
+ if capture_stdout:
+ ret = result.stdout
+ else:
+ logging.info("STDOUT:")
+ logging.info(stdout)
+ logging.info("STDERR:")
+ logging.info(stderr)
+ ret = None
+
+ result.check_returncode()
+
+ if message:
+ if env is None:
+ logging.info("Ran %s\n", args)
+ else:
+ logging.info("Ran %s in environment %s\n", args, env)
+
+ return ret
+
+
+def get_rust_version() -> str:
+ s = run(["rustc", "--version"], capture_stdout=True)
+ m = re.search(r"\d+\.\d+\.\d+", s)
+ assert m is not None, repr(s)
+ return m.group(0)
+
+
+def download_unpack_crate(*, crate_name: str, crate_version: str):
+ filename_no_extension = f"{crate_name}-{crate_version}"
+ gs_path = GS_BASE / "crates" / f"{filename_no_extension}.tar.xz"
+ local_path = LOCAL_BASE / "crates"
+ shutil.rmtree(
+ local_path / f"{crate_name}-{crate_version}", ignore_errors=True
+ )
+ with chdir(local_path):
+ run(["gsutil.py", "cp", f"gs:/{gs_path}", "."])
+ run(["xz", "-d", f"{filename_no_extension}.tar.xz"])
+ run(["tar", "xvf", f"{filename_no_extension}.tar"])
+
+
+def build_crate(
+ *,
+ crate_name: str,
+ crate_version: str,
+ target_triple: str,
+ time_file: Optional[str] = None,
+):
+ local_path = LOCAL_BASE / "crates" / f"{crate_name}-{crate_version}"
+ with chdir(local_path):
+ Path(".cargo").mkdir(exist_ok=True)
+ with open(".cargo/config.toml", "w") as f:
+ f.write(
+ "\n".join(
+ (
+ "[source.crates-io]",
+ 'replace-with = "vendored-sources"',
+ "",
+ "[source.vendored-sources]",
+ 'directory = "vendor"',
+ "",
+ f"[target.{target_triple}]",
+ f'linker = "{target_triple}-clang"',
+ "",
+ "[target.'cfg(all())']",
+ "rustflags = [",
+ ' "-Clto=thin",',
+ ' "-Cembed-bitcode=yes",',
+ "]",
+ )
+ )
+ )
+
+ run(["cargo", "clean"])
+
+ cargo_cmd = ["cargo", "build", "--release", "--target", target_triple]
+
+ if time_file is None:
+ run(cargo_cmd)
+ else:
+ time_cmd = [
+ "/usr/bin/time",
+ f"--output={time_file}",
+ "--format=wall time (s) %e\nuser time (s) %U\nmax RSS %M\n",
+ ]
+ run(time_cmd + cargo_cmd)
+
+
+def build_rust(
+ *,
+ generate_frontend_profile: bool = False,
+ generate_llvm_profile: bool = False,
+ use_frontend_profile: bool = False,
+ use_llvm_profile: bool = False,
+):
+
+ if use_frontend_profile or use_llvm_profile:
+ assert (
+ not generate_frontend_profile and not generate_llvm_profile
+ ), "Can't build a compiler to both use profile information and generate it"
+
+ assert (
+ not generate_frontend_profile or not generate_llvm_profile
+ ), "Can't generate both frontend and LLVM profile information"
+
+ use = "-rust_profile_frontend_use -rust_profile_llvm_use "
+ if generate_frontend_profile:
+ use += "rust_profile_frontend_generate "
+ if generate_llvm_profile:
+ use += "rust_profile_llvm_generate "
+ if use_frontend_profile:
+ use += "rust_profile_frontend_use_local "
+ if use_llvm_profile:
+ use += "rust_profile_llvm_use_local "
+
+ # -E to preserve our USE environment variable.
+ run(
+ ["sudo", "-E", "emerge", "dev-lang/rust", "dev-lang/rust-host"],
+ env={"USE": use},
+ )
+
+
+def merge_profdata(llvm_or_frontend, *, source_directory: Path, dest: Path):
+ assert llvm_or_frontend in ("llvm", "frontend")
+
+ # The two `llvm-profdata` programs come from different LLVM versions, and may
+ # support different versions of the profdata format, so make sure to use the
+ # right one.
+ llvm_profdata = (
+ "/usr/bin/llvm-profdata"
+ if llvm_or_frontend == "llvm"
+ else "/usr/libexec/rust/llvm-profdata"
+ )
+
+ dest.parent.mkdir(parents=True, exist_ok=True)
+
+ files = list(source_directory.glob("*.profraw"))
+ run([llvm_profdata, "merge", f"--output={dest}"] + files)
+
+
+def do_upload_profdata(*, source: Path, dest: PurePosixPath):
+ new_path = source.parent / (source.name + ".xz")
+ run(["xz", "--keep", "--compress", "--force", source])
+ upload_file(source=new_path, dest=dest, public_read=True)
+
+
+def upload_file(
+ *, source: Path, dest: PurePosixPath, public_read: bool = False
+):
+ if public_read:
+ run(["gsutil.py", "cp", "-a", "public-read", source, f"gs:/{dest}"])
+ else:
+ run(["gsutil.py", "cp", source, f"gs:/{dest}"])
+
+
+def maybe_download_crate(*, crate_name: str, crate_version: str):
+ directory = LOCAL_BASE / "crates" / f"{crate_name}-{crate_version}"
+ if directory.is_dir():
+ logging.info("Crate already downloaded")
+ else:
+ logging.info("Downloading crate")
+ download_unpack_crate(
+ crate_name=crate_name, crate_version=crate_version
+ )
+
+
+def generate(args):
+ maybe_download_crate(
+ crate_name=args.crate_name, crate_version=args.crate_version
+ )
+
+ llvm_dir = LOCAL_BASE / "llvm-profraw"
+ shutil.rmtree(llvm_dir, ignore_errors=True)
+ frontend_dir = LOCAL_BASE / "frontend-profraw"
+ shutil.rmtree(frontend_dir, ignore_errors=True)
+
+ logging.info("Building Rust instrumented for llvm")
+ build_rust(generate_llvm_profile=True)
+
+ llvm_dir.mkdir(parents=True, exist_ok=True)
+ for triple in TARGET_TRIPLES:
+ logging.info(
+ "Building crate with LLVM instrumentation, for triple %s", triple
+ )
+ build_crate(
+ crate_name=args.crate_name,
+ crate_version=args.crate_version,
+ target_triple=triple,
+ )
+
+ logging.info("Merging LLVM profile data")
+ merge_profdata(
+ "llvm",
+ source_directory=LOCAL_BASE / "llvm-profraw",
+ dest=(
+ LOCAL_BASE
+ / "profdata"
+ / f"{args.crate_name}-{args.crate_version}"
+ / "llvm.profdata"
+ ),
+ )
+
+ logging.info("Building Rust instrumented for frontend")
+ build_rust(generate_frontend_profile=True)
+
+ frontend_dir.mkdir(parents=True, exist_ok=True)
+ for triple in TARGET_TRIPLES:
+ logging.info(
+ "Building crate with frontend instrumentation, for triple %s",
+ triple,
+ )
+ build_crate(
+ crate_name=args.crate_name,
+ crate_version=args.crate_version,
+ target_triple=triple,
+ )
+
+ logging.info("Merging frontend profile data")
+ merge_profdata(
+ "frontend",
+ source_directory=LOCAL_BASE / "frontend-profraw",
+ dest=(
+ LOCAL_BASE
+ / "profdata"
+ / f"{args.crate_name}-{args.crate_version}"
+ / "frontend.profdata"
+ ),
+ )
+
+
+def benchmark_nopgo(args):
+ logging.info("Building Rust, no PGO")
+ build_rust()
+
+ time_directory = LOCAL_BASE / "benchmarks" / "nopgo"
+ logging.info("Benchmarking crate build with no PGO")
+ time_directory.mkdir(parents=True, exist_ok=True)
+ for triple in TARGET_TRIPLES:
+ build_crate(
+ crate_name=args.bench_crate_name,
+ crate_version=args.bench_crate_version,
+ target_triple=triple,
+ time_file=(
+ time_directory
+ / f"{args.bench_crate_name}-{args.bench_crate_version}-{triple}"
+ ),
+ )
+
+ rust_version = get_rust_version()
+ dest_directory = (
+ GS_BASE / "benchmarks" / rust_version / f"nopgo{args.suffix}"
+ )
+ logging.info("Uploading benchmark data")
+ for file in time_directory.iterdir():
+ upload_file(
+ source=time_directory / file.name, dest=dest_directory / file.name
+ )
+
+
+def benchmark_pgo(args):
+ maybe_download_crate(
+ crate_name=args.bench_crate_name, crate_version=args.bench_crate_version
+ )
+
+ files_dir = Path(
+ "/mnt/host/source/src/third_party/chromiumos-overlay",
+ "dev-lang/rust/files",
+ )
+
+ logging.info("Copying profile data to be used in building Rust")
+ run(
+ [
+ "cp",
+ (
+ LOCAL_BASE
+ / "profdata"
+ / f"{args.crate_name}-{args.crate_version}"
+ / "llvm.profdata"
+ ),
+ files_dir,
+ ]
+ )
+ run(
+ [
+ "cp",
+ (
+ LOCAL_BASE
+ / "profdata"
+ / f"{args.crate_name}-{args.crate_version}"
+ / "frontend.profdata"
+ ),
+ files_dir,
+ ]
+ )
+
+ logging.info("Building Rust with PGO")
+ build_rust(use_llvm_profile=True, use_frontend_profile=True)
+
+ time_directory = (
+ LOCAL_BASE / "benchmarks" / f"{args.crate_name}-{args.crate_version}"
+ )
+ time_directory.mkdir(parents=True, exist_ok=True)
+ logging.info("Benchmarking crate built with PGO")
+ for triple in TARGET_TRIPLES:
+ build_crate(
+ crate_name=args.bench_crate_name,
+ crate_version=args.bench_crate_version,
+ target_triple=triple,
+ time_file=(
+ time_directory
+ / f"{args.bench_crate_name}-{args.bench_crate_version}-{triple}"
+ ),
+ )
+
+ rust_version = get_rust_version()
+ dest_directory = (
+ GS_BASE
+ / "benchmarks"
+ / rust_version
+ / f"{args.crate_name}-{args.crate_version}{args.suffix}"
+ )
+ logging.info("Uploading benchmark data")
+ for file in time_directory.iterdir():
+ upload_file(
+ source=time_directory / file.name, dest=dest_directory / file.name
+ )
+
+
+def upload_profdata(args):
+ directory = (
+ LOCAL_BASE / "profdata" / f"{args.crate_name}-{args.crate_version}"
+ )
+ rust_version = get_rust_version()
+
+ logging.info("Uploading LLVM profdata")
+ do_upload_profdata(
+ source=directory / "llvm.profdata",
+ dest=(
+ GS_DISTFILES
+ / f"rust-pgo-{rust_version}-llvm{args.suffix}.profdata.xz"
+ ),
+ )
+
+ logging.info("Uploading frontend profdata")
+ do_upload_profdata(
+ source=directory / "frontend.profdata",
+ dest=(
+ GS_DISTFILES
+ / f"rust-pgo-{rust_version}-frontend{args.suffix}.profdata.xz"
+ ),
+ )
+
+
+def main():
+ logging.basicConfig(
+ stream=sys.stdout, level=logging.NOTSET, format="%(message)s"
+ )
+
+ parser = argparse.ArgumentParser(
+ prog=sys.argv[0],
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+ subparsers = parser.add_subparsers(dest="command", help="")
+ subparsers.required = True
+
+ parser_generate = subparsers.add_parser(
+ "generate",
+ help="Generate LLVM and frontend profdata files by building "
+ "instrumented Rust compilers, and using them to build the "
+ "indicated crate (downloading the crate if necessary).",
+ )
+ parser_generate.set_defaults(func=generate)
+ parser_generate.add_argument(
+ "--crate-name", default=CRATE_NAME, help="Name of the crate to build"
+ )
+ parser_generate.add_argument(
+ "--crate-version",
+ default=CRATE_VERSION,
+ help="Version of the crate to build",
+ )
+
+ parser_benchmark_nopgo = subparsers.add_parser(
+ "benchmark-nopgo",
+ help="Build the Rust compiler without PGO, benchmark "
+ "the build of the indicated crate, and upload "
+ "the benchmark data.",
+ )
+ parser_benchmark_nopgo.set_defaults(func=benchmark_nopgo)
+ parser_benchmark_nopgo.add_argument(
+ "--bench-crate-name",
+ default=CRATE_NAME,
+ help="Name of the crate whose build to benchmark",
+ )
+ parser_benchmark_nopgo.add_argument(
+ "--bench-crate-version",
+ default=CRATE_VERSION,
+ help="Version of the crate whose benchmark to build",
+ )
+ parser_benchmark_nopgo.add_argument(
+ "--suffix",
+ default="",
+ help="Suffix to distinguish benchmarks and profdata with identical rustc versions",
+ )
+
+ parser_benchmark_pgo = subparsers.add_parser(
+ "benchmark-pgo",
+ help="Build the Rust compiler using PGO with the indicated "
+ "profdata files, benchmark the build of the indicated crate, "
+ "and upload the benchmark data.",
+ )
+ parser_benchmark_pgo.set_defaults(func=benchmark_pgo)
+ parser_benchmark_pgo.add_argument(
+ "--bench-crate-name",
+ default=CRATE_NAME,
+ help="Name of the crate whose build to benchmark",
+ )
+ parser_benchmark_pgo.add_argument(
+ "--bench-crate-version",
+ default=CRATE_VERSION,
+ help="Version of the crate whose benchmark to build",
+ )
+ parser_benchmark_pgo.add_argument(
+ "--crate-name",
+ default=CRATE_NAME,
+ help="Name of the crate whose profile to use",
+ )
+ parser_benchmark_pgo.add_argument(
+ "--crate-version",
+ default=CRATE_VERSION,
+ help="Version of the crate whose profile to use",
+ )
+ parser_benchmark_pgo.add_argument(
+ "--suffix",
+ default="",
+ help="Suffix to distinguish benchmarks and profdata with identical rustc versions",
+ )
+
+ parser_upload_profdata = subparsers.add_parser(
+ "upload-profdata", help="Upload the profdata files"
+ )
+ parser_upload_profdata.set_defaults(func=upload_profdata)
+ parser_upload_profdata.add_argument(
+ "--crate-name",
+ default=CRATE_NAME,
+ help="Name of the crate whose profile to use",
+ )
+ parser_upload_profdata.add_argument(
+ "--crate-version",
+ default=CRATE_VERSION,
+ help="Version of the crate whose profile to use",
+ )
+ parser_upload_profdata.add_argument(
+ "--suffix",
+ default="",
+ help="Suffix to distinguish benchmarks and profdata with identical rustc versions",
+ )
+
+ args = parser.parse_args()
+
+ (LOCAL_BASE / "crates").mkdir(parents=True, exist_ok=True)
+ (LOCAL_BASE / "llvm-profraw").mkdir(parents=True, exist_ok=True)
+ (LOCAL_BASE / "frontend-profraw").mkdir(parents=True, exist_ok=True)
+ (LOCAL_BASE / "benchmarks").mkdir(parents=True, exist_ok=True)
+
+ args.func(args)
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())