diff options
Diffstat (limited to 'python/pip_install/tools/wheel_installer/wheel_installer.py')
-rw-r--r-- | python/pip_install/tools/wheel_installer/wheel_installer.py | 329 |
1 files changed, 288 insertions, 41 deletions
diff --git a/python/pip_install/tools/wheel_installer/wheel_installer.py b/python/pip_install/tools/wheel_installer/wheel_installer.py index 801ef95..9b363c3 100644 --- a/python/pip_install/tools/wheel_installer/wheel_installer.py +++ b/python/pip_install/tools/wheel_installer/wheel_installer.py @@ -12,22 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Build and/or fetch a single wheel based on the requirement passed in""" - +import argparse import errno import glob import json import os import re +import shutil import subprocess import sys +import textwrap from pathlib import Path from tempfile import NamedTemporaryFile -from typing import Dict, List, Optional, Set, Tuple +from typing import Dict, Iterable, List, Optional, Set, Tuple from pip._vendor.packaging.utils import canonicalize_name -from python.pip_install.tools.wheel_installer import arguments, namespace_pkgs, wheel +from python.pip_install.tools.lib import annotation, arguments, bazel +from python.pip_install.tools.wheel_installer import namespace_pkgs, wheel def _configure_reproducible_wheels() -> None: @@ -101,12 +103,201 @@ def _setup_namespace_pkg_compatibility(wheel_dir: str) -> None: namespace_pkgs.add_pkgutil_style_namespace_pkg_init(ns_pkg_dir) +def _generate_entry_point_contents( + module: str, attribute: str, shebang: str = "#!/usr/bin/env python3" +) -> str: + """Generate the contents of an entry point script. + + Args: + module (str): The name of the module to use. + attribute (str): The name of the attribute to call. + shebang (str, optional): The shebang to use for the entry point python + file. + + Returns: + str: A string of python code. 
+ """ + return textwrap.dedent( + """\ + {shebang} + import sys + from {module} import {attribute} + if __name__ == "__main__": + sys.exit({attribute}()) + """.format( + shebang=shebang, module=module, attribute=attribute + ) + ) + + +def _generate_entry_point_rule(name: str, script: str, pkg: str) -> str: + """Generate a Bazel `py_binary` rule for an entry point script. + + Note that the script is used to determine the name of the target. The name of + entry point targets should be unique to avoid conflicts with existing sources or + directories within a wheel. + + Args: + name (str): The name of the generated py_binary. + script (str): The path to the entry point's python file. + pkg (str): The package owning the entry point. This is expected to + match up with the `py_library` defined for each repository. + + + Returns: + str: A `py_binary` instantiation. + """ + return textwrap.dedent( + """\ + py_binary( + name = "{name}", + srcs = ["{src}"], + # This makes this directory a top-level in the python import + # search path for anything that depends on this. + imports = ["."], + deps = ["{pkg}"], + ) + """.format( + name=name, src=str(script).replace("\\", "/"), pkg=pkg + ) + ) + + +def _generate_copy_commands(src, dest, is_executable=False) -> str: + """Generate a [@bazel_skylib//rules:copy_file.bzl%copy_file][cf] target + + [cf]: https://github.com/bazelbuild/bazel-skylib/blob/1.1.1/docs/copy_file_doc.md + + Args: + src (str): The label for the `src` attribute of [copy_file][cf] + dest (str): The label for the `out` attribute of [copy_file][cf] + is_executable (bool, optional): Whether or not the file being copied is executable. + sets `is_executable` for [copy_file][cf] + + Returns: + str: A `copy_file` instantiation. 
+ """ + return textwrap.dedent( + """\ + copy_file( + name = "{dest}.copy", + src = "{src}", + out = "{dest}", + is_executable = {is_executable}, + ) + """.format( + src=src, + dest=dest, + is_executable=is_executable, + ) + ) + + +def _generate_build_file_contents( + name: str, + dependencies: List[str], + whl_file_deps: List[str], + data_exclude: List[str], + tags: List[str], + srcs_exclude: List[str] = [], + data: List[str] = [], + additional_content: List[str] = [], +) -> str: + """Generate a BUILD file for an unzipped Wheel + + Args: + name: the target name of the py_library + dependencies: a list of Bazel labels pointing to dependencies of the library + whl_file_deps: a list of Bazel labels pointing to wheel file dependencies of this wheel. + data_exclude: more patterns to exclude from the data attribute of generated py_library rules. + tags: list of tags to apply to generated py_library rules. + additional_content: A list of additional content to append to the BUILD file. + + Returns: + A complete BUILD file as a string + + We allow for empty Python sources as for Wheels containing only compiled C code + there may be no Python sources whatsoever (e.g. packages written in Cython: like `pymssql`). + """ + + data_exclude = list( + set( + [ + "**/* *", + "**/*.py", + "**/*.pyc", + "**/*.pyc.*", # During pyc creation, temp files named *.pyc.NNNN are created + # RECORD is known to contain sha256 checksums of files which might include the checksums + # of generated files produced when wheels are installed. The file is ignored to avoid + # Bazel caching issues. 
+ "**/*.dist-info/RECORD", + ] + + data_exclude + ) + ) + + return "\n".join( + [ + textwrap.dedent( + """\ + load("@rules_python//python:defs.bzl", "py_library", "py_binary") + load("@bazel_skylib//rules:copy_file.bzl", "copy_file") + + package(default_visibility = ["//visibility:public"]) + + filegroup( + name = "{dist_info_label}", + srcs = glob(["site-packages/*.dist-info/**"], allow_empty = True), + ) + + filegroup( + name = "{data_label}", + srcs = glob(["data/**"], allow_empty = True), + ) + + filegroup( + name = "{whl_file_label}", + srcs = glob(["*.whl"], allow_empty = True), + data = [{whl_file_deps}], + ) + + py_library( + name = "{name}", + srcs = glob(["site-packages/**/*.py"], exclude={srcs_exclude}, allow_empty = True), + data = {data} + glob(["site-packages/**/*"], exclude={data_exclude}), + # This makes this directory a top-level in the python import + # search path for anything that depends on this. + imports = ["site-packages"], + deps = [{dependencies}], + tags = [{tags}], + ) + """.format( + name=name, + dependencies=",".join(sorted(dependencies)), + data_exclude=json.dumps(sorted(data_exclude)), + whl_file_label=bazel.WHEEL_FILE_LABEL, + whl_file_deps=",".join(sorted(whl_file_deps)), + tags=",".join(sorted(['"%s"' % t for t in tags])), + data_label=bazel.DATA_LABEL, + dist_info_label=bazel.DIST_INFO_LABEL, + entry_point_prefix=bazel.WHEEL_ENTRY_POINT_PREFIX, + srcs_exclude=json.dumps(sorted(srcs_exclude)), + data=json.dumps(sorted(data)), + ) + ) + ] + + additional_content + ) + + def _extract_wheel( wheel_file: str, extras: Dict[str, Set[str]], + pip_data_exclude: List[str], enable_implicit_namespace_pkgs: bool, - platforms: List[wheel.Platform], + repo_prefix: str, installation_dir: Path = Path("."), + annotation: Optional[annotation.Annotation] = None, ) -> None: """Extracts wheel into given directory and creates py_library and filegroup targets. 
@@ -114,7 +305,9 @@ def _extract_wheel( wheel_file: the filepath of the .whl installation_dir: the destination directory for installation of the wheel. extras: a list of extras to add as dependencies for the installed wheel + pip_data_exclude: list of file patterns to exclude from the generated data section of the py_library enable_implicit_namespace_pkgs: if true, disables conversion of implicit namespace packages and will unzip as-is + annotation: An optional set of annotations to apply to the BUILD contents of the wheel. """ whl = wheel.Wheel(wheel_file) @@ -124,47 +317,93 @@ def _extract_wheel( _setup_namespace_pkg_compatibility(installation_dir) extras_requested = extras[whl.name] if whl.name in extras else set() + # Packages may create dependency cycles when specifying optional-dependencies / 'extras'. + # Example: github.com/google/etils/blob/a0b71032095db14acf6b33516bca6d885fe09e35/pyproject.toml#L32. + self_edge_dep = set([whl.name]) + whl_deps = sorted(whl.dependencies(extras_requested) - self_edge_dep) + + sanitised_dependencies = [ + bazel.sanitised_repo_library_label(d, repo_prefix=repo_prefix) for d in whl_deps + ] + sanitised_wheel_file_dependencies = [ + bazel.sanitised_repo_file_label(d, repo_prefix=repo_prefix) for d in whl_deps + ] + + entry_points = [] + for name, (module, attribute) in sorted(whl.entry_points().items()): + # There is an extreme edge-case with entry_points that end with `.py` + # See: https://github.com/bazelbuild/bazel/blob/09c621e4cf5b968f4c6cdf905ab142d5961f9ddc/src/test/java/com/google/devtools/build/lib/rules/python/PyBinaryConfiguredTargetTest.java#L174 + entry_point_without_py = f"{name[:-3]}_py" if name.endswith(".py") else name + entry_point_target_name = ( + f"{bazel.WHEEL_ENTRY_POINT_PREFIX}_{entry_point_without_py}" + ) + entry_point_script_name = f"{entry_point_target_name}.py" + (installation_dir / entry_point_script_name).write_text( + _generate_entry_point_contents(module, attribute) + ) + entry_points.append( + 
_generate_entry_point_rule( + entry_point_target_name, + entry_point_script_name, + bazel.PY_LIBRARY_LABEL, + ) + ) - dependencies = whl.dependencies(extras_requested, platforms) - - with open(os.path.join(installation_dir, "metadata.json"), "w") as f: - metadata = { - "name": whl.name, - "version": whl.version, - "deps": dependencies.deps, - "deps_by_platform": dependencies.deps_select, - "entry_points": [ - { - "name": name, - "module": module, - "attribute": attribute, - } - for name, (module, attribute) in sorted(whl.entry_points().items()) - ], - } - json.dump(metadata, f) + with open(os.path.join(installation_dir, "BUILD.bazel"), "w") as build_file: + additional_content = entry_points + data = [] + data_exclude = pip_data_exclude + srcs_exclude = [] + if annotation: + for src, dest in annotation.copy_files.items(): + data.append(dest) + additional_content.append(_generate_copy_commands(src, dest)) + for src, dest in annotation.copy_executables.items(): + data.append(dest) + additional_content.append( + _generate_copy_commands(src, dest, is_executable=True) + ) + data.extend(annotation.data) + data_exclude.extend(annotation.data_exclude_glob) + srcs_exclude.extend(annotation.srcs_exclude_glob) + if annotation.additive_build_content: + additional_content.append(annotation.additive_build_content) + + contents = _generate_build_file_contents( + name=bazel.PY_LIBRARY_LABEL, + dependencies=sanitised_dependencies, + whl_file_deps=sanitised_wheel_file_dependencies, + data_exclude=data_exclude, + data=data, + srcs_exclude=srcs_exclude, + tags=["pypi_name=" + whl.name, "pypi_version=" + whl.version], + additional_content=additional_content, + ) + build_file.write(contents) def main() -> None: - args = arguments.parser(description=__doc__).parse_args() + parser = argparse.ArgumentParser( + description="Build and/or fetch a single wheel based on the requirement passed in" + ) + parser.add_argument( + "--requirement", + action="store", + required=True, + help="A single 
PEP508 requirement specifier string.", + ) + parser.add_argument( + "--annotation", + type=annotation.annotation_from_str_path, + help="A json encoded file containing annotations for rendered packages.", + ) + arguments.parse_common_args(parser) + args = parser.parse_args() deserialized_args = dict(vars(args)) arguments.deserialize_structured_args(deserialized_args) _configure_reproducible_wheels() - if args.whl_file: - whl = Path(args.whl_file) - - name, extras_for_pkg = _parse_requirement_for_extra(args.requirement) - extras = {name: extras_for_pkg} if extras_for_pkg and name else dict() - _extract_wheel( - wheel_file=whl, - extras=extras, - enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs, - platforms=arguments.get_platforms(args), - ) - return - pip_args = ( [sys.executable, "-m", "pip"] + (["--isolated"] if args.isolated else []) @@ -195,10 +434,18 @@ def main() -> None: if e.errno != errno.ENOENT: raise - whl = Path(next(iter(glob.glob("*.whl")))) - - with open("whl_file.json", "w") as f: - json.dump({"whl_file": f"{whl.resolve()}"}, f) + name, extras_for_pkg = _parse_requirement_for_extra(args.requirement) + extras = {name: extras_for_pkg} if extras_for_pkg and name else dict() + + whl = next(iter(glob.glob("*.whl"))) + _extract_wheel( + wheel_file=whl, + extras=extras, + pip_data_exclude=deserialized_args["pip_data_exclude"], + enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs, + repo_prefix=args.repo_prefix, + annotation=args.annotation, + ) if __name__ == "__main__": |