diff options
author | aiuto <aiuto@google.com> | 2022-12-09 13:31:52 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-09 13:31:52 -0500 |
commit | 183581b9c3844a661e97bbc7979e947dee6e2c30 (patch) | |
tree | ca81393ba2078cf9eb9646bf20a7614428cee2e1 | |
parent | ae2f8a528e14bd545ff5835f60592c6e148ef57e (diff) | |
parent | 7ac78275e209ce69953248d28fa5419c3f120190 (diff) | |
download | bazelbuild-rules_license-183581b9c3844a661e97bbc7979e947dee6e2c30.tar.gz |
Merge pull request #63 from aiuto/pinfo
Add package_info rule and a new gatherer to collect it.
-rw-r--r-- | BUILD | 10 | ||||
-rw-r--r-- | examples/sboms/BUILD | 13 | ||||
-rw-r--r-- | rules/gather_licenses_info.bzl | 4 | ||||
-rw-r--r-- | rules/gather_metadata.bzl | 302 | ||||
-rw-r--r-- | rules/licenses_core.bzl | 44 | ||||
-rw-r--r-- | rules/package_info.bzl | 100 | ||||
-rw-r--r-- | rules/providers.bzl | 38 | ||||
-rw-r--r-- | rules/sbom.bzl | 159 | ||||
-rw-r--r-- | tools/BUILD | 21 | ||||
-rw-r--r-- | tools/write_sbom.py | 117 |
10 files changed, 793 insertions, 15 deletions
@@ -13,9 +13,11 @@ # limitations under the License. load("@rules_license//rules:license.bzl", "license") +load("@rules_license//rules:package_info.bzl", "package_info") +load("@rules_license//:version.bzl", "version") package( - default_applicable_licenses = [":license"], + default_applicable_licenses = [":license", ":package_info"], default_visibility = ["//visibility:public"], ) @@ -29,6 +31,12 @@ license( license_text = "LICENSE", ) +package_info( + name = "package_info", + package_name = "rules_license", + package_version = version, +) + exports_files( ["LICENSE", "WORKSPACE"], visibility = ["//visibility:public"], diff --git a/examples/sboms/BUILD b/examples/sboms/BUILD new file mode 100644 index 0000000..0c31a04 --- /dev/null +++ b/examples/sboms/BUILD @@ -0,0 +1,13 @@ +# Demonstrate the generate_sbom rule + +load("@rules_license//rules:sbom.bzl", "generate_sbom") + +# There are not a lot of targets in this rule set to build a SBOM from +# so we will (in a very self-referential way) generate one for the tool +# which generates the SBOMs +# See the output in bazel-bin/examples/sboms/write_sbom.txt +generate_sbom( + name = "write_sbom_sbom", + out = "write_sbom.txt", + deps = ["//tools:write_sbom"], +) diff --git a/rules/gather_licenses_info.bzl b/rules/gather_licenses_info.bzl index a5f1a41..b676972 100644 --- a/rules/gather_licenses_info.bzl +++ b/rules/gather_licenses_info.bzl @@ -16,7 +16,7 @@ load( "@rules_license//rules:licenses_core.bzl", "TraceInfo", - "gather_licenses_info_common", + "gather_metadata_info_common", "should_traverse", ) load( @@ -41,7 +41,7 @@ def _strip_null_repo(label): return s def _gather_licenses_info_impl(target, ctx): - return gather_licenses_info_common(target, ctx, TransitiveLicensesInfo, NAMESPACES, should_traverse) + return gather_metadata_info_common(target, ctx, TransitiveLicensesInfo, NAMESPACES, [], should_traverse) gather_licenses_info = aspect( doc = """Collects LicenseInfo providers into a single TransitiveLicensesInfo 
provider.""", diff --git a/rules/gather_metadata.bzl b/rules/gather_metadata.bzl new file mode 100644 index 0000000..9e96cba --- /dev/null +++ b/rules/gather_metadata.bzl @@ -0,0 +1,302 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Rules and macros for collecting LicenseInfo providers.""" + +load( + "@rules_license//rules:licenses_core.bzl", + "TraceInfo", + "gather_metadata_info_common", + "should_traverse", +) +load( + "@rules_license//rules:providers.bzl", + "MetadataInfo", + "PackageInfo", + "TransitiveMetadataInfo", +) + +# Definition for compliance namespace, used for filtering licenses +# based on the namespace to which they belong. +NAMESPACES = ["compliance"] + +def _strip_null_repo(label): + """Removes the null repo name (e.g. @//) from a string. 
+ + The is to make str(label) compatible between bazel 5.x and 6.x + """ + s = str(label) + if s.startswith('@//'): + return s[1:] + elif s.startswith('@@//'): + return s[2:] + return s + +def _bazel_package(label): + l = _strip_null_repo(label) + return l[0:-(len(label.name) + 1)] + +def _gather_metadata_info_impl(target, ctx): + return gather_metadata_info_common(target, ctx, TransitiveMetadataInfo, NAMESPACES, [MetadataInfo, PackageInfo], should_traverse) + +gather_metadata_info = aspect( + doc = """Collects LicenseInfo providers into a single TransitiveMetadataInfo provider.""", + implementation = _gather_metadata_info_impl, + attr_aspects = ["*"], + attrs = { + "_trace": attr.label(default = "@rules_license//rules:trace_target"), + }, + provides = [TransitiveMetadataInfo], + apply_to_generating_rules = True, +) + +def _write_metadata_info_impl(target, ctx): + """Write transitive license info into a JSON file + + Args: + target: The target of the aspect. + ctx: The aspect evaluation context. 
+ + Returns: + OutputGroupInfo + """ + + if not TransitiveMetadataInfo in target: + return [OutputGroupInfo(licenses = depset())] + info = target[TransitiveMetadataInfo] + outs = [] + + # If the result doesn't contain licenses, we simply return the provider + if not hasattr(info, "target_under_license"): + return [OutputGroupInfo(licenses = depset())] + + # Write the output file for the target + name = "%s_metadata_info.json" % ctx.label.name + content = "[\n%s\n]\n" % ",\n".join(metadata_info_to_json(info)) + out = ctx.actions.declare_file(name) + ctx.actions.write( + output = out, + content = content, + ) + outs.append(out) + + if ctx.attr._trace[TraceInfo].trace: + trace = ctx.actions.declare_file("%s_trace_info.json" % ctx.label.name) + ctx.actions.write(output = trace, content = "\n".join(info.traces)) + outs.append(trace) + + return [OutputGroupInfo(licenses = depset(outs))] + +gather_metadata_info_and_write = aspect( + doc = """Collects TransitiveMetadataInfo providers and writes JSON representation to a file. + + Usage: + bazel build //some:target \ + --aspects=@rules_license//rules:gather_metadata_info.bzl%gather_metadata_info_and_write + --output_groups=licenses + """, + implementation = _write_metadata_info_impl, + attr_aspects = ["*"], + attrs = { + "_trace": attr.label(default = "@rules_license//rules:trace_target"), + }, + provides = [OutputGroupInfo], + requires = [gather_metadata_info], + apply_to_generating_rules = True, +) + +def write_metadata_info(ctx, deps, json_out): + """Writes TransitiveMetadataInfo providers for a set of targets as JSON. + + TODO(aiuto): Document JSON schema. But it is under development, so the current + best place to look is at tests/hello_licenses.golden. + + Usage: + write_metadata_info must be called from a rule implementation, where the + rule has run the gather_metadata_info aspect on its deps to + collect the transitive closure of LicenseInfo providers into a + LicenseInfo provider. 
+ + foo = rule( + implementation = _foo_impl, + attrs = { + "deps": attr.label_list(aspects = [gather_metadata_info]) + } + ) + + def _foo_impl(ctx): + ... + out = ctx.actions.declare_file("%s_licenses.json" % ctx.label.name) + write_metadata_info(ctx, ctx.attr.deps, metadata_file) + + Args: + ctx: context of the caller + deps: a list of deps which should have TransitiveMetadataInfo providers. + This requires that you have run the gather_metadata_info + aspect over them + json_out: output handle to write the JSON info + """ + licenses = [] + for dep in deps: + if TransitiveMetadataInfo in dep: + licenses.extend(metadata_info_to_json(dep[TransitiveMetadataInfo])) + ctx.actions.write( + output = json_out, + content = "[\n%s\n]\n" % ",\n".join(licenses), + ) + +def metadata_info_to_json(metadata_info): + """Render a single LicenseInfo provider to JSON + + Args: + metadata_info: A LicenseInfo. + + Returns: + [(str)] list of LicenseInfo values rendered as JSON. + """ + + main_template = """ {{ + "top_level_target": "{top_level_target}", + "dependencies": [{dependencies} + ], + "licenses": [{licenses} + ], + "packages": [{packages} + ]\n }}""" + + dep_template = """ + {{ + "target_under_license": "{target_under_license}", + "licenses": [ + {licenses} + ] + }}""" + + license_template = """ + {{ + "label": "{label}", + "bazel_package": "{bazel_package}", + "license_kinds": [{kinds} + ], + "copyright_notice": "{copyright_notice}", + "package_name": "{package_name}", + "package_url": "{package_url}", + "package_version": "{package_version}", + "license_text": "{license_text}", + "used_by": [ + {used_by} + ] + }}""" + + kind_template = """ + {{ + "target": "{kind_path}", + "name": "{kind_name}", + "conditions": {kind_conditions} + }}""" + + package_info_template = """ + {{ + "target": "{label}", + "bazel_package": "{bazel_package}", + "package_name": "{package_name}", + "package_url": "{package_url}", + "package_version": "{package_version}" + }}""" + + # Build reverse map of 
license to user + used_by = {} + for dep in metadata_info.deps.to_list(): + # Undo the concatenation applied when stored in the provider. + dep_licenses = dep.licenses.split(",") + for license in dep_licenses: + if license not in used_by: + used_by[license] = [] + used_by[license].append(_strip_null_repo(dep.target_under_license)) + + all_licenses = [] + for license in sorted(metadata_info.licenses.to_list(), key = lambda x: x.label): + kinds = [] + for kind in sorted(license.license_kinds, key = lambda x: x.name): + kinds.append(kind_template.format( + kind_name = kind.name, + kind_path = kind.label, + kind_conditions = kind.conditions, + )) + + if license.license_text: + # Special handling for synthetic LicenseInfo + text_path = (license.license_text.package + "/" + license.license_text.name if type(license.license_text) == "Label" else license.license_text.path) + all_licenses.append(license_template.format( + copyright_notice = license.copyright_notice, + kinds = ",".join(kinds), + license_text = text_path, + package_name = license.package_name, + package_url = license.package_url, + package_version = license.package_version, + label = _strip_null_repo(license.label), + bazel_package = _bazel_package(license.label), + used_by = ",\n ".join(sorted(['"%s"' % x for x in used_by[str(license.label)]])), + )) + + all_deps = [] + for dep in sorted(metadata_info.deps.to_list(), key = lambda x: x.target_under_license): + metadata_used = [] + + # Undo the concatenation applied when stored in the provider. + dep_licenses = dep.licenses.split(",") + all_deps.append(dep_template.format( + target_under_license = _strip_null_repo(dep.target_under_license), + licenses = ",\n ".join(sorted(['"%s"' % _strip_null_repo(x) for x in dep_licenses])), + )) + + all_packages = [] + # We would use this if we had distinct depsets for every provider type. 
+ #for package in sorted(metadata_info.package_info.to_list(), key = lambda x: x.label): + # all_packages.append(package_info_template.format( + # label = _strip_null_repo(package.label), + # package_name = package.package_name, + # package_url = package.package_url, + # package_version = package.package_version, + # )) + + for mi in sorted(metadata_info.other_metadata.to_list(), key = lambda x: x.label): + # Maybe use a map of provider class to formatter. A generic dict->json function + # in starlark would help + + # This format is for using distinct providers. I like the compile time safety. + if mi.type == "package_info": + all_packages.append(package_info_template.format( + label = _strip_null_repo(mi.label), + bazel_package = _bazel_package(mi.label), + package_name = mi.package_name, + package_url = mi.package_url, + package_version = mi.package_version, + )) + # experimental: Support the MetadataInfo bag of data + if mi.type == "package_info_alt": + all_packages.append(package_info_template.format( + label = _strip_null_repo(mi.label), + bazel_package = _bazel_package(mi.label), + # data is just a bag, so we need to use get() or "" + package_name = mi.data.get("package_name") or "", + package_url = mi.data.get("package_url") or "", + package_version = mi.data.get("package_version") or "", + )) + + return [main_template.format( + top_level_target = _strip_null_repo(metadata_info.target_under_license), + dependencies = ",".join(all_deps), + licenses = ",".join(all_licenses), + packages = ",".join(all_packages), + )] diff --git a/rules/licenses_core.bzl b/rules/licenses_core.bzl index 42702bd..cf476a4 100644 --- a/rules/licenses_core.bzl +++ b/rules/licenses_core.bzl @@ -19,6 +19,7 @@ load( "@rules_license//rules:providers.bzl", "LicenseInfo", "LicensedTargetInfo", + "TransitiveLicensesInfo", ) @@ -66,7 +67,7 @@ def should_traverse(ctx, attr): return True -def _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider, filter_func): +def 
_get_transitive_metadata(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider, filter_func): attrs = [a for a in dir(ctx.rule.attr)] for name in attrs: if not filter_func(ctx, name): @@ -96,8 +97,21 @@ def _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider, for trace in info.traces: traces.append("(" + ", ".join([str(ctx.label), ctx.rule.kind, name]) + ") -> " + trace) -def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filter_func): - """Collect license info from myself and my deps. + # We only need one or the other of these stanzas. + # If we use a polymorphic approach to metadata providers, then + # this works. + if hasattr(info, "other_metadata"): + if info.other_metadata: + trans_other_metadata.append(info.other_metadata) + # But if we want more precise type safety, we would have a + # trans_* for each type of metadata. That is not user + # extensibile. + if hasattr(info, "package_info"): + if info.package_info: + trans_package_info.append(info.package_info) + +def gather_metadata_info_common(target, ctx, provider_factory, namespaces, metadata_providers, filter_func): + """Collect license and other metadata info from myself and my deps. Any single target might directly depend on a license, or depend on something that transitively depends on a license, or neither. @@ -116,6 +130,7 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte ctx: The aspect evaluation context. 
provider_factory: abstracts the provider returned by this aspect namespaces: a list of namespaces licenses must match to be included + metadata_providers: a list of other providers of interest filter_func: a function that returns true iff the dep edge should be ignored Returns: @@ -124,6 +139,8 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte # First we gather my direct license attachments licenses = [] + other_metadata = [] + package_info = [] if ctx.rule.kind == "_license": # Don't try to gather licenses from the license rule itself. We'll just # blunder into the text file of the license and pick up the default @@ -144,14 +161,18 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte licenses.append(lic) else: fail("should have a namespace") - + for m_p in metadata_providers: + if m_p in dep: + other_metadata.append(dep[m_p]) # Now gather transitive collection of providers from the targets # this target depends upon. trans_licenses = [] + trans_other_metadata = [] + trans_package_info = [] trans_deps = [] traces = [] - _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider_factory, filter_func) + _get_transitive_metadata(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider_factory, filter_func) if not licenses and not trans_licenses: return [provider_factory(deps = depset(), licenses = depset(), traces = [])] @@ -179,9 +200,22 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte else: direct_license_uses = None + # This is a bit of a hack for bazel 5.x. We can not pass extra fields to + # the provider constructor, so we need to do something special for each. + # In Bazel 6.x we can use a provider initializer function that would take + # all the args and only use the ones it wants. 
+ if provider_factory == TransitiveLicensesInfo: + return [provider_factory( + target_under_license = target.label, + licenses = depset(tuple(licenses), transitive = trans_licenses), + deps = depset(direct = direct_license_uses, transitive = trans_deps), + traces = traces, + )] + return [provider_factory( target_under_license = target.label, licenses = depset(tuple(licenses), transitive = trans_licenses), + other_metadata = depset(tuple(other_metadata), transitive = trans_other_metadata), deps = depset(direct = direct_license_uses, transitive = trans_deps), traces = traces, )] diff --git a/rules/package_info.bzl b/rules/package_info.bzl new file mode 100644 index 0000000..a8643f8 --- /dev/null +++ b/rules/package_info.bzl @@ -0,0 +1,100 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Rules for declaring metadata about a package.""" + +load( + "@rules_license//rules:providers.bzl", + "MetadataInfo", + "PackageInfo", +) + +# +# package_info() +# + +def _package_info_impl(ctx): + provider = PackageInfo( + # Metadata providers must include a type discriminator. We don't need it + # to collect the providers, but we do need it to write the JSON. We + # key on the type field to look up the correct block of code to pull + # data out and format it. We can't to the lookup on the provider class. 
+ type = "package_info", + label = ctx.label, + package_name = ctx.attr.package_name or ctx.build_file_path.rstrip("/BUILD"), + package_url = ctx.attr.package_url, + package_version = ctx.attr.package_version, + ) + # Experimental alternate design, using a generic 'data' back to hold things + generic_provider = MetadataInfo( + type = "package_info_alt", + label = ctx.label, + data = { + "package_name": ctx.attr.package_name or ctx.build_file_path.rstrip("/BUILD"), + "package_url": ctx.attr.package_url, + "package_version": ctx.attr.package_version + } + ) + return [provider, generic_provider] + +_package_info = rule( + implementation = _package_info_impl, + attrs = { + "package_name": attr.string( + doc = "A human readable name identifying this package." + + " This may be used to produce an index of OSS packages used by" + + " an applicatation.", + ), + "package_url": attr.string( + doc = "The URL this instance of the package was download from." + + " This may be used to produce an index of OSS packages used by" + + " an applicatation.", + ), + "package_version": attr.string( + doc = "A human readable version string identifying this package." + + " This may be used to produce an index of OSS packages used" + + " by an applicatation. It should be a value that" + + " increases over time, rather than a commit hash." + ), + }, +) + +# buildifier: disable=function-docstring-args +def package_info( + name, + package_name = None, + package_url = None, + package_version = None, + visibility = ["//visibility:public"]): + """Wrapper for package_info rule. + + Args: + name: str target name. + package_name : str A human readable name identifying this package. This + may be used to produce an index of OSS packages used by + an application. + package_url: str The canoncial URL this package distribution was retrieved from. + Note that, because of local mirroring, that might not be the + physical URL it was retrieved from. 
+ package_version: str A human readable name identifying version of this package. + """ + _package_info( + name = name, + package_name = package_name, + package_url = package_url, + package_version = package_version, + applicable_licenses = [], + visibility = visibility, + tags = [], + testonly = 0, + ) diff --git a/rules/providers.bzl b/rules/providers.bzl index 8778fd7..3b1f090 100644 --- a/rules/providers.bzl +++ b/rules/providers.bzl @@ -59,3 +59,41 @@ def licenses_info(): # This provider is used by the aspect that is used by manifest() rules. TransitiveLicensesInfo = licenses_info() + +# This is one way to do specify data +PackageInfo = provider( + doc = """Provides information about a package.""", + fields = { + "type": "string: How to interpret data", + "label": "Label: label of the package_info rule", + "package_name": "string: Human readable package name", + "package_url": "string: URL from which this package was downloaded.", + "package_version": "string: Human readable version string", + }, +) + +# This is more extensible. Because of the provider implementation, having a big +# dict of values rather than named fields is not much more costly. +# Design choice. 
Replace data with actual providers, such as PackageInfo +MetadataInfo = provider( + doc = """Generic bag of metadata.""", + fields = { + "type": "string: How to interpret data", + "label": "Label: label of the metadata rule", + "data": "String->any: Map of names to values", + } +) + +TransitiveMetadataInfo = provider( + doc = """The transitive set of licenses used by a target.""", + fields = { + "top_level_target": "Label: The top level target label we are examining.", + "other_metadata": "depset(MetatdataInfo)", + "licenses": "depset(LicenseInfo)", + "package_info": "depset(PackageInfo)", + + "target_under_license": "Label: A target which will be associated with some licenses.", + "deps": "depset(LicensedTargetInfo): The transitive list of dependencies that have licenses.", + "traces": "list(string) - diagnostic for tracing a dependency relationship to a target.", + }, +) diff --git a/rules/sbom.bzl b/rules/sbom.bzl new file mode 100644 index 0000000..fb17adc --- /dev/null +++ b/rules/sbom.bzl @@ -0,0 +1,159 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SBOM generation""" + +load( + "@rules_license//rules:gather_metadata.bzl", + "gather_metadata_info", + "gather_metadata_info_and_write", + "write_metadata_info", +) +load( + "@rules_license//rules:providers.bzl", + "TransitiveLicensesInfo", +) + +# This rule is proof of concept, and may not represent the final +# form of a rule for compliance validation. 
+def _generate_sbom_impl(ctx): + # Gather all licenses and write information to one place + + licenses_file = ctx.actions.declare_file("_%s_licenses_info.json" % ctx.label.name) + write_metadata_info(ctx, ctx.attr.deps, licenses_file) + + license_files = [] + # if ctx.outputs.license_texts: + # license_files = get_licenses_mapping(ctx.attr.deps).keys() + + # Now turn the big blob of data into something consumable. + inputs = [licenses_file] + outputs = [ctx.outputs.out] + args = ctx.actions.args() + args.add("--licenses_info", licenses_file.path) + args.add("--out", ctx.outputs.out.path) + ctx.actions.run( + mnemonic = "CreateSBOM", + progress_message = "Creating SBOM for %s" % ctx.label, + inputs = inputs, + outputs = outputs, + executable = ctx.executable._sbom_generator, + arguments = [args], + ) + outputs.append(licenses_file) # also make the json file available. + return [DefaultInfo(files = depset(outputs))] + +_generate_sbom = rule( + implementation = _generate_sbom_impl, + attrs = { + "deps": attr.label_list( + aspects = [gather_metadata_info], + ), + "out": attr.output(mandatory = True), + "_sbom_generator": attr.label( + default = Label("@rules_license//tools:write_sbom"), + executable = True, + allow_files = True, + cfg = "exec", + ), + }, +) + +def generate_sbom(**kwargs): + _generate_sbom(**kwargs) + +def _manifest_impl(ctx): + # Gather all licenses and make it available as deps for downstream rules + # Additionally write the list of license filenames to a file that can + # also be used as an input to downstream rules. 
+ licenses_file = ctx.actions.declare_file(ctx.attr.out.name) + mappings = get_licenses_mapping(ctx.attr.deps, ctx.attr.warn_on_legacy_licenses) + ctx.actions.write( + output = licenses_file, + content = "\n".join([",".join([f.path, p]) for (f, p) in mappings.items()]), + ) + return [DefaultInfo(files = depset(mappings.keys()))] + +_manifest = rule( + implementation = _manifest_impl, + doc = """Internal tmplementation method for manifest().""", + attrs = { + "deps": attr.label_list( + doc = """List of targets to collect license files for.""", + aspects = [gather_metadata_info], + ), + "out": attr.output( + doc = """Output file.""", + mandatory = True, + ), + "warn_on_legacy_licenses": attr.bool(default = False), + }, +) + +def manifest(name, deps, out = None, **kwargs): + if not out: + out = name + ".manifest" + + _manifest(name = name, deps = deps, out = out, **kwargs) + +def _licenses_used_impl(ctx): + # Gather all licenses and make it available as JSON + write_metadata_info(ctx, ctx.attr.deps, ctx.outputs.out) + return [DefaultInfo(files = depset([ctx.outputs.out]))] + +_licenses_used = rule( + implementation = _licenses_used_impl, + doc = """Internal tmplementation method for licenses_used().""", + attrs = { + "deps": attr.label_list( + doc = """List of targets to collect LicenseInfo for.""", + aspects = [gather_metadata_info_and_write], + ), + "out": attr.output( + doc = """Output file.""", + mandatory = True, + ), + }, +) + +def get_licenses_mapping(deps, warn = False): + """Creates list of entries representing all licenses for the deps. + + Args: + + deps: a list of deps which should have TransitiveLicensesInfo providers. 
+ This requires that you have run the gather_licenses_info + aspect over them + + warn: boolean, if true, display output about legacy targets that need + update + + Returns: + {File:package_name} + """ + tls = [] + for dep in deps: + lds = dep[TransitiveLicensesInfo].licenses + tls.append(lds) + + ds = depset(transitive = tls) + + # Ignore any legacy licenses that may be in the report + mappings = {} + for lic in ds.to_list(): + if type(lic.license_text) == "File": + mappings[lic.license_text] = lic.package_name + elif warn: + print("Legacy license %s not included, rule needs updating" % lic.license_text) + + return mappings diff --git a/tools/BUILD b/tools/BUILD index 9be1c2d..2b56a34 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -15,12 +15,20 @@ """License declaration and compliance checking tools.""" package( - default_applicable_licenses = ["//:license"], + default_applicable_licenses = ["//:license", "//:package_info"], default_visibility = ["//visibility:public"], ) licenses(["notice"]) +filegroup( + name = "standard_package", + srcs = glob(["**"]), + visibility = ["//distro:__pkg__"], +) + +exports_files(["diff_test.sh"]) + py_binary( name = "checker_demo", srcs = ["checker_demo.py"], @@ -28,10 +36,9 @@ py_binary( visibility = ["//visibility:public"], ) -exports_files(["diff_test.sh"]) - -filegroup( - name = "standard_package", - srcs = glob(["**"]), - visibility = ["//distro:__pkg__"], +py_binary( + name = "write_sbom", + srcs = ["write_sbom.py"], + python_version = "PY3", + visibility = ["//visibility:public"], ) diff --git a/tools/write_sbom.py b/tools/write_sbom.py new file mode 100644 index 0000000..18286ab --- /dev/null +++ b/tools/write_sbom.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Proof of concept SBOM writer.

Reads the JSON metadata file passed via --licenses_info and writes a small
SPDX-style tag/value text document to the path passed via --out.

This is only a demonstration. It will be replaced with other tools.
"""

import argparse
import datetime
import getpass
import json
import sys


TOOL = 'https://github.com/bazelbuild/rules_license/tools:write_sbom'


def _load_package_data(package_info):
  """Parse the UTF-8 JSON file at path `package_info` and return its value."""
  with open(package_info, encoding='utf-8') as inp:
    return json.load(inp)


def _creator_name():
  """Return the current user name, or '<unknown>' if it cannot be determined.

  os.getlogin() needs a controlling terminal and raises OSError without one
  (for example when run as a build action), so getpass.getuser() is used.
  """
  try:
    return getpass.getuser()
  except (ImportError, KeyError, OSError):
    # getuser() falls back to the password database when no user environment
    # variable is set; that lookup can fail on some platforms.
    return '<unknown>'


def _write_sbom_header(out, package):
  """Write the document header.

  The header is written without a trailing newline; _write_sbom() starts each
  package stanza with a newline.

  Args:
    out: file object to write to
    package: name of the top level target the document describes
  """
  created = datetime.datetime.now(datetime.timezone.utc)
  header = [
    'SPDXVersion: SPDX-2.2',
    'DataLicense: CC0-1.0',
    'SPDXID: SPDXRef-DOCUMENT',
    'DocumentName: %s' % package,
    # TBD
    # 'DocumentNamespace: https://swinslow.net/spdx-examples/example1/hello-v3
    'Creator: Person: %s' % _creator_name(),
    'Creator: Tool: %s' % TOOL,
    # SPDX timestamps are UTC in the form YYYY-MM-DDThh:mm:ssZ.
    created.strftime('Created: %Y-%m-%dT%H:%M:%SZ'),
    '',
    '##### Package: %s' % package,
  ]
  out.write('\n'.join(header))


def _write_sbom(out, packages):
  """Produce a basic SBOM

  Each package is written as one stanza. Only the fields that are present and
  non-empty in the package dict are emitted.

  Args:
    out: file object to write to
    packages: package metadata. An iterable of dicts parsed from the JSON.
  """
  for p in packages:
    name = p.get('package_name') or '<unknown>'
    out.write('\n')
    out.write('SPDXID: "%s"\n' % name)
    out.write(' name: "%s"\n' % name)
    if p.get('package_version'):
      out.write(' versionInfo: "%s"\n' % p['package_version'])
    # IGNORE_COPYRIGHT: Not a copyright notice. It is a variable holding one.
    cn = p.get('copyright_notice')
    if cn:
      out.write(' copyrightText: "%s"\n' % cn)
    kinds = p.get('license_kinds')
    if kinds:
      out.write(' licenseDeclared: "%s"\n' %
                ','.join([k['name'] for k in kinds]))
    url = p.get('package_url')
    if url:
      out.write(' downloadLocation: %s\n' % url)


def _merge_by_package(licenses, package_infos):
  """Merge license and package_info dicts that share a 'bazel_package'.

  These are similar dicts, so merge them by package. This is not strictly
  true, as different licenses can appear in the same package, but it is good
  enough for demonstrating the sbom. When both kinds exist for a package the
  package_info values are layered over the license dict (in place).

  Args:
    licenses: list of license dicts, each with a 'bazel_package' key
    package_infos: list of package_info dicts, each with a 'bazel_package' key

  Returns:
    dict mapping bazel package to the merged metadata dict.
  """
  merged = {lic['bazel_package']: lic for lic in licenses}
  for info in package_infos:
    existing = merged.get(info['bazel_package'])
    if existing:
      existing.update(info)
    else:
      merged[info['bazel_package']] = info
  return merged


def main(argv=None):
  """Command line entry point.

  Args:
    argv: argument list to parse; defaults to sys.argv[1:] when None.

  Returns:
    Process exit status (0 on success).
  """
  parser = argparse.ArgumentParser(description='Demonstration SBOM writer')

  parser.add_argument('--licenses_info', required=True,
                      help='path to JSON file containing all license data')
  parser.add_argument('--out', default='sbom.out', help='SBOM output')
  args = parser.parse_args(argv)

  license_data = _load_package_data(args.licenses_info)
  if not license_data:
    # Nothing to describe; fail with a usage error instead of an IndexError.
    parser.error('no targets found in %s' % args.licenses_info)
  target = license_data[0]  # we assume only one target for the demo

  top_level_target = target['top_level_target']
  # It's not really packages, but this is close proxy for now
  licenses = target['licenses']
  package_infos = target['packages']

  by_package = _merge_by_package(licenses, package_infos)

  # newline='\n' keeps the output byte-identical across platforms.
  with open(args.out, mode='w', encoding='utf-8', newline='\n') as out:
    _write_sbom_header(out, package=top_level_target)
    _write_sbom(out, by_package.values())
  return 0


if __name__ == '__main__':
  sys.exit(main())