From 3b8e3f150fb83bb6098298c2a79011fd0b7d6a3f Mon Sep 17 00:00:00 2001 From: aiuto Date: Wed, 15 Feb 2023 12:06:19 -0500 Subject: Add verify_archive rule to do e2e tests on built archives. (#669) We could do more, but this should be good enough to start. Fixes #644 --- pkg/BUILD | 10 +++ pkg/verify_archive.bzl | 128 ++++++++++++++++++++++++++++++++++++ pkg/verify_archive_test_lib.py | 107 ++++++++++++++++++++++++++++++ pkg/verify_archive_test_main.py.tpl | 46 +++++++++++++ tests/tar/BUILD | 20 ++++++ 5 files changed, 311 insertions(+) create mode 100644 pkg/verify_archive.bzl create mode 100644 pkg/verify_archive_test_lib.py create mode 100644 pkg/verify_archive_test_main.py.tpl diff --git a/pkg/BUILD b/pkg/BUILD index 8b83440..e24fc6b 100644 --- a/pkg/BUILD +++ b/pkg/BUILD @@ -100,3 +100,13 @@ py_binary( python_version = "PY3", visibility = ["//visibility:public"], ) + +# This might be public, but use at your own risk +py_library( + name = "verify_archive_test_lib", + srcs = ["verify_archive_test_lib.py"], + srcs_version = "PY3", + visibility = ["//visibility:public"], +) + +exports_files(["verify_archive_test_main.py.tpl"]) diff --git a/pkg/verify_archive.bzl b/pkg/verify_archive.bzl new file mode 100644 index 0000000..e31d539 --- /dev/null +++ b/pkg/verify_archive.bzl @@ -0,0 +1,128 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Rule to test that the content of an archive has particular properties. 
+
+This is available for integration testing, when people want to verify that all
+the files they expect are in an archive. Or possibly, they want to verify that
+some files do not appear.
+
+The execution time is O(# expected patterns * size of archive).
+"""
+
+load("@rules_python//python:defs.bzl", "py_test")
+
+
+def _gen_verify_archive_test_main_impl(ctx):
+    ctx.actions.expand_template(
+        template = ctx.file._template,
+        output = ctx.outputs.out,
+        # @unsorted-dict-items
+        substitutions = {
+            "${TEST_NAME}": ctx.attr.test_name,
+            "${TARGET}": ctx.files.target[0].short_path,
+            "${MUST_CONTAIN}": str(ctx.attr.must_contain),
+            "${MUST_CONTAIN_REGEX}": str(ctx.attr.must_contain_regex),
+            "${MUST_NOT_CONTAIN}": str(ctx.attr.must_not_contain),
+            "${MUST_NOT_CONTAIN_REGEX}": str(ctx.attr.must_not_contain_regex),
+            "${MIN_SIZE}": str(ctx.attr.min_size),
+            "${MAX_SIZE}": str(ctx.attr.max_size),
+        },
+    )
+    return [
+        DefaultInfo(files = depset([ctx.outputs.out])),
+    ]
+
+_gen_verify_archive_test_main = rule(
+    implementation = _gen_verify_archive_test_main_impl,
+    # @unsorted-dict-items
+    attrs = {
+        "out": attr.output(mandatory = True),
+        "test_name": attr.string(mandatory = True),
+        "target": attr.label(
+            doc = "Archive to test",
+            allow_single_file = True,
+            mandatory = True,
+        ),
+
+        "must_contain": attr.string_list(
+            doc = "List of paths which all must appear in the archive.",
+        ),
+        "must_contain_regex": attr.string_list(
+            doc = "List of regexes which all must appear in the archive.",
+        ),
+        "must_not_contain": attr.string_list(
+            doc = """List of paths that must not be in the archive.""",
+        ),
+        "must_not_contain_regex": attr.string_list(
+            doc = """List of regexes that must not be in the archive.""",
+        ),
+        "min_size": attr.int(
+            doc = """Minimum number of entries in the archive."""
+        ),
+        "max_size": attr.int(
+            doc = """Maximum number of entries in the archive."""
+        ),
+
+        # Implicit dependencies.
+        "_template": attr.label(
+            default = Label("//pkg:verify_archive_test_main.py.tpl"),
+            allow_single_file = True,
+        ),
+    },
+)
+
+def verify_archive_test(name, target,
+                        must_contain=None, must_contain_regex=None,
+                        must_not_contain=None, must_not_contain_regex=None,
+                        min_size=1, max_size=-1):
+    """Tests that an archive contains specific file patterns.
+
+    This test is used to verify that an archive contains the expected content.
+
+    Args:
+      name: Name of the generated py_test target.
+      target: A target archive.
+      must_contain: A list of paths which must appear in the archive.
+      must_contain_regex: A list of path regexes which must appear in the archive.
+      must_not_contain: A list of paths which must not appear in the archive.
+      must_not_contain_regex: A list of path regexes which must not appear in the archive.
+      min_size: The minimum number of entries which must be in the archive.
+      max_size: The maximum number of entries which must be in the archive.
+    """
+    test_src = name + "__internal_main.py"
+    _gen_verify_archive_test_main(
+        name = name + "_internal_main",
+        target = target,
+        test_name = name.replace('-', '_') + "Test",
+        out = test_src,
+        must_contain = must_contain,
+        must_contain_regex = must_contain_regex,
+        must_not_contain = must_not_contain,
+        must_not_contain_regex = must_not_contain_regex,
+        min_size = min_size,
+        max_size = max_size,
+    )
+    py_test(
+        name = name,
+        # TODO: add the lib source to srcs here so the library need not be public?
+        srcs = [":" + test_src],
+        main = test_src,
+        data = [target],
+        python_version = "PY3",
+        deps = [
+            "//pkg:verify_archive_test_lib",
+            "@bazel_tools//tools/python/runfiles",
+        ],
+    )
diff --git a/pkg/verify_archive_test_lib.py b/pkg/verify_archive_test_lib.py
new file mode 100644
index 0000000..5d66948
--- /dev/null
+++ b/pkg/verify_archive_test_lib.py
@@ -0,0 +1,107 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Compare to content manifest files."""
+
+import json
+import re
+import tarfile
+import unittest
+
+from bazel_tools.tools.python.runfiles import runfiles
+
+
+class VerifyArchiveTest(unittest.TestCase):
+  """Test harness to see if we wrote the content manifest correctly."""
+
+  #run_files = runfiles.Create()
+  #target_path = VerifyArchiveTest.run_files.Rlocation('rules_pkg/' + target)
+
+  def setUp(self):
+    super(VerifyArchiveTest, self).setUp()
+
+  def scan_target(self, target):
+    """Load the entry names of the archive at target, based on its extension."""
+    parts = target.split('.')
+    ext = parts[-1]
+    if ext.startswith('t') or (len(parts) > 1 and parts[-2] == 'tar'):
+      self.load_tar(target)
+    elif ext.startswith('z'):
+      self.fail('Can not process zip yet')
+    else:
+      self.fail('Can not figure out the archive type for (%s)' % target)
+
+  def load_tar(self, path):
+    """Record in self.paths the name of every member of the tar at path."""
+    self.paths = []
+    with tarfile.open(path, 'r:*') as f:
+      for info in f:
+        self.paths.append(info.name)
+
+  def assertMinSize(self, min_size):
+    """Check that the archive contains at least min_size entries.
+
+    Args:
+      min_size: The minimum number of targets we expect.
+    """
+    actual_size = len(self.paths)
+    self.assertGreaterEqual(
+        actual_size,
+        min_size,
+        msg = "Expected at least %d files, but found only %d" % (
+            min_size, actual_size))
+
+  def assertMaxSize(self, max_size):
+    """Check that the archive contains at most max_size entries.
+
+    Args:
+      max_size: The maximum number of targets we expect. Negative: no limit.
+    """
+    if max_size < 0:
+      return  # The rule's default of -1 means "no upper bound".
+    actual_size = len(self.paths)
+    self.assertLessEqual(
+        actual_size, max_size,
+        msg = "Expected at most %d files, but found %d" % (
+            max_size, actual_size))
+
+  def check_must_contain(self, must_contain):
+    """Fail unless every path in must_contain is present in the archive."""
+    missing = set(must_contain).difference(self.paths)
+    if missing:
+      # Sort so the failure message is stable from run to run.
+      self.fail('These required paths were not found: %s'
+                % ','.join(sorted(missing)))
+
+  def check_must_not_contain(self, must_not_contain):
+    """Fail if any path in must_not_contain is present in the archive."""
+    plain_patterns = set(must_not_contain)
+    for path in self.paths:
+      if path in plain_patterns:
+        self.fail('Found disallowed path (%s) in the archive' % path)
+
+  def check_must_contain_regex(self, must_contain_regex):
+    """Fail unless every regex matches at least one path in the archive."""
+    for pattern in must_contain_regex:
+      # re.match anchors the pattern at the start of the path.
+      r_comp = re.compile(pattern)
+      if not any(r_comp.match(path) for path in self.paths):
+        self.fail('Did not find pattern (%s) in the archive' % pattern)
+
+  def check_must_not_contain_regex(self, must_not_contain_regex):
+    """Fail if any regex matches (via re.match) any path in the archive."""
+    for pattern in must_not_contain_regex:
+      r_comp = re.compile(pattern)
+      for path in self.paths:
+        if r_comp.match(path):
+          self.fail('Found disallowed pattern (%s) in the archive' % pattern)
diff --git a/pkg/verify_archive_test_main.py.tpl b/pkg/verify_archive_test_main.py.tpl
new file mode 100644
index 0000000..d9b17cc
--- /dev/null
+++ b/pkg/verify_archive_test_main.py.tpl
@@ -0,0 +1,46 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for generated content manifest."""
+
+import unittest
+
+from pkg import verify_archive_test_lib
+
+class ${TEST_NAME}(verify_archive_test_lib.VerifyArchiveTest):
+  """Generated test case: each test_* method applies one rule attribute."""
+  def setUp(self):
+    super(${TEST_NAME}, self).setUp()
+    self.scan_target('${TARGET}')
+
+  def test_min_size(self):
+    self.assertMinSize(${MIN_SIZE})
+
+  def test_max_size(self):
+    self.assertMaxSize(${MAX_SIZE})
+
+  def test_must_contain(self):
+    self.check_must_contain(${MUST_CONTAIN})
+
+  def test_must_not_contain(self):
+    self.check_must_not_contain(${MUST_NOT_CONTAIN})
+
+  def test_must_contain_regex(self):
+    self.check_must_contain_regex(${MUST_CONTAIN_REGEX})
+
+  def test_must_not_contain_regex(self):
+    self.check_must_not_contain_regex(${MUST_NOT_CONTAIN_REGEX})
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/tar/BUILD b/tests/tar/BUILD
index 19504e1..07111d4 100644
--- a/tests/tar/BUILD
+++ b/tests/tar/BUILD
@@ -16,6 +16,7 @@
 
 # buildifier: disable=bzl-visibility
 load("//pkg:mappings.bzl", "pkg_files", "pkg_mklink", "strip_prefix")
+load("//pkg:verify_archive.bzl", "verify_archive_test")
 load("//pkg/private/tar:tar.bzl", "SUPPORTED_TAR_COMPRESSIONS", "pkg_tar")
 load("//tests:my_package_name.bzl", "my_package_naming")
 load("//tests/util:defs.bzl", "directory", "fake_artifact", "link_tree")
@@ -265,6 +266,25 @@ pkg_tar(
     ],
 )
 
+verify_archive_test(
+    name = "repackaging_long_filename_test",
+    target = ":test-tar-repackaging-long-filename",
+    must_contain = [
+        "can_i_repackage_a_file_with_a_long_name/file_with_a_ridiculously_long_name_consectetur_adipiscing_elit_fusce_laoreet_lorem_neque_sed_pharetra_erat.txt",
+    ],
+    # there is really no need for these cases. I just want to use all the test capabilities.
+    must_not_contain = [
+        "i_am_not here",
+    ],
+    must_contain_regex = [
+        ".*can_i_repackage_a_file_with_a_long_name(/|$)",  # tarfile drops a directory's trailing "/"
+    ],
+    must_not_contain_regex = [
+        "^five.is.right.out",
+    ],
+    max_size = 2,
+)
+
 pkg_tar(
     name = "test-tar-tree-artifact",
     srcs = [
-- 
cgit v1.2.3