diff options
author | aiuto <aiuto@google.com> | 2022-04-01 18:46:09 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-01 18:46:09 -0400 |
commit | 5acb94bb8eb26091519e99ff67ee3e42989df953 (patch) | |
tree | 2af723bc16615bec822938f36f382463bfa7eaf8 | |
parent | 85bd2ad132ac0bebc5b0456bc6a58a0b97dd6d60 (diff) | |
download | bazelbuild-rules_pkg-5acb94bb8eb26091519e99ff67ee3e42989df953.tar.gz |
Split apart archive.py to archive and tar_writer (#557)
* split apart archive.py to archive and tar_writer
No behavior change. Just make the division cleaner.
-rw-r--r-- | pkg/private/archive.py | 367 | ||||
-rw-r--r-- | pkg/private/tar/BUILD | 14 | ||||
-rw-r--r-- | pkg/private/tar/build_tar.py | 5 | ||||
-rw-r--r-- | pkg/private/tar/tar_writer.py | 367 | ||||
-rw-r--r-- | tests/BUILD | 22 | ||||
-rw-r--r-- | tests/archive_test.py | 98 | ||||
-rw-r--r-- | tests/deb/pkg_deb_test.py | 2 | ||||
-rw-r--r-- | tests/tar/BUILD | 17 | ||||
-rw-r--r-- | tests/tar/pkg_tar_test.py | 10 | ||||
-rw-r--r-- | tests/tar/tar_writer_test.py (renamed from tests/tar/archive_test.py) | 107 |
10 files changed, 535 insertions, 474 deletions
diff --git a/pkg/private/archive.py b/pkg/private/archive.py index 17e2da0..f88db97 100644 --- a/pkg/private/archive.py +++ b/pkg/private/archive.py @@ -11,33 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Archive manipulation library for the Docker rules.""" +"""Archive reader library for the .deb file testing.""" -# pylint: disable=g-import-not-at-top -import gzip import io import os -import subprocess -import tarfile -try: - import lzma # pylint: disable=g-import-not-at-top, unused-import - HAS_LZMA = True -except ImportError: - HAS_LZMA = False - -# This is slightly a lie. We do support xz fallback through the xz tool, but -# that is fragile. Users should stick to the expectations provided here. -COMPRESSIONS = ('', 'gz', 'bz2', 'xz') if HAS_LZMA else ('', 'gz', 'bz2') - - -# Use a deterministic mtime that doesn't confuse other programs. -# See: https://github.com/bazelbuild/bazel/issues/1299 -PORTABLE_MTIME = 946684800 # 2000-01-01 00:00:00.000 UTC - -_DEBUG_VERBOSITY = 0 - -class SimpleArFile(object): +class SimpleArReader(object): """A simple AR file reader. This enable to read AR file (System V variant) as described @@ -45,19 +24,15 @@ class SimpleArFile(object): The standard usage of this class is: - with SimpleArFile(filename) as ar: + with SimpleArReader(filename) as ar: nextFile = ar.next() while nextFile: - if _DEBUG_VERBOSITY > 0: - print(nextFile.filename) + print('This archive contains', nextFile.filename) nextFile = ar.next() Upon error, this class will raise a ArError exception. """ - # TODO(dmarting): We should use a standard library instead but python 2.7 - # does not have AR reading library. - class ArError(Exception): pass @@ -85,7 +60,7 @@ class SimpleArFile(object): self.size = int(f.read(10).strip()) pad = f.read(2) if pad != b'\x60\x0a': - raise SimpleArFile.ArError('Invalid AR file header') + raise SimpleArReader.ArError('Invalid AR file header') self.data = f.read(self.size) MAGIC_STRING = b'!<arch>\n' @@ -112,335 +87,3 @@ class SimpleArFile(object): if self.f.tell() > os.fstat(self.f.fileno()).st_size - 60: return None return self.SimpleArFileEntry(self.f) - - -class TarFileWriter(object): - """A wrapper to write tar files.""" - - class Error(Exception): - pass - - def __init__(self, - name, - compression='', - compressor='', - root_directory='', - default_mtime=None, - preserve_tar_mtimes=True): - """TarFileWriter wraps tarfile.open(). - - Args: - name: the tar file name. - compression: compression type: bzip2, bz2, gz, tgz, xz, lzma. - compressor: custom command to do the compression. - root_directory: virtual root to prepend to elements in the archive. - default_mtime: default mtime to use for elements in the archive. - May be an integer or the value 'portable' to use the date - 2000-01-01, which is compatible with non *nix OSes'. - preserve_tar_mtimes: if true, keep file mtimes from input tar file. - """ - self.preserve_mtime = preserve_tar_mtimes - if default_mtime is None: - self.default_mtime = 0 - elif default_mtime == 'portable': - self.default_mtime = PORTABLE_MTIME - else: - self.default_mtime = int(default_mtime) - - self.fileobj = None - self.compressor_cmd = (compressor or '').strip() - if self.compressor_cmd: - # Some custom command has been specified: no need for further - # configuration, we're just going to use it. - pass - # Support xz compression through xz... until we can use Py3 - elif compression in ['xz', 'lzma']: - if HAS_LZMA: - mode = 'w:xz' - else: - self.compressor_cmd = 'xz -F {} -'.format(compression) - elif compression in ['bzip2', 'bz2']: - mode = 'w:bz2' - else: - mode = 'w:' - if compression in ['tgz', 'gz']: - # The Tarfile class doesn't allow us to specify gzip's mtime attribute. - # Instead, we manually reimplement gzopen from tarfile.py and set mtime. - self.fileobj = gzip.GzipFile( - filename=name, mode='w', compresslevel=9, mtime=self.default_mtime) - self.compressor_proc = None - if self.compressor_cmd: - mode = 'w|' - self.compressor_proc = subprocess.Popen(self.compressor_cmd.split(), - stdin=subprocess.PIPE, - stdout=open(name, 'wb')) - self.fileobj = self.compressor_proc.stdin - self.name = name - if root_directory: - # tarfile uses / instead of os.path.sep, so convert to that - self.root_directory = root_directory.replace(os.path.sep, '/') - self.root_directory = self.root_directory.rstrip('/') + '/' - else: - self.root_directory = None - - self.tar = tarfile.open(name=name, mode=mode, fileobj=self.fileobj) - self.members = set() - self.directories = set() - # Preseed the added directory list with things we should not add. If we - # some day need to allow '.' or '/' as an explicit member of the archive, - # we can adjust that here based on the setting of root_dirctory. - self.directories.add('/') - self.directories.add('./') - - def __enter__(self): - return self - - def __exit__(self, t, v, traceback): - self.close() - - - def add_root_prefix(self, path: str) -> str: - """Add the root prefix to a path. - - If the path begins with / or the prefix itself, do nothing. - - Args: - path: a file path - Returns: - modified path. - """ - path = path.replace(os.path.sep, '/').rstrip('/') - while path.startswith('./'): - path = path[2:] - if not self.root_directory or path.startswith('/'): - return path - if (path + '/').startswith(self.root_directory): - return path - return self.root_directory + path - - def _have_added(self, path): - """Have we added this file before.""" - return (path in self.members) or (path in self.directories) - - def _addfile(self, info, fileobj=None): - """Add a file in the tar file if there is no conflict.""" - if info.type == tarfile.DIRTYPE: - # Enforce the ending / for directories so we correctly deduplicate. - if not info.name.endswith('/'): - info.name += '/' - if not self._have_added(info.name): - self.tar.addfile(info, fileobj) - self.members.add(info.name) - if info.type == tarfile.DIRTYPE: - self.directories.add(info.name) - elif info.type != tarfile.DIRTYPE: - print('Duplicate file in archive: %s, ' - 'picking first occurrence' % info.name) - - def add_directory_path(self, - path, - uid=0, - gid=0, - uname='', - gname='', - mtime=None, - mode=0o755): - """Add a directory to the current tar. - - Args: - path: the ('/' delimited) path of the file to add. - uid: owner user identifier. - gid: owner group identifier. - uname: owner user names. - gname: owner group names. - mtime: modification time to put in the archive. - mode: unix permission mode of the file, default: 0755. - """ - assert path[-1] == '/' - if not path or self._have_added(path): - return - if _DEBUG_VERBOSITY > 1: - print('DEBUG: adding directory', path) - tarinfo = tarfile.TarInfo(path) - tarinfo.type = tarfile.DIRTYPE - tarinfo.mtime = mtime - tarinfo.mode = mode - tarinfo.uid = uid - tarinfo.gid = gid - tarinfo.uname = uname - tarinfo.gname = gname - self._addfile(tarinfo) - - def add_parents(self, path, uid=0, gid=0, uname='', gname='', mtime=0, mode=0o755): - dirs = path.split('/') - parent_path = '' - for next_level in dirs[0:-1]: - parent_path = parent_path + next_level + '/' - self.add_directory_path( - parent_path, - uid=uid, - gid=gid, - uname=uname, - gname=gname, - mtime=mtime, - mode=0o755) - - def add_file(self, - name, - kind=tarfile.REGTYPE, - content=None, - link=None, - file_content=None, - uid=0, - gid=0, - uname='', - gname='', - mtime=None, - mode=None): - """Add a file to the current tar. - - Args: - name: the ('/' delimited) path of the file to add. - kind: the type of the file to add, see tarfile.*TYPE. - content: the content to put in the file. - link: if the file is a link, the destination of the link. - file_content: file to read the content from. Provide either this - one or `content` to specifies a content for the file. - uid: owner user identifier. - gid: owner group identifier. - uname: owner user names. - gname: owner group names. - mtime: modification time to put in the archive. - mode: unix permission mode of the file, default 0644 (0755). - """ - if not name: - return - if name == '.': - return - name = self.add_root_prefix(name) - if name in self.members: - return - - if mtime is None: - mtime = self.default_mtime - - # Make directories up the file - self.add_parents(name, mtime=mtime, mode=0o755, uid=uid, gid=gid, uname=uname, gname=gname) - - tarinfo = tarfile.TarInfo(name) - tarinfo.mtime = mtime - tarinfo.uid = uid - tarinfo.gid = gid - tarinfo.uname = uname - tarinfo.gname = gname - tarinfo.type = kind - if mode is None: - tarinfo.mode = 0o644 if kind == tarfile.REGTYPE else 0o755 - else: - tarinfo.mode = mode - if link: - tarinfo.linkname = link - if content: - content_bytes = content.encode('utf-8') - tarinfo.size = len(content_bytes) - self._addfile(tarinfo, io.BytesIO(content_bytes)) - elif file_content: - with open(file_content, 'rb') as f: - tarinfo.size = os.fstat(f.fileno()).st_size - self._addfile(tarinfo, f) - else: - self._addfile(tarinfo) - - def add_tar(self, - tar, - rootuid=None, - rootgid=None, - numeric=False, - name_filter=None, - prefix=None): - """Merge a tar content into the current tar, stripping timestamp. - - Args: - tar: the name of tar to extract and put content into the current tar. - rootuid: user id that we will pretend is root (replaced by uid 0). - rootgid: group id that we will pretend is root (replaced by gid 0). - numeric: set to true to strip out name of owners (and just use the - numeric values). - name_filter: filter out file by names. If not none, this method will be - called for each file to add, given the name and should return true if - the file is to be added to the final tar and false otherwise. - prefix: prefix to add to all file paths. This prefix is added to the - incoming file path, then the overall root_directory will also be - added. - - Raises: - TarFileWriter.Error: if an error happens when uncompressing the tar file. - """ - if prefix: - prefix = prefix.strip('/') + '/' - if _DEBUG_VERBOSITY > 1: - print('========================== prefix is', prefix) - intar = tarfile.open(name=tar, mode='r:*') - for tarinfo in intar: - if name_filter is None or name_filter(tarinfo.name): - if not self.preserve_mtime: - tarinfo.mtime = self.default_mtime - if rootuid is not None and tarinfo.uid == rootuid: - tarinfo.uid = 0 - tarinfo.uname = 'root' - if rootgid is not None and tarinfo.gid == rootgid: - tarinfo.gid = 0 - tarinfo.gname = 'root' - if numeric: - tarinfo.uname = '' - tarinfo.gname = '' - - in_name = tarinfo.name - if prefix: - in_name = prefix + in_name - tarinfo.name = self.add_root_prefix(in_name) - self.add_parents( - path=tarinfo.name, - mtime=tarinfo.mtime, - mode=0o755, - uid=tarinfo.uid, - gid=tarinfo.gid, - uname=tarinfo.uname, - gname=tarinfo.gname) - - if prefix is not None: - # Relocate internal hardlinks as well to avoid breaking them. - link = tarinfo.linkname - if link.startswith('.') and tarinfo.type == tarfile.LNKTYPE: - tarinfo.linkname = '.' + prefix + link.lstrip('.') - - # Remove path pax header to ensure that the proposed name is going - # to be used. Without this, files with long names will not be - # properly written to its new path. - if 'path' in tarinfo.pax_headers: - del tarinfo.pax_headers['path'] - - if tarinfo.isfile(): - # use extractfile(tarinfo) instead of tarinfo.name to preserve - # seek position in intar - self._addfile(tarinfo, intar.extractfile(tarinfo)) - else: - self._addfile(tarinfo) - intar.close() - - def close(self): - """Close the output tar file. - - This class should not be used anymore after calling that method. - - Raises: - TarFileWriter.Error: if an error happens when compressing the output file. - """ - self.tar.close() - # Close the file object if necessary. - if self.fileobj: - self.fileobj.close() - if self.compressor_proc and self.compressor_proc.wait() != 0: - raise self.Error('Custom compression command ' - '"{}" failed'.format(self.compressor_cmd)) - diff --git a/pkg/private/tar/BUILD b/pkg/private/tar/BUILD index 4dc94e6..0717eae 100644 --- a/pkg/private/tar/BUILD +++ b/pkg/private/tar/BUILD @@ -16,7 +16,7 @@ All interfaces are subject to change at any time. """ -load("@rules_python//python:defs.bzl", "py_binary") +load("@rules_python//python:defs.bzl", "py_binary", "py_library") licenses(["notice"]) @@ -46,9 +46,21 @@ py_binary( srcs_version = "PY3", visibility = ["//visibility:public"], deps = [ + ":tar_writer", "//pkg/private:archive", "//pkg/private:build_info", "//pkg/private:helpers", "//pkg/private:manifest", ], ) + +py_library( + name = "tar_writer", + srcs = [ + "tar_writer.py", + ], + srcs_version = "PY3", + visibility = [ + "//tests:__subpackages__", + ], +) diff --git a/pkg/private/tar/build_tar.py b/pkg/private/tar/build_tar.py index 7e7827c..ca4e566 100644 --- a/pkg/private/tar/build_tar.py +++ b/pkg/private/tar/build_tar.py @@ -23,6 +23,7 @@ from pkg.private import archive from pkg.private import helpers from pkg.private import build_info from pkg.private import manifest +from pkg.private.tar import tar_writer def normpath(path): @@ -52,7 +53,7 @@ class TarFile(object): self.default_mtime = default_mtime def __enter__(self): - self.tarfile = archive.TarFileWriter( + self.tarfile = tar_writer.TarFileWriter( self.output, self.compression, self.compressor, @@ -199,7 +200,7 @@ class TarFile(object): Raises: DebError: if the format of the deb archive is incorrect. """ - with archive.SimpleArFile(deb) as arfile: + with archive.SimpleArReader(deb) as arfile: current = next(arfile) while current and not current.filename.startswith('data.'): current = next(arfile) diff --git a/pkg/private/tar/tar_writer.py b/pkg/private/tar/tar_writer.py new file mode 100644 index 0000000..6da6f2f --- /dev/null +++ b/pkg/private/tar/tar_writer.py @@ -0,0 +1,367 @@ +# Copyright 2022 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tar writing helper.""" + +import gzip +import io +import os +import subprocess +import tarfile + +try: + import lzma # pylint: disable=g-import-not-at-top, unused-import + HAS_LZMA = True +except ImportError: + HAS_LZMA = False + +# This is slightly a lie. We do support xz fallback through the xz tool, but +# that is fragile. Users should stick to the expectations provided here. +COMPRESSIONS = ('', 'gz', 'bz2', 'xz') if HAS_LZMA else ('', 'gz', 'bz2') + +# Use a deterministic mtime that doesn't confuse other programs. +# See: https://github.com/bazelbuild/bazel/issues/1299 +PORTABLE_MTIME = 946684800 # 2000-01-01 00:00:00.000 UTC + +_DEBUG_VERBOSITY = 0 + + +class TarFileWriter(object): + """A wrapper to write tar files.""" + + class Error(Exception): + pass + + def __init__(self, + name, + compression='', + compressor='', + root_directory='', + default_mtime=None, + preserve_tar_mtimes=True): + """TarFileWriter wraps tarfile.open(). + + Args: + name: the tar file name. + compression: compression type: bzip2, bz2, gz, tgz, xz, lzma. + compressor: custom command to do the compression. + root_directory: virtual root to prepend to elements in the archive. + default_mtime: default mtime to use for elements in the archive. + May be an integer or the value 'portable' to use the date + 2000-01-01, which is compatible with non *nix OSes'. + preserve_tar_mtimes: if true, keep file mtimes from input tar file. + """ + self.preserve_mtime = preserve_tar_mtimes + if default_mtime is None: + self.default_mtime = 0 + elif default_mtime == 'portable': + self.default_mtime = PORTABLE_MTIME + else: + self.default_mtime = int(default_mtime) + + self.fileobj = None + self.compressor_cmd = (compressor or '').strip() + if self.compressor_cmd: + # Some custom command has been specified: no need for further + # configuration, we're just going to use it. + pass + # Support xz compression through xz... until we can use Py3 + elif compression in ['xz', 'lzma']: + if HAS_LZMA: + mode = 'w:xz' + else: + self.compressor_cmd = 'xz -F {} -'.format(compression) + elif compression in ['bzip2', 'bz2']: + mode = 'w:bz2' + else: + mode = 'w:' + if compression in ['tgz', 'gz']: + # The Tarfile class doesn't allow us to specify gzip's mtime attribute. + # Instead, we manually reimplement gzopen from tarfile.py and set mtime. + self.fileobj = gzip.GzipFile( + filename=name, mode='w', compresslevel=9, mtime=self.default_mtime) + self.compressor_proc = None + if self.compressor_cmd: + mode = 'w|' + self.compressor_proc = subprocess.Popen(self.compressor_cmd.split(), + stdin=subprocess.PIPE, + stdout=open(name, 'wb')) + self.fileobj = self.compressor_proc.stdin + self.name = name + if root_directory: + # tarfile uses / instead of os.path.sep, so convert to that + self.root_directory = root_directory.replace(os.path.sep, '/') + self.root_directory = self.root_directory.rstrip('/') + '/' + else: + self.root_directory = None + + self.tar = tarfile.open(name=name, mode=mode, fileobj=self.fileobj) + self.members = set() + self.directories = set() + # Preseed the added directory list with things we should not add. If we + # some day need to allow '.' or '/' as an explicit member of the archive, + # we can adjust that here based on the setting of root_dirctory. + self.directories.add('/') + self.directories.add('./') + + def __enter__(self): + return self + + def __exit__(self, t, v, traceback): + self.close() + + def add_root_prefix(self, path: str) -> str: + """Add the root prefix to a path. + + If the path begins with / or the prefix itself, do nothing. + + Args: + path: a file path + Returns: + modified path. + """ + path = path.replace(os.path.sep, '/').rstrip('/') + while path.startswith('./'): + path = path[2:] + if not self.root_directory or path.startswith('/'): + return path + if (path + '/').startswith(self.root_directory): + return path + return self.root_directory + path + + def _have_added(self, path): + """Have we added this file before.""" + return (path in self.members) or (path in self.directories) + + def _addfile(self, info, fileobj=None): + """Add a file in the tar file if there is no conflict.""" + if info.type == tarfile.DIRTYPE: + # Enforce the ending / for directories so we correctly deduplicate. + if not info.name.endswith('/'): + info.name += '/' + if not self._have_added(info.name): + self.tar.addfile(info, fileobj) + self.members.add(info.name) + if info.type == tarfile.DIRTYPE: + self.directories.add(info.name) + elif info.type != tarfile.DIRTYPE: + print('Duplicate file in archive: %s, ' + 'picking first occurrence' % info.name) + + def add_directory_path(self, + path, + uid=0, + gid=0, + uname='', + gname='', + mtime=None, + mode=0o755): + """Add a directory to the current tar. + + Args: + path: the ('/' delimited) path of the file to add. + uid: owner user identifier. + gid: owner group identifier. + uname: owner user names. + gname: owner group names. + mtime: modification time to put in the archive. + mode: unix permission mode of the file, default: 0755. + """ + assert path[-1] == '/' + if not path or self._have_added(path): + return + if _DEBUG_VERBOSITY > 1: + print('DEBUG: adding directory', path) + tarinfo = tarfile.TarInfo(path) + tarinfo.type = tarfile.DIRTYPE + tarinfo.mtime = mtime + tarinfo.mode = mode + tarinfo.uid = uid + tarinfo.gid = gid + tarinfo.uname = uname + tarinfo.gname = gname + self._addfile(tarinfo) + + def add_parents(self, path, uid=0, gid=0, uname='', gname='', mtime=0, mode=0o755): + dirs = path.split('/') + parent_path = '' + for next_level in dirs[0:-1]: + parent_path = parent_path + next_level + '/' + self.add_directory_path( + parent_path, + uid=uid, + gid=gid, + uname=uname, + gname=gname, + mtime=mtime, + mode=0o755) + + def add_file(self, + name, + kind=tarfile.REGTYPE, + content=None, + link=None, + file_content=None, + uid=0, + gid=0, + uname='', + gname='', + mtime=None, + mode=None): + """Add a file to the current tar. + + Args: + name: the ('/' delimited) path of the file to add. + kind: the type of the file to add, see tarfile.*TYPE. + content: the content to put in the file. + link: if the file is a link, the destination of the link. + file_content: file to read the content from. Provide either this + one or `content` to specifies a content for the file. + uid: owner user identifier. + gid: owner group identifier. + uname: owner user names. + gname: owner group names. + mtime: modification time to put in the archive. + mode: unix permission mode of the file, default 0644 (0755). + """ + if not name: + return + if name == '.': + return + name = self.add_root_prefix(name) + if name in self.members: + return + + if mtime is None: + mtime = self.default_mtime + + # Make directories up the file + self.add_parents(name, mtime=mtime, mode=0o755, uid=uid, gid=gid, uname=uname, gname=gname) + + tarinfo = tarfile.TarInfo(name) + tarinfo.mtime = mtime + tarinfo.uid = uid + tarinfo.gid = gid + tarinfo.uname = uname + tarinfo.gname = gname + tarinfo.type = kind + if mode is None: + tarinfo.mode = 0o644 if kind == tarfile.REGTYPE else 0o755 + else: + tarinfo.mode = mode + if link: + tarinfo.linkname = link + if content: + content_bytes = content.encode('utf-8') + tarinfo.size = len(content_bytes) + self._addfile(tarinfo, io.BytesIO(content_bytes)) + elif file_content: + with open(file_content, 'rb') as f: + tarinfo.size = os.fstat(f.fileno()).st_size + self._addfile(tarinfo, f) + else: + self._addfile(tarinfo) + + def add_tar(self, + tar, + rootuid=None, + rootgid=None, + numeric=False, + name_filter=None, + prefix=None): + """Merge a tar content into the current tar, stripping timestamp. + + Args: + tar: the name of tar to extract and put content into the current tar. + rootuid: user id that we will pretend is root (replaced by uid 0). + rootgid: group id that we will pretend is root (replaced by gid 0). + numeric: set to true to strip out name of owners (and just use the + numeric values). + name_filter: filter out file by names. If not none, this method will be + called for each file to add, given the name and should return true if + the file is to be added to the final tar and false otherwise. + prefix: prefix to add to all file paths. This prefix is added to the + incoming file path, then the overall root_directory will also be + added. + + Raises: + TarFileWriter.Error: if an error happens when uncompressing the tar file. + """ + if prefix: + prefix = prefix.strip('/') + '/' + if _DEBUG_VERBOSITY > 1: + print('========================== prefix is', prefix) + intar = tarfile.open(name=tar, mode='r:*') + for tarinfo in intar: + if name_filter is None or name_filter(tarinfo.name): + if not self.preserve_mtime: + tarinfo.mtime = self.default_mtime + if rootuid is not None and tarinfo.uid == rootuid: + tarinfo.uid = 0 + tarinfo.uname = 'root' + if rootgid is not None and tarinfo.gid == rootgid: + tarinfo.gid = 0 + tarinfo.gname = 'root' + if numeric: + tarinfo.uname = '' + tarinfo.gname = '' + + in_name = tarinfo.name + if prefix: + in_name = prefix + in_name + tarinfo.name = self.add_root_prefix(in_name) + self.add_parents( + path=tarinfo.name, + mtime=tarinfo.mtime, + mode=0o755, + uid=tarinfo.uid, + gid=tarinfo.gid, + uname=tarinfo.uname, + gname=tarinfo.gname) + + if prefix is not None: + # Relocate internal hardlinks as well to avoid breaking them. + link = tarinfo.linkname + if link.startswith('.') and tarinfo.type == tarfile.LNKTYPE: + tarinfo.linkname = '.' + prefix + link.lstrip('.') + + # Remove path pax header to ensure that the proposed name is going + # to be used. Without this, files with long names will not be + # properly written to its new path. + if 'path' in tarinfo.pax_headers: + del tarinfo.pax_headers['path'] + + if tarinfo.isfile(): + # use extractfile(tarinfo) instead of tarinfo.name to preserve + # seek position in intar + self._addfile(tarinfo, intar.extractfile(tarinfo)) + else: + self._addfile(tarinfo) + intar.close() + + def close(self): + """Close the output tar file. + + This class should not be used anymore after calling that method. + + Raises: + TarFileWriter.Error: if an error happens when compressing the output file. + """ + self.tar.close() + # Close the file object if necessary. + if self.fileobj: + self.fileobj.close() + if self.compressor_proc and self.compressor_proc.wait() != 0: + raise self.Error('Custom compression command ' + '"{}" failed'.format(self.compressor_cmd)) + diff --git a/tests/BUILD b/tests/BUILD index 8221529..7d0d8ed 100644 --- a/tests/BUILD +++ b/tests/BUILD @@ -37,6 +37,28 @@ filegroup( ) py_test( + name = "archive_test", + srcs = [ + "archive_test.py", + ], + data = [ + "//tests:testdata/empty.ar", + "//tests:testdata/a_ab.ar", + "//tests:testdata/a.ar", + "//tests:testdata/a_b_ab.ar", + "//tests:testdata/a_b.ar", + "//tests:testdata/ab.ar", + "//tests:testdata/b.ar", + ], + python_version = "PY3", + srcs_version = "PY3", + deps = [ + "//pkg/private:archive", + "@bazel_tools//tools/python/runfiles", + ], +) + +py_test( name = "path_test", srcs = ["path_test.py"], data = ["//pkg:path.bzl"], diff --git a/tests/archive_test.py b/tests/archive_test.py new file mode 100644 index 0000000..21ddd5e --- /dev/null +++ b/tests/archive_test.py @@ -0,0 +1,98 @@ +# Copyright 2015 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Testing for archive.""" + +import unittest + +from bazel_tools.tools.python.runfiles import runfiles +from pkg.private import archive + + +class SimpleArReaderTest(unittest.TestCase): + """Testing for SimpleArReader class.""" + + def setUp(self): + super(SimpleArReaderTest, self).setUp() + self.data_files = runfiles.Create() + + def assertArFileContent(self, arfile, content): + """Assert that arfile contains exactly the entry described by `content`. + + Args: + arfile: the path to the AR file to test. + content: an array describing the expected content of the AR file. + Each entry in that list should be a dictionary where each field + is a field to test in the corresponding SimpleArFileEntry. For + testing the presence of a file "x", then the entry could simply + be `{"filename": "x"}`, the missing field will be ignored. + """ + print("READING: %s" % arfile) + with archive.SimpleArReader(arfile) as f: + current = f.next() + i = 0 + while current: + error_msg = "Extraneous file at end of archive %s: %s" % ( + arfile, + current.filename + ) + self.assertLess(i, len(content), error_msg) + for k, v in content[i].items(): + value = getattr(current, k) + error_msg = " ".join([ + "Value `%s` for key `%s` of file" % (value, k), + "%s in archive %s does" % (current.filename, arfile), + "not match expected value `%s`" % v + ]) + self.assertEqual(value, v, error_msg) + current = f.next() + i += 1 + if i < len(content): + self.fail("Missing file %s in archive %s" % (content[i], arfile)) + + def testEmptyArFile(self): + self.assertArFileContent( + self.data_files.Rlocation("rules_pkg/tests/testdata/empty.ar"), + []) + + def assertSimpleFileContent(self, names): + datafile = self.data_files.Rlocation( + "rules_pkg/tests/testdata/" + "_".join(names) + ".ar") + # pylint: disable=g-complex-comprehension + content = [{"filename": n, + "size": len(n.encode("utf-8")), + "data": n.encode("utf-8")} + for n in names] + self.assertArFileContent(datafile, content) + + def testAFile(self): + self.assertSimpleFileContent(["a"]) + + def testBFile(self): + self.assertSimpleFileContent(["b"]) + + def testABFile(self): + self.assertSimpleFileContent(["ab"]) + + def testA_BFile(self): + self.assertSimpleFileContent(["a", "b"]) + + def testA_ABFile(self): + self.assertSimpleFileContent(["a", "ab"]) + + def testA_B_ABFile(self): + self.assertSimpleFileContent(["a", "b", "ab"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/deb/pkg_deb_test.py b/tests/deb/pkg_deb_test.py index 49f934e..7cb7240 100644 --- a/tests/deb/pkg_deb_test.py +++ b/tests/deb/pkg_deb_test.py @@ -33,7 +33,7 @@ class DebInspect(object): self.deb_version = None self.data = None self.control = None - with archive.SimpleArFile(deb_file) as f: + with archive.SimpleArReader(deb_file) as f: info = f.next() while info: if info.filename == 'debian-binary': diff --git a/tests/tar/BUILD b/tests/tar/BUILD index db2dcca..55b22dc 100644 --- a/tests/tar/BUILD +++ b/tests/tar/BUILD @@ -25,24 +25,15 @@ load("@bazel_skylib//rules:copy_file.bzl", "copy_file") package(default_applicable_licenses = ["//:license"]) py_test( - name = "archive_test", + name = "tar_writer_test", srcs = [ - "archive_test.py", + "tar_writer_test.py", ], data = [ ":compressor", ":test_tar_compression", ":test_tar_package_dir", ":test_tar_package_dir_file", - # TODO(aiuto): Some of these are used by pkg_deb tests. Figure out why - # and eliminate the weird cross coupling. - "//tests:testdata/empty.ar", - "//tests:testdata/a_ab.ar", - "//tests:testdata/a.ar", - "//tests:testdata/a_b_ab.ar", - "//tests:testdata/a_b.ar", - "//tests:testdata/ab.ar", - "//tests:testdata/b.ar", "//tests:testdata/tar_test.tar", "//tests:testdata/tar_test.tar.bz2", "//tests:testdata/tar_test.tar.gz", @@ -52,7 +43,7 @@ py_test( python_version = "PY3", srcs_version = "PY3", deps = [ - "//pkg/private:archive", + "//pkg/private/tar:tar_writer", "@bazel_tools//tools/python/runfiles", ], ) @@ -327,7 +318,7 @@ py_test( ], python_version = "PY3", deps = [ - "//pkg/private:archive", + "//pkg/private/tar:tar_writer", "@bazel_tools//tools/python/runfiles", ], ) diff --git a/tests/tar/pkg_tar_test.py b/tests/tar/pkg_tar_test.py index ec05a0d..255bdea 100644 --- a/tests/tar/pkg_tar_test.py +++ b/tests/tar/pkg_tar_test.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Testing for archive.""" +"""Testing for pkg_tar.""" import tarfile import unittest from bazel_tools.tools.python.runfiles import runfiles -from pkg.private import archive +from pkg.private.tar import tar_writer PORTABLE_MTIME = 946684800 # 2000-01-01 00:00:00.000 UTC @@ -156,7 +156,8 @@ class PkgTarTest(unittest.TestCase): 'mode': 0o755, 'uid': 42, 'gid': 24, 'uname': 'titi', 'gname': 'tata'}, ] - for ext in [('.' + comp if comp else '') for comp in archive.COMPRESSIONS]: + for ext in [('.' + comp if comp else '') + for comp in tar_writer.COMPRESSIONS]: with self.subTest(ext=ext): self.assertTarFileContent('test-tar-basic-%s.tar%s' % (ext[1:], ext), content) @@ -171,7 +172,8 @@ class PkgTarTest(unittest.TestCase): {'name': 'usr/titi', 'mode': 0o755, 'uid': 42, 'gid': 24}, {'name': 'BUILD'}, ] - for ext in [('.' + comp if comp else '') for comp in archive.COMPRESSIONS]: + for ext in [('.' + comp if comp else '') + for comp in tar_writer.COMPRESSIONS]: with self.subTest(ext=ext): self.assertTarFileContent('test-tar-inclusion-%s.tar' % ext[1:], content) diff --git a/tests/tar/archive_test.py b/tests/tar/tar_writer_test.py index 659dd2a..f3ff3e5 100644 --- a/tests/tar/archive_test.py +++ b/tests/tar/tar_writer_test.py @@ -11,92 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Testing for archive.""" +"""Testing for tar_writer.""" import os import tarfile import unittest from bazel_tools.tools.python.runfiles import runfiles -from pkg.private import archive +from pkg.private.tar import tar_writer from tests.tar import compressor -class SimpleArFileTest(unittest.TestCase): - """Testing for SimpleArFile class.""" - - def setUp(self): - super(SimpleArFileTest, self).setUp() - self.data_files = runfiles.Create() - - def assertArFileContent(self, arfile, content): - """Assert that arfile contains exactly the entry described by `content`. - - Args: - arfile: the path to the AR file to test. - content: an array describing the expected content of the AR file. - Each entry in that list should be a dictionary where each field - is a field to test in the corresponding SimpleArFileEntry. For - testing the presence of a file "x", then the entry could simply - be `{"filename": "x"}`, the missing field will be ignored. - """ - print("READING: %s" % arfile) - with archive.SimpleArFile(arfile) as f: - current = f.next() - i = 0 - while current: - error_msg = "Extraneous file at end of archive %s: %s" % ( - arfile, - current.filename - ) - self.assertLess(i, len(content), error_msg) - for k, v in content[i].items(): - value = getattr(current, k) - error_msg = " ".join([ - "Value `%s` for key `%s` of file" % (value, k), - "%s in archive %s does" % (current.filename, arfile), - "not match expected value `%s`" % v - ]) - self.assertEqual(value, v, error_msg) - current = f.next() - i += 1 - if i < len(content): - self.fail("Missing file %s in archive %s" % (content[i], arfile)) - - def testEmptyArFile(self): - self.assertArFileContent( - self.data_files.Rlocation("rules_pkg/tests/testdata/empty.ar"), - []) - - def assertSimpleFileContent(self, names): - datafile = self.data_files.Rlocation( - "rules_pkg/tests/testdata/" + "_".join(names) + ".ar") - # pylint: disable=g-complex-comprehension - content = [{"filename": n, - "size": len(n.encode("utf-8")), - "data": n.encode("utf-8")} - for n in names] - self.assertArFileContent(datafile, content) - - def testAFile(self): - self.assertSimpleFileContent(["a"]) - - def testBFile(self): - self.assertSimpleFileContent(["b"]) - - def testABFile(self): - self.assertSimpleFileContent(["ab"]) - - def testA_BFile(self): - self.assertSimpleFileContent(["a", "b"]) - - def testA_ABFile(self): - self.assertSimpleFileContent(["a", "ab"]) - - def testA_B_ABFile(self): - self.assertSimpleFileContent(["a", "b", "ab"]) - - class TarFileWriterTest(unittest.TestCase): """Testing for TarFileWriter class.""" @@ -154,12 +79,12 @@ class TarFileWriterTest(unittest.TestCase): os.remove(self.tempfile) def testEmptyTarFile(self): - with archive.TarFileWriter(self.tempfile): + with tar_writer.TarFileWriter(self.tempfile): pass self.assertTarFileContent(self.tempfile, []) def assertSimpleFileContent(self, names): - with archive.TarFileWriter(self.tempfile) as f: + with tar_writer.TarFileWriter(self.tempfile) as f: for n in names: f.add_file(n, content=n) # pylint: disable=g-complex-comprehension @@ -178,7 +103,7 @@ class TarFileWriterTest(unittest.TestCase): self.assertSimpleFileContent(["./a", "./b", "./ab"]) def testDottedFiles(self): - with archive.TarFileWriter(self.tempfile) as f: + with tar_writer.TarFileWriter(self.tempfile) as f: f.add_file("a") f.add_file("/b") f.add_file("./c") @@ -200,8 +125,8 @@ class TarFileWriterTest(unittest.TestCase): {"name": "a", "data": b"a"}, {"name": "ab", "data": b"ab"}, ] - for ext in [("." + comp if comp else "") for comp in archive.COMPRESSIONS]: - with archive.TarFileWriter(self.tempfile) as f: + for ext in [("." + comp if comp else "") for comp in tar_writer.COMPRESSIONS]: + with tar_writer.TarFileWriter(self.tempfile) as f: datafile = self.data_files.Rlocation( "rules_pkg/tests/testdata/tar_test.tar" + ext) f.add_tar(datafile, name_filter=lambda n: n != "./b") @@ -213,26 +138,26 @@ class TarFileWriterTest(unittest.TestCase): {"name": "foo/a", "data": b"a"}, {"name": "foo/ab", "data": b"ab"}, ] - with archive.TarFileWriter(self.tempfile, root_directory="foo") as f: + with tar_writer.TarFileWriter(self.tempfile, root_directory="foo") as f: datafile = self.data_files.Rlocation( "rules_pkg/tests/testdata/tar_test.tar") f.add_tar(datafile, name_filter=lambda n: n != "./b") self.assertTarFileContent(self.tempfile, content) def testDefaultMtimeNotProvided(self): - with archive.TarFileWriter(self.tempfile) as f: + with tar_writer.TarFileWriter(self.tempfile) as f: self.assertEqual(f.default_mtime, 0) def testDefaultMtimeProvided(self): - with archive.TarFileWriter(self.tempfile, default_mtime=1234) as f: + with tar_writer.TarFileWriter(self.tempfile, default_mtime=1234) as f: self.assertEqual(f.default_mtime, 1234) def testPortableMtime(self): - with archive.TarFileWriter(self.tempfile, default_mtime="portable") as f: + with tar_writer.TarFileWriter(self.tempfile, default_mtime="portable") as f: self.assertEqual(f.default_mtime, 946684800) def testPreserveTarMtimesTrueByDefault(self): - with archive.TarFileWriter(self.tempfile) as f: + with tar_writer.TarFileWriter(self.tempfile) as f: input_tar_path = self.data_files.Rlocation( "rules_pkg/tests/testdata/tar_test.tar") f.add_tar(input_tar_path) @@ -245,7 +170,7 @@ class TarFileWriterTest(unittest.TestCase): self.assertEqual(input_file.mtime, output_file.mtime) def testPreserveTarMtimesFalse(self): - with archive.TarFileWriter(self.tempfile, preserve_tar_mtimes=False) as f: + with tar_writer.TarFileWriter(self.tempfile, preserve_tar_mtimes=False) as f: input_tar_path = self.data_files.Rlocation( "rules_pkg/tests/testdata/tar_test.tar") f.add_tar(input_tar_path) @@ -253,7 +178,7 @@ class TarFileWriterTest(unittest.TestCase): self.assertEqual(output_file.mtime, 0) def testAddingDirectoriesForFile(self): - with archive.TarFileWriter(self.tempfile) as f: + with tar_writer.TarFileWriter(self.tempfile) as f: f.add_file("d/f") content = [ {"name": "d", "mode": 0o755}, @@ -262,7 +187,7 @@ class TarFileWriterTest(unittest.TestCase): self.assertTarFileContent(self.tempfile, content) def testAddingDirectoriesForFileManually(self): - with archive.TarFileWriter(self.tempfile) as f: + with tar_writer.TarFileWriter(self.tempfile) as f: f.add_file("d", tarfile.DIRTYPE) f.add_file("d/f") @@ -288,7 +213,7 @@ class TarFileWriterTest(unittest.TestCase): self.assertTarFileContent(self.tempfile, content) def testChangingRootDirectory(self): - with archive.TarFileWriter(self.tempfile, root_directory="root") as f: + with tar_writer.TarFileWriter(self.tempfile, root_directory="root") as f: f.add_file("d", tarfile.DIRTYPE) f.add_file("d/f") |