summary refs log tree commit diff
diff options
context:
space:
mode:
author aiuto <aiuto@google.com> 2022-04-01 18:46:09 -0400
committer GitHub <noreply@github.com> 2022-04-01 18:46:09 -0400
commit 5acb94bb8eb26091519e99ff67ee3e42989df953 (patch)
tree 2af723bc16615bec822938f36f382463bfa7eaf8
parent 85bd2ad132ac0bebc5b0456bc6a58a0b97dd6d60 (diff)
download bazelbuild-rules_pkg-5acb94bb8eb26091519e99ff67ee3e42989df953.tar.gz
Split apart archive.py to archive and tar_writer (#557)
* split apart archive.py to archive and tar_writer. No behavior change. Just make the division cleaner.
-rw-r--r-- pkg/private/archive.py 367
-rw-r--r-- pkg/private/tar/BUILD 14
-rw-r--r-- pkg/private/tar/build_tar.py 5
-rw-r--r-- pkg/private/tar/tar_writer.py 367
-rw-r--r-- tests/BUILD 22
-rw-r--r-- tests/archive_test.py 98
-rw-r--r-- tests/deb/pkg_deb_test.py 2
-rw-r--r-- tests/tar/BUILD 17
-rw-r--r-- tests/tar/pkg_tar_test.py 10
-rw-r--r-- tests/tar/tar_writer_test.py (renamed from tests/tar/archive_test.py) 107
10 files changed, 535 insertions, 474 deletions
diff --git a/pkg/private/archive.py b/pkg/private/archive.py
index 17e2da0..f88db97 100644
--- a/pkg/private/archive.py
+++ b/pkg/private/archive.py
@@ -11,33 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-"""Archive manipulation library for the Docker rules."""
+"""Archive reader library for the .deb file testing."""
-# pylint: disable=g-import-not-at-top
-import gzip
import io
import os
-import subprocess
-import tarfile
-try:
- import lzma # pylint: disable=g-import-not-at-top, unused-import
- HAS_LZMA = True
-except ImportError:
- HAS_LZMA = False
-
-# This is slightly a lie. We do support xz fallback through the xz tool, but
-# that is fragile. Users should stick to the expectations provided here.
-COMPRESSIONS = ('', 'gz', 'bz2', 'xz') if HAS_LZMA else ('', 'gz', 'bz2')
-
-
-# Use a deterministic mtime that doesn't confuse other programs.
-# See: https://github.com/bazelbuild/bazel/issues/1299
-PORTABLE_MTIME = 946684800 # 2000-01-01 00:00:00.000 UTC
-
-_DEBUG_VERBOSITY = 0
-
-class SimpleArFile(object):
+class SimpleArReader(object):
"""A simple AR file reader.
This enable to read AR file (System V variant) as described
@@ -45,19 +24,15 @@ class SimpleArFile(object):
The standard usage of this class is:
- with SimpleArFile(filename) as ar:
+ with SimpleArReader(filename) as ar:
nextFile = ar.next()
while nextFile:
- if _DEBUG_VERBOSITY > 0:
- print(nextFile.filename)
+ print('This archive contains', nextFile.filename)
nextFile = ar.next()
Upon error, this class will raise a ArError exception.
"""
- # TODO(dmarting): We should use a standard library instead but python 2.7
- # does not have AR reading library.
-
class ArError(Exception):
pass
@@ -85,7 +60,7 @@ class SimpleArFile(object):
self.size = int(f.read(10).strip())
pad = f.read(2)
if pad != b'\x60\x0a':
- raise SimpleArFile.ArError('Invalid AR file header')
+ raise SimpleArReader.ArError('Invalid AR file header')
self.data = f.read(self.size)
MAGIC_STRING = b'!<arch>\n'
@@ -112,335 +87,3 @@ class SimpleArFile(object):
if self.f.tell() > os.fstat(self.f.fileno()).st_size - 60:
return None
return self.SimpleArFileEntry(self.f)
-
-
-class TarFileWriter(object):
- """A wrapper to write tar files."""
-
- class Error(Exception):
- pass
-
- def __init__(self,
- name,
- compression='',
- compressor='',
- root_directory='',
- default_mtime=None,
- preserve_tar_mtimes=True):
- """TarFileWriter wraps tarfile.open().
-
- Args:
- name: the tar file name.
- compression: compression type: bzip2, bz2, gz, tgz, xz, lzma.
- compressor: custom command to do the compression.
- root_directory: virtual root to prepend to elements in the archive.
- default_mtime: default mtime to use for elements in the archive.
- May be an integer or the value 'portable' to use the date
- 2000-01-01, which is compatible with non *nix OSes'.
- preserve_tar_mtimes: if true, keep file mtimes from input tar file.
- """
- self.preserve_mtime = preserve_tar_mtimes
- if default_mtime is None:
- self.default_mtime = 0
- elif default_mtime == 'portable':
- self.default_mtime = PORTABLE_MTIME
- else:
- self.default_mtime = int(default_mtime)
-
- self.fileobj = None
- self.compressor_cmd = (compressor or '').strip()
- if self.compressor_cmd:
- # Some custom command has been specified: no need for further
- # configuration, we're just going to use it.
- pass
- # Support xz compression through xz... until we can use Py3
- elif compression in ['xz', 'lzma']:
- if HAS_LZMA:
- mode = 'w:xz'
- else:
- self.compressor_cmd = 'xz -F {} -'.format(compression)
- elif compression in ['bzip2', 'bz2']:
- mode = 'w:bz2'
- else:
- mode = 'w:'
- if compression in ['tgz', 'gz']:
- # The Tarfile class doesn't allow us to specify gzip's mtime attribute.
- # Instead, we manually reimplement gzopen from tarfile.py and set mtime.
- self.fileobj = gzip.GzipFile(
- filename=name, mode='w', compresslevel=9, mtime=self.default_mtime)
- self.compressor_proc = None
- if self.compressor_cmd:
- mode = 'w|'
- self.compressor_proc = subprocess.Popen(self.compressor_cmd.split(),
- stdin=subprocess.PIPE,
- stdout=open(name, 'wb'))
- self.fileobj = self.compressor_proc.stdin
- self.name = name
- if root_directory:
- # tarfile uses / instead of os.path.sep, so convert to that
- self.root_directory = root_directory.replace(os.path.sep, '/')
- self.root_directory = self.root_directory.rstrip('/') + '/'
- else:
- self.root_directory = None
-
- self.tar = tarfile.open(name=name, mode=mode, fileobj=self.fileobj)
- self.members = set()
- self.directories = set()
- # Preseed the added directory list with things we should not add. If we
- # some day need to allow '.' or '/' as an explicit member of the archive,
- # we can adjust that here based on the setting of root_dirctory.
- self.directories.add('/')
- self.directories.add('./')
-
- def __enter__(self):
- return self
-
- def __exit__(self, t, v, traceback):
- self.close()
-
-
- def add_root_prefix(self, path: str) -> str:
- """Add the root prefix to a path.
-
- If the path begins with / or the prefix itself, do nothing.
-
- Args:
- path: a file path
- Returns:
- modified path.
- """
- path = path.replace(os.path.sep, '/').rstrip('/')
- while path.startswith('./'):
- path = path[2:]
- if not self.root_directory or path.startswith('/'):
- return path
- if (path + '/').startswith(self.root_directory):
- return path
- return self.root_directory + path
-
- def _have_added(self, path):
- """Have we added this file before."""
- return (path in self.members) or (path in self.directories)
-
- def _addfile(self, info, fileobj=None):
- """Add a file in the tar file if there is no conflict."""
- if info.type == tarfile.DIRTYPE:
- # Enforce the ending / for directories so we correctly deduplicate.
- if not info.name.endswith('/'):
- info.name += '/'
- if not self._have_added(info.name):
- self.tar.addfile(info, fileobj)
- self.members.add(info.name)
- if info.type == tarfile.DIRTYPE:
- self.directories.add(info.name)
- elif info.type != tarfile.DIRTYPE:
- print('Duplicate file in archive: %s, '
- 'picking first occurrence' % info.name)
-
- def add_directory_path(self,
- path,
- uid=0,
- gid=0,
- uname='',
- gname='',
- mtime=None,
- mode=0o755):
- """Add a directory to the current tar.
-
- Args:
- path: the ('/' delimited) path of the file to add.
- uid: owner user identifier.
- gid: owner group identifier.
- uname: owner user names.
- gname: owner group names.
- mtime: modification time to put in the archive.
- mode: unix permission mode of the file, default: 0755.
- """
- assert path[-1] == '/'
- if not path or self._have_added(path):
- return
- if _DEBUG_VERBOSITY > 1:
- print('DEBUG: adding directory', path)
- tarinfo = tarfile.TarInfo(path)
- tarinfo.type = tarfile.DIRTYPE
- tarinfo.mtime = mtime
- tarinfo.mode = mode
- tarinfo.uid = uid
- tarinfo.gid = gid
- tarinfo.uname = uname
- tarinfo.gname = gname
- self._addfile(tarinfo)
-
- def add_parents(self, path, uid=0, gid=0, uname='', gname='', mtime=0, mode=0o755):
- dirs = path.split('/')
- parent_path = ''
- for next_level in dirs[0:-1]:
- parent_path = parent_path + next_level + '/'
- self.add_directory_path(
- parent_path,
- uid=uid,
- gid=gid,
- uname=uname,
- gname=gname,
- mtime=mtime,
- mode=0o755)
-
- def add_file(self,
- name,
- kind=tarfile.REGTYPE,
- content=None,
- link=None,
- file_content=None,
- uid=0,
- gid=0,
- uname='',
- gname='',
- mtime=None,
- mode=None):
- """Add a file to the current tar.
-
- Args:
- name: the ('/' delimited) path of the file to add.
- kind: the type of the file to add, see tarfile.*TYPE.
- content: the content to put in the file.
- link: if the file is a link, the destination of the link.
- file_content: file to read the content from. Provide either this
- one or `content` to specifies a content for the file.
- uid: owner user identifier.
- gid: owner group identifier.
- uname: owner user names.
- gname: owner group names.
- mtime: modification time to put in the archive.
- mode: unix permission mode of the file, default 0644 (0755).
- """
- if not name:
- return
- if name == '.':
- return
- name = self.add_root_prefix(name)
- if name in self.members:
- return
-
- if mtime is None:
- mtime = self.default_mtime
-
- # Make directories up the file
- self.add_parents(name, mtime=mtime, mode=0o755, uid=uid, gid=gid, uname=uname, gname=gname)
-
- tarinfo = tarfile.TarInfo(name)
- tarinfo.mtime = mtime
- tarinfo.uid = uid
- tarinfo.gid = gid
- tarinfo.uname = uname
- tarinfo.gname = gname
- tarinfo.type = kind
- if mode is None:
- tarinfo.mode = 0o644 if kind == tarfile.REGTYPE else 0o755
- else:
- tarinfo.mode = mode
- if link:
- tarinfo.linkname = link
- if content:
- content_bytes = content.encode('utf-8')
- tarinfo.size = len(content_bytes)
- self._addfile(tarinfo, io.BytesIO(content_bytes))
- elif file_content:
- with open(file_content, 'rb') as f:
- tarinfo.size = os.fstat(f.fileno()).st_size
- self._addfile(tarinfo, f)
- else:
- self._addfile(tarinfo)
-
- def add_tar(self,
- tar,
- rootuid=None,
- rootgid=None,
- numeric=False,
- name_filter=None,
- prefix=None):
- """Merge a tar content into the current tar, stripping timestamp.
-
- Args:
- tar: the name of tar to extract and put content into the current tar.
- rootuid: user id that we will pretend is root (replaced by uid 0).
- rootgid: group id that we will pretend is root (replaced by gid 0).
- numeric: set to true to strip out name of owners (and just use the
- numeric values).
- name_filter: filter out file by names. If not none, this method will be
- called for each file to add, given the name and should return true if
- the file is to be added to the final tar and false otherwise.
- prefix: prefix to add to all file paths. This prefix is added to the
- incoming file path, then the overall root_directory will also be
- added.
-
- Raises:
- TarFileWriter.Error: if an error happens when uncompressing the tar file.
- """
- if prefix:
- prefix = prefix.strip('/') + '/'
- if _DEBUG_VERBOSITY > 1:
- print('========================== prefix is', prefix)
- intar = tarfile.open(name=tar, mode='r:*')
- for tarinfo in intar:
- if name_filter is None or name_filter(tarinfo.name):
- if not self.preserve_mtime:
- tarinfo.mtime = self.default_mtime
- if rootuid is not None and tarinfo.uid == rootuid:
- tarinfo.uid = 0
- tarinfo.uname = 'root'
- if rootgid is not None and tarinfo.gid == rootgid:
- tarinfo.gid = 0
- tarinfo.gname = 'root'
- if numeric:
- tarinfo.uname = ''
- tarinfo.gname = ''
-
- in_name = tarinfo.name
- if prefix:
- in_name = prefix + in_name
- tarinfo.name = self.add_root_prefix(in_name)
- self.add_parents(
- path=tarinfo.name,
- mtime=tarinfo.mtime,
- mode=0o755,
- uid=tarinfo.uid,
- gid=tarinfo.gid,
- uname=tarinfo.uname,
- gname=tarinfo.gname)
-
- if prefix is not None:
- # Relocate internal hardlinks as well to avoid breaking them.
- link = tarinfo.linkname
- if link.startswith('.') and tarinfo.type == tarfile.LNKTYPE:
- tarinfo.linkname = '.' + prefix + link.lstrip('.')
-
- # Remove path pax header to ensure that the proposed name is going
- # to be used. Without this, files with long names will not be
- # properly written to its new path.
- if 'path' in tarinfo.pax_headers:
- del tarinfo.pax_headers['path']
-
- if tarinfo.isfile():
- # use extractfile(tarinfo) instead of tarinfo.name to preserve
- # seek position in intar
- self._addfile(tarinfo, intar.extractfile(tarinfo))
- else:
- self._addfile(tarinfo)
- intar.close()
-
- def close(self):
- """Close the output tar file.
-
- This class should not be used anymore after calling that method.
-
- Raises:
- TarFileWriter.Error: if an error happens when compressing the output file.
- """
- self.tar.close()
- # Close the file object if necessary.
- if self.fileobj:
- self.fileobj.close()
- if self.compressor_proc and self.compressor_proc.wait() != 0:
- raise self.Error('Custom compression command '
- '"{}" failed'.format(self.compressor_cmd))
-
diff --git a/pkg/private/tar/BUILD b/pkg/private/tar/BUILD
index 4dc94e6..0717eae 100644
--- a/pkg/private/tar/BUILD
+++ b/pkg/private/tar/BUILD
@@ -16,7 +16,7 @@
All interfaces are subject to change at any time.
"""
-load("@rules_python//python:defs.bzl", "py_binary")
+load("@rules_python//python:defs.bzl", "py_binary", "py_library")
licenses(["notice"])
@@ -46,9 +46,21 @@ py_binary(
srcs_version = "PY3",
visibility = ["//visibility:public"],
deps = [
+ ":tar_writer",
"//pkg/private:archive",
"//pkg/private:build_info",
"//pkg/private:helpers",
"//pkg/private:manifest",
],
)
+
+py_library(
+ name = "tar_writer",
+ srcs = [
+ "tar_writer.py",
+ ],
+ srcs_version = "PY3",
+ visibility = [
+ "//tests:__subpackages__",
+ ],
+)
diff --git a/pkg/private/tar/build_tar.py b/pkg/private/tar/build_tar.py
index 7e7827c..ca4e566 100644
--- a/pkg/private/tar/build_tar.py
+++ b/pkg/private/tar/build_tar.py
@@ -23,6 +23,7 @@ from pkg.private import archive
from pkg.private import helpers
from pkg.private import build_info
from pkg.private import manifest
+from pkg.private.tar import tar_writer
def normpath(path):
@@ -52,7 +53,7 @@ class TarFile(object):
self.default_mtime = default_mtime
def __enter__(self):
- self.tarfile = archive.TarFileWriter(
+ self.tarfile = tar_writer.TarFileWriter(
self.output,
self.compression,
self.compressor,
@@ -199,7 +200,7 @@ class TarFile(object):
Raises:
DebError: if the format of the deb archive is incorrect.
"""
- with archive.SimpleArFile(deb) as arfile:
+ with archive.SimpleArReader(deb) as arfile:
current = next(arfile)
while current and not current.filename.startswith('data.'):
current = next(arfile)
diff --git a/pkg/private/tar/tar_writer.py b/pkg/private/tar/tar_writer.py
new file mode 100644
index 0000000..6da6f2f
--- /dev/null
+++ b/pkg/private/tar/tar_writer.py
@@ -0,0 +1,367 @@
+# Copyright 2022 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tar writing helper."""
+
+import gzip
+import io
+import os
+import subprocess
+import tarfile
+
+try:
+ import lzma # pylint: disable=g-import-not-at-top, unused-import
+ HAS_LZMA = True
+except ImportError:
+ HAS_LZMA = False
+
+# This is slightly a lie. We do support xz fallback through the xz tool, but
+# that is fragile. Users should stick to the expectations provided here.
+COMPRESSIONS = ('', 'gz', 'bz2', 'xz') if HAS_LZMA else ('', 'gz', 'bz2')
+
+# Use a deterministic mtime that doesn't confuse other programs.
+# See: https://github.com/bazelbuild/bazel/issues/1299
+PORTABLE_MTIME = 946684800 # 2000-01-01 00:00:00.000 UTC
+
+_DEBUG_VERBOSITY = 0
+
+
+class TarFileWriter(object):
+ """A wrapper to write tar files."""
+
+ class Error(Exception):
+ pass
+
+ def __init__(self,
+ name,
+ compression='',
+ compressor='',
+ root_directory='',
+ default_mtime=None,
+ preserve_tar_mtimes=True):
+ """TarFileWriter wraps tarfile.open().
+
+ Args:
+ name: the tar file name.
+ compression: compression type: bzip2, bz2, gz, tgz, xz, lzma.
+ compressor: custom command to do the compression.
+ root_directory: virtual root to prepend to elements in the archive.
+ default_mtime: default mtime to use for elements in the archive.
+ May be an integer or the value 'portable' to use the date
+ 2000-01-01, which is compatible with non *nix OSes'.
+ preserve_tar_mtimes: if true, keep file mtimes from input tar file.
+ """
+ self.preserve_mtime = preserve_tar_mtimes
+ if default_mtime is None:
+ self.default_mtime = 0
+ elif default_mtime == 'portable':
+ self.default_mtime = PORTABLE_MTIME
+ else:
+ self.default_mtime = int(default_mtime)
+
+ self.fileobj = None
+ self.compressor_cmd = (compressor or '').strip()
+ if self.compressor_cmd:
+ # Some custom command has been specified: no need for further
+ # configuration, we're just going to use it.
+ pass
+ # Support xz compression through xz... until we can use Py3
+ elif compression in ['xz', 'lzma']:
+ if HAS_LZMA:
+ mode = 'w:xz'
+ else:
+ self.compressor_cmd = 'xz -F {} -'.format(compression)
+ elif compression in ['bzip2', 'bz2']:
+ mode = 'w:bz2'
+ else:
+ mode = 'w:'
+ if compression in ['tgz', 'gz']:
+ # The Tarfile class doesn't allow us to specify gzip's mtime attribute.
+ # Instead, we manually reimplement gzopen from tarfile.py and set mtime.
+ self.fileobj = gzip.GzipFile(
+ filename=name, mode='w', compresslevel=9, mtime=self.default_mtime)
+ self.compressor_proc = None
+ if self.compressor_cmd:
+ mode = 'w|'
+ self.compressor_proc = subprocess.Popen(self.compressor_cmd.split(),
+ stdin=subprocess.PIPE,
+ stdout=open(name, 'wb'))
+ self.fileobj = self.compressor_proc.stdin
+ self.name = name
+ if root_directory:
+ # tarfile uses / instead of os.path.sep, so convert to that
+ self.root_directory = root_directory.replace(os.path.sep, '/')
+ self.root_directory = self.root_directory.rstrip('/') + '/'
+ else:
+ self.root_directory = None
+
+ self.tar = tarfile.open(name=name, mode=mode, fileobj=self.fileobj)
+ self.members = set()
+ self.directories = set()
+ # Preseed the added directory list with things we should not add. If we
+ # some day need to allow '.' or '/' as an explicit member of the archive,
+ # we can adjust that here based on the setting of root_dirctory.
+ self.directories.add('/')
+ self.directories.add('./')
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, t, v, traceback):
+ self.close()
+
+ def add_root_prefix(self, path: str) -> str:
+ """Add the root prefix to a path.
+
+ If the path begins with / or the prefix itself, do nothing.
+
+ Args:
+ path: a file path
+ Returns:
+ modified path.
+ """
+ path = path.replace(os.path.sep, '/').rstrip('/')
+ while path.startswith('./'):
+ path = path[2:]
+ if not self.root_directory or path.startswith('/'):
+ return path
+ if (path + '/').startswith(self.root_directory):
+ return path
+ return self.root_directory + path
+
+ def _have_added(self, path):
+ """Have we added this file before."""
+ return (path in self.members) or (path in self.directories)
+
+ def _addfile(self, info, fileobj=None):
+ """Add a file in the tar file if there is no conflict."""
+ if info.type == tarfile.DIRTYPE:
+ # Enforce the ending / for directories so we correctly deduplicate.
+ if not info.name.endswith('/'):
+ info.name += '/'
+ if not self._have_added(info.name):
+ self.tar.addfile(info, fileobj)
+ self.members.add(info.name)
+ if info.type == tarfile.DIRTYPE:
+ self.directories.add(info.name)
+ elif info.type != tarfile.DIRTYPE:
+ print('Duplicate file in archive: %s, '
+ 'picking first occurrence' % info.name)
+
+ def add_directory_path(self,
+ path,
+ uid=0,
+ gid=0,
+ uname='',
+ gname='',
+ mtime=None,
+ mode=0o755):
+ """Add a directory to the current tar.
+
+ Args:
+ path: the ('/' delimited) path of the file to add.
+ uid: owner user identifier.
+ gid: owner group identifier.
+ uname: owner user names.
+ gname: owner group names.
+ mtime: modification time to put in the archive.
+ mode: unix permission mode of the file, default: 0755.
+ """
+ assert path[-1] == '/'
+ if not path or self._have_added(path):
+ return
+ if _DEBUG_VERBOSITY > 1:
+ print('DEBUG: adding directory', path)
+ tarinfo = tarfile.TarInfo(path)
+ tarinfo.type = tarfile.DIRTYPE
+ tarinfo.mtime = mtime
+ tarinfo.mode = mode
+ tarinfo.uid = uid
+ tarinfo.gid = gid
+ tarinfo.uname = uname
+ tarinfo.gname = gname
+ self._addfile(tarinfo)
+
+ def add_parents(self, path, uid=0, gid=0, uname='', gname='', mtime=0, mode=0o755):
+ dirs = path.split('/')
+ parent_path = ''
+ for next_level in dirs[0:-1]:
+ parent_path = parent_path + next_level + '/'
+ self.add_directory_path(
+ parent_path,
+ uid=uid,
+ gid=gid,
+ uname=uname,
+ gname=gname,
+ mtime=mtime,
+ mode=0o755)
+
+ def add_file(self,
+ name,
+ kind=tarfile.REGTYPE,
+ content=None,
+ link=None,
+ file_content=None,
+ uid=0,
+ gid=0,
+ uname='',
+ gname='',
+ mtime=None,
+ mode=None):
+ """Add a file to the current tar.
+
+ Args:
+ name: the ('/' delimited) path of the file to add.
+ kind: the type of the file to add, see tarfile.*TYPE.
+ content: the content to put in the file.
+ link: if the file is a link, the destination of the link.
+ file_content: file to read the content from. Provide either this
+ one or `content` to specifies a content for the file.
+ uid: owner user identifier.
+ gid: owner group identifier.
+ uname: owner user names.
+ gname: owner group names.
+ mtime: modification time to put in the archive.
+ mode: unix permission mode of the file, default 0644 (0755).
+ """
+ if not name:
+ return
+ if name == '.':
+ return
+ name = self.add_root_prefix(name)
+ if name in self.members:
+ return
+
+ if mtime is None:
+ mtime = self.default_mtime
+
+ # Make directories up the file
+ self.add_parents(name, mtime=mtime, mode=0o755, uid=uid, gid=gid, uname=uname, gname=gname)
+
+ tarinfo = tarfile.TarInfo(name)
+ tarinfo.mtime = mtime
+ tarinfo.uid = uid
+ tarinfo.gid = gid
+ tarinfo.uname = uname
+ tarinfo.gname = gname
+ tarinfo.type = kind
+ if mode is None:
+ tarinfo.mode = 0o644 if kind == tarfile.REGTYPE else 0o755
+ else:
+ tarinfo.mode = mode
+ if link:
+ tarinfo.linkname = link
+ if content:
+ content_bytes = content.encode('utf-8')
+ tarinfo.size = len(content_bytes)
+ self._addfile(tarinfo, io.BytesIO(content_bytes))
+ elif file_content:
+ with open(file_content, 'rb') as f:
+ tarinfo.size = os.fstat(f.fileno()).st_size
+ self._addfile(tarinfo, f)
+ else:
+ self._addfile(tarinfo)
+
+ def add_tar(self,
+ tar,
+ rootuid=None,
+ rootgid=None,
+ numeric=False,
+ name_filter=None,
+ prefix=None):
+ """Merge a tar content into the current tar, stripping timestamp.
+
+ Args:
+ tar: the name of tar to extract and put content into the current tar.
+ rootuid: user id that we will pretend is root (replaced by uid 0).
+ rootgid: group id that we will pretend is root (replaced by gid 0).
+ numeric: set to true to strip out name of owners (and just use the
+ numeric values).
+ name_filter: filter out file by names. If not none, this method will be
+ called for each file to add, given the name and should return true if
+ the file is to be added to the final tar and false otherwise.
+ prefix: prefix to add to all file paths. This prefix is added to the
+ incoming file path, then the overall root_directory will also be
+ added.
+
+ Raises:
+ TarFileWriter.Error: if an error happens when uncompressing the tar file.
+ """
+ if prefix:
+ prefix = prefix.strip('/') + '/'
+ if _DEBUG_VERBOSITY > 1:
+ print('========================== prefix is', prefix)
+ intar = tarfile.open(name=tar, mode='r:*')
+ for tarinfo in intar:
+ if name_filter is None or name_filter(tarinfo.name):
+ if not self.preserve_mtime:
+ tarinfo.mtime = self.default_mtime
+ if rootuid is not None and tarinfo.uid == rootuid:
+ tarinfo.uid = 0
+ tarinfo.uname = 'root'
+ if rootgid is not None and tarinfo.gid == rootgid:
+ tarinfo.gid = 0
+ tarinfo.gname = 'root'
+ if numeric:
+ tarinfo.uname = ''
+ tarinfo.gname = ''
+
+ in_name = tarinfo.name
+ if prefix:
+ in_name = prefix + in_name
+ tarinfo.name = self.add_root_prefix(in_name)
+ self.add_parents(
+ path=tarinfo.name,
+ mtime=tarinfo.mtime,
+ mode=0o755,
+ uid=tarinfo.uid,
+ gid=tarinfo.gid,
+ uname=tarinfo.uname,
+ gname=tarinfo.gname)
+
+ if prefix is not None:
+ # Relocate internal hardlinks as well to avoid breaking them.
+ link = tarinfo.linkname
+ if link.startswith('.') and tarinfo.type == tarfile.LNKTYPE:
+ tarinfo.linkname = '.' + prefix + link.lstrip('.')
+
+ # Remove path pax header to ensure that the proposed name is going
+ # to be used. Without this, files with long names will not be
+ # properly written to its new path.
+ if 'path' in tarinfo.pax_headers:
+ del tarinfo.pax_headers['path']
+
+ if tarinfo.isfile():
+ # use extractfile(tarinfo) instead of tarinfo.name to preserve
+ # seek position in intar
+ self._addfile(tarinfo, intar.extractfile(tarinfo))
+ else:
+ self._addfile(tarinfo)
+ intar.close()
+
+ def close(self):
+ """Close the output tar file.
+
+ This class should not be used anymore after calling that method.
+
+ Raises:
+ TarFileWriter.Error: if an error happens when compressing the output file.
+ """
+ self.tar.close()
+ # Close the file object if necessary.
+ if self.fileobj:
+ self.fileobj.close()
+ if self.compressor_proc and self.compressor_proc.wait() != 0:
+ raise self.Error('Custom compression command '
+ '"{}" failed'.format(self.compressor_cmd))
+
diff --git a/tests/BUILD b/tests/BUILD
index 8221529..7d0d8ed 100644
--- a/tests/BUILD
+++ b/tests/BUILD
@@ -37,6 +37,28 @@ filegroup(
)
py_test(
+ name = "archive_test",
+ srcs = [
+ "archive_test.py",
+ ],
+ data = [
+ "//tests:testdata/empty.ar",
+ "//tests:testdata/a_ab.ar",
+ "//tests:testdata/a.ar",
+ "//tests:testdata/a_b_ab.ar",
+ "//tests:testdata/a_b.ar",
+ "//tests:testdata/ab.ar",
+ "//tests:testdata/b.ar",
+ ],
+ python_version = "PY3",
+ srcs_version = "PY3",
+ deps = [
+ "//pkg/private:archive",
+ "@bazel_tools//tools/python/runfiles",
+ ],
+)
+
+py_test(
name = "path_test",
srcs = ["path_test.py"],
data = ["//pkg:path.bzl"],
diff --git a/tests/archive_test.py b/tests/archive_test.py
new file mode 100644
index 0000000..21ddd5e
--- /dev/null
+++ b/tests/archive_test.py
@@ -0,0 +1,98 @@
+# Copyright 2015 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Testing for archive."""
+
+import unittest
+
+from bazel_tools.tools.python.runfiles import runfiles
+from pkg.private import archive
+
+
+class SimpleArReaderTest(unittest.TestCase):
+ """Testing for SimpleArReader class."""
+
+ def setUp(self):
+ super(SimpleArReaderTest, self).setUp()
+ self.data_files = runfiles.Create()
+
+ def assertArFileContent(self, arfile, content):
+ """Assert that arfile contains exactly the entry described by `content`.
+
+ Args:
+ arfile: the path to the AR file to test.
+ content: an array describing the expected content of the AR file.
+ Each entry in that list should be a dictionary where each field
+ is a field to test in the corresponding SimpleArFileEntry. For
+ testing the presence of a file "x", then the entry could simply
+ be `{"filename": "x"}`, the missing field will be ignored.
+ """
+ print("READING: %s" % arfile)
+ with archive.SimpleArReader(arfile) as f:
+ current = f.next()
+ i = 0
+ while current:
+ error_msg = "Extraneous file at end of archive %s: %s" % (
+ arfile,
+ current.filename
+ )
+ self.assertLess(i, len(content), error_msg)
+ for k, v in content[i].items():
+ value = getattr(current, k)
+ error_msg = " ".join([
+ "Value `%s` for key `%s` of file" % (value, k),
+ "%s in archive %s does" % (current.filename, arfile),
+ "not match expected value `%s`" % v
+ ])
+ self.assertEqual(value, v, error_msg)
+ current = f.next()
+ i += 1
+ if i < len(content):
+ self.fail("Missing file %s in archive %s" % (content[i], arfile))
+
+ def testEmptyArFile(self):
+ self.assertArFileContent(
+ self.data_files.Rlocation("rules_pkg/tests/testdata/empty.ar"),
+ [])
+
+ def assertSimpleFileContent(self, names):
+ datafile = self.data_files.Rlocation(
+ "rules_pkg/tests/testdata/" + "_".join(names) + ".ar")
+ # pylint: disable=g-complex-comprehension
+ content = [{"filename": n,
+ "size": len(n.encode("utf-8")),
+ "data": n.encode("utf-8")}
+ for n in names]
+ self.assertArFileContent(datafile, content)
+
+ def testAFile(self):
+ self.assertSimpleFileContent(["a"])
+
+ def testBFile(self):
+ self.assertSimpleFileContent(["b"])
+
+ def testABFile(self):
+ self.assertSimpleFileContent(["ab"])
+
+ def testA_BFile(self):
+ self.assertSimpleFileContent(["a", "b"])
+
+ def testA_ABFile(self):
+ self.assertSimpleFileContent(["a", "ab"])
+
+ def testA_B_ABFile(self):
+ self.assertSimpleFileContent(["a", "b", "ab"])
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/deb/pkg_deb_test.py b/tests/deb/pkg_deb_test.py
index 49f934e..7cb7240 100644
--- a/tests/deb/pkg_deb_test.py
+++ b/tests/deb/pkg_deb_test.py
@@ -33,7 +33,7 @@ class DebInspect(object):
self.deb_version = None
self.data = None
self.control = None
- with archive.SimpleArFile(deb_file) as f:
+ with archive.SimpleArReader(deb_file) as f:
info = f.next()
while info:
if info.filename == 'debian-binary':
diff --git a/tests/tar/BUILD b/tests/tar/BUILD
index db2dcca..55b22dc 100644
--- a/tests/tar/BUILD
+++ b/tests/tar/BUILD
@@ -25,24 +25,15 @@ load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
package(default_applicable_licenses = ["//:license"])
py_test(
- name = "archive_test",
+ name = "tar_writer_test",
srcs = [
- "archive_test.py",
+ "tar_writer_test.py",
],
data = [
":compressor",
":test_tar_compression",
":test_tar_package_dir",
":test_tar_package_dir_file",
- # TODO(aiuto): Some of these are used by pkg_deb tests. Figure out why
- # and eliminate the weird cross coupling.
- "//tests:testdata/empty.ar",
- "//tests:testdata/a_ab.ar",
- "//tests:testdata/a.ar",
- "//tests:testdata/a_b_ab.ar",
- "//tests:testdata/a_b.ar",
- "//tests:testdata/ab.ar",
- "//tests:testdata/b.ar",
"//tests:testdata/tar_test.tar",
"//tests:testdata/tar_test.tar.bz2",
"//tests:testdata/tar_test.tar.gz",
@@ -52,7 +43,7 @@ py_test(
python_version = "PY3",
srcs_version = "PY3",
deps = [
- "//pkg/private:archive",
+ "//pkg/private/tar:tar_writer",
"@bazel_tools//tools/python/runfiles",
],
)
@@ -327,7 +318,7 @@ py_test(
],
python_version = "PY3",
deps = [
- "//pkg/private:archive",
+ "//pkg/private/tar:tar_writer",
"@bazel_tools//tools/python/runfiles",
],
)
diff --git a/tests/tar/pkg_tar_test.py b/tests/tar/pkg_tar_test.py
index ec05a0d..255bdea 100644
--- a/tests/tar/pkg_tar_test.py
+++ b/tests/tar/pkg_tar_test.py
@@ -11,13 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-"""Testing for archive."""
+"""Testing for pkg_tar."""
import tarfile
import unittest
from bazel_tools.tools.python.runfiles import runfiles
-from pkg.private import archive
+from pkg.private.tar import tar_writer
PORTABLE_MTIME = 946684800 # 2000-01-01 00:00:00.000 UTC
@@ -156,7 +156,8 @@ class PkgTarTest(unittest.TestCase):
'mode': 0o755,
'uid': 42, 'gid': 24, 'uname': 'titi', 'gname': 'tata'},
]
- for ext in [('.' + comp if comp else '') for comp in archive.COMPRESSIONS]:
+ for ext in [('.' + comp if comp else '')
+ for comp in tar_writer.COMPRESSIONS]:
with self.subTest(ext=ext):
self.assertTarFileContent('test-tar-basic-%s.tar%s' % (ext[1:], ext),
content)
@@ -171,7 +172,8 @@ class PkgTarTest(unittest.TestCase):
{'name': 'usr/titi', 'mode': 0o755, 'uid': 42, 'gid': 24},
{'name': 'BUILD'},
]
- for ext in [('.' + comp if comp else '') for comp in archive.COMPRESSIONS]:
+ for ext in [('.' + comp if comp else '')
+ for comp in tar_writer.COMPRESSIONS]:
with self.subTest(ext=ext):
self.assertTarFileContent('test-tar-inclusion-%s.tar' % ext[1:],
content)
diff --git a/tests/tar/archive_test.py b/tests/tar/tar_writer_test.py
index 659dd2a..f3ff3e5 100644
--- a/tests/tar/archive_test.py
+++ b/tests/tar/tar_writer_test.py
@@ -11,92 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-"""Testing for archive."""
+"""Testing for tar_writer."""
import os
import tarfile
import unittest
from bazel_tools.tools.python.runfiles import runfiles
-from pkg.private import archive
+from pkg.private.tar import tar_writer
from tests.tar import compressor
-class SimpleArFileTest(unittest.TestCase):
- """Testing for SimpleArFile class."""
-
- def setUp(self):
- super(SimpleArFileTest, self).setUp()
- self.data_files = runfiles.Create()
-
- def assertArFileContent(self, arfile, content):
- """Assert that arfile contains exactly the entry described by `content`.
-
- Args:
- arfile: the path to the AR file to test.
- content: an array describing the expected content of the AR file.
- Each entry in that list should be a dictionary where each field
- is a field to test in the corresponding SimpleArFileEntry. For
- testing the presence of a file "x", then the entry could simply
- be `{"filename": "x"}`, the missing field will be ignored.
- """
- print("READING: %s" % arfile)
- with archive.SimpleArFile(arfile) as f:
- current = f.next()
- i = 0
- while current:
- error_msg = "Extraneous file at end of archive %s: %s" % (
- arfile,
- current.filename
- )
- self.assertLess(i, len(content), error_msg)
- for k, v in content[i].items():
- value = getattr(current, k)
- error_msg = " ".join([
- "Value `%s` for key `%s` of file" % (value, k),
- "%s in archive %s does" % (current.filename, arfile),
- "not match expected value `%s`" % v
- ])
- self.assertEqual(value, v, error_msg)
- current = f.next()
- i += 1
- if i < len(content):
- self.fail("Missing file %s in archive %s" % (content[i], arfile))
-
- def testEmptyArFile(self):
- self.assertArFileContent(
- self.data_files.Rlocation("rules_pkg/tests/testdata/empty.ar"),
- [])
-
- def assertSimpleFileContent(self, names):
- datafile = self.data_files.Rlocation(
- "rules_pkg/tests/testdata/" + "_".join(names) + ".ar")
- # pylint: disable=g-complex-comprehension
- content = [{"filename": n,
- "size": len(n.encode("utf-8")),
- "data": n.encode("utf-8")}
- for n in names]
- self.assertArFileContent(datafile, content)
-
- def testAFile(self):
- self.assertSimpleFileContent(["a"])
-
- def testBFile(self):
- self.assertSimpleFileContent(["b"])
-
- def testABFile(self):
- self.assertSimpleFileContent(["ab"])
-
- def testA_BFile(self):
- self.assertSimpleFileContent(["a", "b"])
-
- def testA_ABFile(self):
- self.assertSimpleFileContent(["a", "ab"])
-
- def testA_B_ABFile(self):
- self.assertSimpleFileContent(["a", "b", "ab"])
-
-
class TarFileWriterTest(unittest.TestCase):
"""Testing for TarFileWriter class."""
@@ -154,12 +79,12 @@ class TarFileWriterTest(unittest.TestCase):
os.remove(self.tempfile)
def testEmptyTarFile(self):
- with archive.TarFileWriter(self.tempfile):
+ with tar_writer.TarFileWriter(self.tempfile):
pass
self.assertTarFileContent(self.tempfile, [])
def assertSimpleFileContent(self, names):
- with archive.TarFileWriter(self.tempfile) as f:
+ with tar_writer.TarFileWriter(self.tempfile) as f:
for n in names:
f.add_file(n, content=n)
# pylint: disable=g-complex-comprehension
@@ -178,7 +103,7 @@ class TarFileWriterTest(unittest.TestCase):
self.assertSimpleFileContent(["./a", "./b", "./ab"])
def testDottedFiles(self):
- with archive.TarFileWriter(self.tempfile) as f:
+ with tar_writer.TarFileWriter(self.tempfile) as f:
f.add_file("a")
f.add_file("/b")
f.add_file("./c")
@@ -200,8 +125,8 @@ class TarFileWriterTest(unittest.TestCase):
{"name": "a", "data": b"a"},
{"name": "ab", "data": b"ab"},
]
- for ext in [("." + comp if comp else "") for comp in archive.COMPRESSIONS]:
- with archive.TarFileWriter(self.tempfile) as f:
+ for ext in [("." + comp if comp else "") for comp in tar_writer.COMPRESSIONS]:
+ with tar_writer.TarFileWriter(self.tempfile) as f:
datafile = self.data_files.Rlocation(
"rules_pkg/tests/testdata/tar_test.tar" + ext)
f.add_tar(datafile, name_filter=lambda n: n != "./b")
@@ -213,26 +138,26 @@ class TarFileWriterTest(unittest.TestCase):
{"name": "foo/a", "data": b"a"},
{"name": "foo/ab", "data": b"ab"},
]
- with archive.TarFileWriter(self.tempfile, root_directory="foo") as f:
+ with tar_writer.TarFileWriter(self.tempfile, root_directory="foo") as f:
datafile = self.data_files.Rlocation(
"rules_pkg/tests/testdata/tar_test.tar")
f.add_tar(datafile, name_filter=lambda n: n != "./b")
self.assertTarFileContent(self.tempfile, content)
def testDefaultMtimeNotProvided(self):
- with archive.TarFileWriter(self.tempfile) as f:
+ with tar_writer.TarFileWriter(self.tempfile) as f:
self.assertEqual(f.default_mtime, 0)
def testDefaultMtimeProvided(self):
- with archive.TarFileWriter(self.tempfile, default_mtime=1234) as f:
+ with tar_writer.TarFileWriter(self.tempfile, default_mtime=1234) as f:
self.assertEqual(f.default_mtime, 1234)
def testPortableMtime(self):
- with archive.TarFileWriter(self.tempfile, default_mtime="portable") as f:
+ with tar_writer.TarFileWriter(self.tempfile, default_mtime="portable") as f:
self.assertEqual(f.default_mtime, 946684800)
def testPreserveTarMtimesTrueByDefault(self):
- with archive.TarFileWriter(self.tempfile) as f:
+ with tar_writer.TarFileWriter(self.tempfile) as f:
input_tar_path = self.data_files.Rlocation(
"rules_pkg/tests/testdata/tar_test.tar")
f.add_tar(input_tar_path)
@@ -245,7 +170,7 @@ class TarFileWriterTest(unittest.TestCase):
self.assertEqual(input_file.mtime, output_file.mtime)
def testPreserveTarMtimesFalse(self):
- with archive.TarFileWriter(self.tempfile, preserve_tar_mtimes=False) as f:
+ with tar_writer.TarFileWriter(self.tempfile, preserve_tar_mtimes=False) as f:
input_tar_path = self.data_files.Rlocation(
"rules_pkg/tests/testdata/tar_test.tar")
f.add_tar(input_tar_path)
@@ -253,7 +178,7 @@ class TarFileWriterTest(unittest.TestCase):
self.assertEqual(output_file.mtime, 0)
def testAddingDirectoriesForFile(self):
- with archive.TarFileWriter(self.tempfile) as f:
+ with tar_writer.TarFileWriter(self.tempfile) as f:
f.add_file("d/f")
content = [
{"name": "d", "mode": 0o755},
@@ -262,7 +187,7 @@ class TarFileWriterTest(unittest.TestCase):
self.assertTarFileContent(self.tempfile, content)
def testAddingDirectoriesForFileManually(self):
- with archive.TarFileWriter(self.tempfile) as f:
+ with tar_writer.TarFileWriter(self.tempfile) as f:
f.add_file("d", tarfile.DIRTYPE)
f.add_file("d/f")
@@ -288,7 +213,7 @@ class TarFileWriterTest(unittest.TestCase):
self.assertTarFileContent(self.tempfile, content)
def testChangingRootDirectory(self):
- with archive.TarFileWriter(self.tempfile, root_directory="root") as f:
+ with tar_writer.TarFileWriter(self.tempfile, root_directory="root") as f:
f.add_file("d", tarfile.DIRTYPE)
f.add_file("d/f")