summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrançois Degros <fdegros@chromium.org>2022-02-09 06:06:16 +0000
committerCopybara-Service <copybara-worker@google.com>2022-02-08 22:16:54 -0800
commit172367d1f35fb67f09ab577ee35901815b6470af (patch)
treead576cfdcabda1becf875846adeb13dafab8c39e
parent3fc79233fe8ff5cf39fec4c8b8a46272d4f11cec (diff)
downloadzlib-172367d1f35fb67f09ab577ee35901815b6470af.tar.gz
[zip] Add ZipReader::Entry
Preparing the transition from (class) ZipReader::EntryInfo to (struct) ZipReader::Entry. This will remove boilerplate code, and will provide optimization and simplification opportunities. Avoid dynamically allocating the Entry over and over again. Reuse the same internal Entry object. BUG=chromium:1295127 TEST=autoninja -C out/Default zlib_unittests && out/Default/zlib_unittests Change-Id: Ie51e1bda05408b28f53957ea89c78d37d8009f5c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3446405 Reviewed-by: Noel Gordon <noel@chromium.org> Commit-Queue: François Degros <fdegros@chromium.org> Cr-Commit-Position: refs/heads/main@{#968767} NOKEYCHECK=True GitOrigin-RevId: 153ddbabb7339501c16310337d9b240c37929fb6
-rw-r--r--google/zip_reader.cc130
-rw-r--r--google/zip_reader.h146
-rw-r--r--google/zip_reader_unittest.cc10
3 files changed, 141 insertions, 145 deletions
diff --git a/google/zip_reader.cc b/google/zip_reader.cc
index 9209b6b..bd36741 100644
--- a/google/zip_reader.cc
+++ b/google/zip_reader.cc
@@ -115,48 +115,6 @@ void SetPosixFilePermissions(int fd, int mode) {
} // namespace
-ZipReader::EntryInfo::EntryInfo(base::FilePath file_path,
- std::string file_path_in_original_encoding,
- const unz_file_info& raw_file_info)
- : file_path_(std::move(file_path)),
- file_path_in_original_encoding_(
- std::move(file_path_in_original_encoding)),
- posix_mode_(0) {
- original_size_ = raw_file_info.uncompressed_size;
-
- // Directory entries in zip files end with "/".
- is_directory_ = base::EndsWith(file_path_in_original_encoding_, "/");
-
- // Check the file name here for directory traversal issues.
- // We also consider that the file name is unsafe, if it's absolute.
- // On Windows, IsAbsolute() returns false for paths starting with "/".
- is_unsafe_ = file_path_.ReferencesParent() || file_path_.IsAbsolute() ||
- base::StartsWith(file_path_in_original_encoding_, "/");
-
- // Whether the file is encrypted is bit 0 of the flag.
- is_encrypted_ = raw_file_info.flag & 1;
-
- // Construct the last modified time. The timezone info is not present in
- // zip files, so we construct the time as local time.
- base::Time::Exploded exploded_time = {}; // Zero-clear.
- exploded_time.year = raw_file_info.tmu_date.tm_year;
- // The month in zip file is 0-based, whereas ours is 1-based.
- exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
- exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
- exploded_time.hour = raw_file_info.tmu_date.tm_hour;
- exploded_time.minute = raw_file_info.tmu_date.tm_min;
- exploded_time.second = raw_file_info.tmu_date.tm_sec;
- exploded_time.millisecond = 0;
-
- if (!base::Time::FromUTCExploded(exploded_time, &last_modified_))
- last_modified_ = base::Time::UnixEpoch();
-
-#if defined(OS_POSIX)
- posix_mode_ =
- (raw_file_info.external_fa >> 16L) & (S_IRWXU | S_IRWXG | S_IRWXO);
-#endif
-}
-
ZipReader::ZipReader() {
Reset();
}
@@ -165,12 +123,12 @@ ZipReader::~ZipReader() {
Close();
}
-bool ZipReader::Open(const base::FilePath& zip_file_path) {
+bool ZipReader::Open(const base::FilePath& zip_path) {
DCHECK(!zip_file_);
// Use of "Unsafe" function does not look good, but there is no way to do
// this safely on Linux. See file_util.h for details.
- zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
+ zip_file_ = internal::OpenForUnzipping(zip_path.AsUTF8Unsafe());
if (!zip_file_) {
return false;
}
@@ -223,7 +181,7 @@ bool ZipReader::AdvanceToNextEntry() {
return false;
const int current_entry_index = position.num_of_file;
// If we are currently at the last entry, then the next position is the
- // end of the zip file, so mark that we reached the end.
+ // end of the ZIP archive, so mark that we reached the end.
if (current_entry_index + 1 == num_entries_) {
reached_end_ = true;
} else {
@@ -232,38 +190,75 @@ bool ZipReader::AdvanceToNextEntry() {
return false;
}
}
- current_entry_info_.reset();
+
+ entry_ = {};
+ current_entry_ = nullptr;
return true;
}
bool ZipReader::OpenCurrentEntryInZip() {
DCHECK(zip_file_);
- unz_file_info raw_file_info = {};
- char file_path_in_zip[internal::kZipMaxPath] = {};
- const int result = unzGetCurrentFileInfo(
- zip_file_, &raw_file_info, file_path_in_zip, sizeof(file_path_in_zip) - 1,
- nullptr, // extraField.
- 0, // extraFieldBufferSize.
- nullptr, // szComment.
- 0); // commentBufferSize.
- if (result != UNZ_OK)
+ current_entry_ = nullptr;
+
+ // Get entry info.
+ unz_file_info info = {};
+ char path_in_zip[internal::kZipMaxPath] = {};
+ if (unzGetCurrentFileInfo(zip_file_, &info, path_in_zip,
+ sizeof(path_in_zip) - 1, nullptr, 0, nullptr,
+ 0) != UNZ_OK) {
return false;
+ }
+
+ Entry& entry = entry_;
+ entry.path_in_original_encoding = path_in_zip;
- // Convert file path from original encoding to Unicode.
- const base::StringPiece file_path_in_original_encoding(file_path_in_zip);
- std::u16string file_path_in_utf16;
+ // Convert path from original encoding to Unicode.
+ std::u16string path_in_utf16;
const char* const encoding = encoding_.empty() ? "UTF-8" : encoding_.c_str();
- if (!base::CodepageToUTF16(file_path_in_original_encoding, encoding,
+ if (!base::CodepageToUTF16(entry.path_in_original_encoding, encoding,
base::OnStringConversionError::SUBSTITUTE,
- &file_path_in_utf16)) {
- LOG(ERROR) << "Cannot convert file path from encoding " << encoding;
+ &path_in_utf16)) {
+ LOG(ERROR) << "Cannot convert path from encoding " << encoding;
return false;
}
- current_entry_info_.reset(new EntryInfo(
- base::FilePath::FromUTF16Unsafe(file_path_in_utf16),
- std::string(file_path_in_original_encoding), raw_file_info));
+ entry.path = base::FilePath::FromUTF16Unsafe(path_in_utf16);
+ entry.original_size = info.uncompressed_size;
+
+ // Directory entries in ZIP have a path ending with "/".
+ entry.is_directory = base::EndsWith(path_in_utf16, u"/");
+
+ // Check the entry path for directory traversal issues. We consider entry
+ // paths unsafe if they are absolute or if they contain "..". On Windows,
+ // IsAbsolute() returns false for paths starting with "/".
+ entry.is_unsafe = entry.path.ReferencesParent() || entry.path.IsAbsolute() ||
+ base::StartsWith(path_in_utf16, u"/");
+
+ // The file content of this entry is encrypted if flag bit 0 is set.
+ entry.is_encrypted = info.flag & 1;
+
+ // Construct the last modified time. The timezone info is not present in ZIP
+ // archives, so we construct the time as UTC.
+ base::Time::Exploded exploded_time = {};
+ exploded_time.year = info.tmu_date.tm_year;
+ exploded_time.month = info.tmu_date.tm_mon + 1; // 0-based vs 1-based
+ exploded_time.day_of_month = info.tmu_date.tm_mday;
+ exploded_time.hour = info.tmu_date.tm_hour;
+ exploded_time.minute = info.tmu_date.tm_min;
+ exploded_time.second = info.tmu_date.tm_sec;
+ exploded_time.millisecond = 0;
+
+ if (!base::Time::FromUTCExploded(exploded_time, &entry.last_modified))
+ entry.last_modified = base::Time::UnixEpoch();
+
+#if defined(OS_POSIX)
+ entry.posix_mode = (info.external_fa >> 16L) & (S_IRWXU | S_IRWXG | S_IRWXO);
+#else
+ entry.posix_mode = 0;
+#endif
+
+ current_entry_ = &entry_;
return true;
}
@@ -325,7 +320,7 @@ void ZipReader::ExtractCurrentEntryToFilePathAsync(
FailureCallback failure_callback,
const ProgressCallback& progress_callback) {
DCHECK(zip_file_);
- DCHECK(current_entry_info_.get());
+ DCHECK(current_entry_);
// If this is a directory, just create it and return.
if (current_entry_info()->is_directory()) {
@@ -427,7 +422,7 @@ bool ZipReader::OpenInternal() {
if (num_entries_ < 0)
return false;
- // We are already at the end if the zip file is empty.
+ // We are already at the end if the ZIP archive is empty.
reached_end_ = (num_entries_ == 0);
return true;
}
@@ -436,7 +431,8 @@ void ZipReader::Reset() {
zip_file_ = nullptr;
num_entries_ = 0;
reached_end_ = false;
- current_entry_info_.reset();
+ entry_ = {};
+ current_entry_ = nullptr;
}
void ZipReader::ExtractChunk(base::File output_file,
diff --git a/google/zip_reader.h b/google/zip_reader.h
index 2199357..1b54622 100644
--- a/google/zip_reader.h
+++ b/google/zip_reader.h
@@ -48,12 +48,12 @@ class WriterDelegate {
virtual void SetPosixFilePermissions(int mode) = 0;
};
-// This class is used for reading zip files. A typical use case of this
-// class is to scan entries in a zip file and extract them. The code will
-// look like:
+// This class is used for reading ZIP archives. A typical use case of this class
+// is to scan entries in a ZIP archive and extract them. The code will look
+// like:
//
// ZipReader reader;
-// reader.Open(zip_file_path);
+// reader.Open(zip_path);
// while (reader.HasMore()) {
// reader.OpenCurrentEntryInZip();
// const base::FilePath& entry_path =
@@ -76,68 +76,67 @@ class ZipReader {
// of bytes that have been processed so far.
using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
- // This class represents information of an entry (file or directory) in
- // a zip file.
- class EntryInfo {
- public:
- EntryInfo(base::FilePath file_path,
- std::string file_path_in_original_encoding,
- const unz_file_info& raw_file_info);
-
- EntryInfo(const EntryInfo&) = delete;
- EntryInfo& operator=(const EntryInfo&) = delete;
-
- // Returns the file path. The path is usually relative like
- // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
- const base::FilePath& file_path() const { return file_path_; }
-
- // Returns the file path in its original encoding as it is stored in the ZIP
- // archive. The encoding is not specified here. It might not be UTF-8, and
- // the caller needs to use other means to determine the encoding if it wants
- // to interpret this file path correctly.
- const std::string& file_path_in_original_encoding() const {
- return file_path_in_original_encoding_;
- }
-
- // Returns the size of the original file (i.e. after uncompressed).
- // Returns 0 if the entry is a directory.
- // Note: this value should not be trusted, because it is stored as metadata
- // in the zip archive and can be different from the real uncompressed size.
- int64_t original_size() const { return original_size_; }
-
- // Returns the last modified time. If the time stored in the zip file was
- // not valid, the unix epoch will be returned.
+ // Information of an entry (file or directory) in a ZIP archive.
+ struct Entry {
+ // Path of this entry, in its original encoding as it is stored in the ZIP
+ // archive. The encoding is not specified here. It might or might not be
+ // UTF-8, and the caller needs to use other means to determine the encoding
+ // if it wants to interpret this path correctly.
+ std::string path_in_original_encoding;
+
+ // Path of the entry, converted to Unicode. This path is usually relative
+ // (eg "foo/bar.txt"), but it can also be absolute (eg "/foo/bar.txt") or
+ // parent-relative (eg "../foo/bar.txt"). See also |is_unsafe|.
+ base::FilePath path;
+
+ // Size of the original uncompressed file, or 0 if the entry is a directory.
+ // This value should not be trusted, because it is stored as metadata in the
+ // ZIP archive and can be different from the real uncompressed size.
+ int64_t original_size;
+
+ // Last modified time. If the timestamp stored in the ZIP archive is not
+ // valid, the Unix epoch will be returned.
+ //
+ // The timestamp stored in the ZIP archive uses the MS-DOS date and time
+ // format.
//
- // The time stored in the zip archive uses the MS-DOS date and time format.
// http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
+ //
// As such the following limitations apply:
- // * only years from 1980 to 2107 can be represented.
- // * the time stamp has a 2 second resolution.
- // * there's no timezone information, so the time is interpreted as local.
- base::Time last_modified() const { return last_modified_; }
-
- // Returns true if the entry is a directory.
- bool is_directory() const { return is_directory_; }
-
- // Returns true if the entry is unsafe, like having ".." or invalid
- // UTF-8 characters in its file name, or the file path is absolute.
- bool is_unsafe() const { return is_unsafe_; }
-
- // Returns true if the entry is encrypted.
- bool is_encrypted() const { return is_encrypted_; }
-
- // Returns the posix file permissions of the entry.
- int posix_mode() const { return posix_mode_; }
-
- private:
- base::FilePath file_path_;
- std::string file_path_in_original_encoding_;
- int64_t original_size_;
- base::Time last_modified_;
- bool is_directory_;
- bool is_unsafe_;
- bool is_encrypted_;
- int posix_mode_;
+ // * Only years from 1980 to 2107 can be represented.
+ // * The timestamp has a 2-second resolution.
+ // * There is no timezone information, so the time is interpreted as UTC.
+ base::Time last_modified;
+
+ // True if the entry is a directory.
+ // False if the entry is a file.
+ bool is_directory;
+
+ // True if the entry path is considered unsafe, ie if it is absolute or if
+ // it contains "..".
+ bool is_unsafe;
+
+ // True if the file content is encrypted.
+ bool is_encrypted;
+
+ // Entry POSIX permissions (POSIX systems only).
+ int posix_mode;
+ };
+
+ // TODO(crbug.com/1295127) Remove this struct once transition to Entry is
+ // finished.
+ struct EntryInfo : Entry {
+ const Entry& entry() const { return *this; }
+ const std::string& file_path_in_original_encoding() const {
+ return entry().path_in_original_encoding;
+ }
+ const base::FilePath& file_path() const { return entry().path; }
+ int64_t original_size() const { return entry().original_size; }
+ base::Time last_modified() const { return entry().last_modified; }
+ bool is_directory() const { return entry().is_directory; }
+ bool is_unsafe() const { return entry().is_unsafe; }
+ bool is_encrypted() const { return entry().is_encrypted; }
+ int posix_mode() const { return entry().posix_mode; }
};
ZipReader();
@@ -147,11 +146,11 @@ class ZipReader {
~ZipReader();
- // Opens the zip file specified by |zip_file_path|. Returns true on
+ // Opens the ZIP archive specified by |zip_path|. Returns true on
// success.
- bool Open(const base::FilePath& zip_file_path);
+ bool Open(const base::FilePath& zip_path);
- // Opens the zip file referred to by the platform file |zip_fd|, without
+ // Opens the ZIP archive referred to by the platform file |zip_fd|, without
// taking ownership of |zip_fd|. Returns true on success.
bool OpenFromPlatformFile(base::PlatformFile zip_fd);
@@ -160,12 +159,12 @@ class ZipReader {
// string until it finishes extracting files.
bool OpenFromString(const std::string& data);
- // Closes the currently opened zip file. This function is called in the
+ // Closes the currently opened ZIP archive. This function is called in the
// destructor of the class, so you usually don't need to call this.
void Close();
- // Sets the encoding of file paths in the ZIP archive.
- // By default, file paths are assumed to be in UTF-8.
+ // Sets the encoding of entry paths in the ZIP archive.
+ // By default, paths are assumed to be in UTF-8.
void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); }
// Returns true if there is at least one entry to read. This function is
@@ -180,7 +179,7 @@ class ZipReader {
// Advances the next entry. Returns true on success.
bool AdvanceToNextEntry();
- // Opens the current entry in the zip file. On success, returns true and
+ // Opens the current entry in the ZIP archive. On success, returns true and
// updates the current entry state (i.e. current_entry_info() is updated).
// This function should be called before operations over the current entry
// like ExtractCurrentEntryToFile().
@@ -224,9 +223,9 @@ class ZipReader {
// Returns the current entry info. Returns NULL if the current entry is
// not yet opened. OpenCurrentEntryInZip() must be called beforehand.
- EntryInfo* current_entry_info() const { return current_entry_info_.get(); }
+ EntryInfo* current_entry_info() const { return current_entry_; }
- // Returns the number of entries in the zip file.
+ // Returns the number of entries in the ZIP archive.
// Open() must be called beforehand.
int num_entries() const { return num_entries_; }
@@ -249,7 +248,8 @@ class ZipReader {
unzFile zip_file_;
int num_entries_;
bool reached_end_;
- std::unique_ptr<EntryInfo> current_entry_info_;
+ EntryInfo entry_ = {};
+ EntryInfo* current_entry_ = nullptr;
base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
};
diff --git a/google/zip_reader_unittest.cc b/google/zip_reader_unittest.cc
index f4ecfbd..b0347e8 100644
--- a/google/zip_reader_unittest.cc
+++ b/google/zip_reader_unittest.cc
@@ -216,8 +216,8 @@ TEST_F(ZipReaderTest, Open_ExistentButNonZipFile) {
ASSERT_FALSE(reader.Open(data_dir_.AppendASCII("create_test_zip.sh")));
}
-// Iterate through the contents in the test zip file, and compare that the
-// contents collected from the zip reader matches the expected contents.
+// Iterate through the contents in the test ZIP archive, and compare that the
+// contents collected from the ZipReader matches the expected contents.
TEST_F(ZipReaderTest, Iteration) {
Paths actual_contents;
ZipReader reader;
@@ -233,8 +233,8 @@ TEST_F(ZipReaderTest, Iteration) {
EXPECT_THAT(actual_contents, ElementsAreArray(test_zip_contents_));
}
-// Open the test zip file from a file descriptor, iterate through its contents,
-// and compare that they match the expected contents.
+// Open the test ZIP archive from a file descriptor, iterate through its
+// contents, and compare that they match the expected contents.
TEST_F(ZipReaderTest, PlatformFileIteration) {
Paths actual_contents;
ZipReader reader;
@@ -293,7 +293,7 @@ TEST_F(ZipReaderTest, current_entry_info_DotDotFile) {
TEST_F(ZipReaderTest, current_entry_info_InvalidUTF8File) {
ZipReader reader;
ASSERT_TRUE(reader.Open(data_dir_.AppendASCII("evil_via_invalid_utf8.zip")));
- // The evil file is the 2nd file in the zip file.
+ // The evil file is the 2nd file in the ZIP archive.
// We cannot locate by the file name ".\x80.\\evil.txt",
// as FilePath may internally convert the string.
ASSERT_TRUE(reader.AdvanceToNextEntry());