diff options
author | François Degros <fdegros@chromium.org> | 2022-02-09 06:06:16 +0000 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2022-02-08 22:16:54 -0800 |
commit | 172367d1f35fb67f09ab577ee35901815b6470af (patch) | |
tree | ad576cfdcabda1becf875846adeb13dafab8c39e | |
parent | 3fc79233fe8ff5cf39fec4c8b8a46272d4f11cec (diff) | |
download | zlib-172367d1f35fb67f09ab577ee35901815b6470af.tar.gz |
[zip] Add ZipReader::Entry
Preparing the transition from (class) ZipReader::EntryInfo to (struct)
ZipReader::Entry. This will remove boilerplate code, and will provide
optimization and simplification opportunities.
Avoid dynamically allocating the Entry over and over again. Reuse the
same internal Entry object.
BUG=chromium:1295127
TEST=autoninja -C out/Default zlib_unittests && out/Default/zlib_unittests
Change-Id: Ie51e1bda05408b28f53957ea89c78d37d8009f5c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3446405
Reviewed-by: Noel Gordon <noel@chromium.org>
Commit-Queue: François Degros <fdegros@chromium.org>
Cr-Commit-Position: refs/heads/main@{#968767}
NOKEYCHECK=True
GitOrigin-RevId: 153ddbabb7339501c16310337d9b240c37929fb6
-rw-r--r-- | google/zip_reader.cc | 130 | ||||
-rw-r--r-- | google/zip_reader.h | 146 | ||||
-rw-r--r-- | google/zip_reader_unittest.cc | 10 |
3 files changed, 141 insertions, 145 deletions
diff --git a/google/zip_reader.cc b/google/zip_reader.cc index 9209b6b..bd36741 100644 --- a/google/zip_reader.cc +++ b/google/zip_reader.cc @@ -115,48 +115,6 @@ void SetPosixFilePermissions(int fd, int mode) { } // namespace -ZipReader::EntryInfo::EntryInfo(base::FilePath file_path, - std::string file_path_in_original_encoding, - const unz_file_info& raw_file_info) - : file_path_(std::move(file_path)), - file_path_in_original_encoding_( - std::move(file_path_in_original_encoding)), - posix_mode_(0) { - original_size_ = raw_file_info.uncompressed_size; - - // Directory entries in zip files end with "/". - is_directory_ = base::EndsWith(file_path_in_original_encoding_, "/"); - - // Check the file name here for directory traversal issues. - // We also consider that the file name is unsafe, if it's absolute. - // On Windows, IsAbsolute() returns false for paths starting with "/". - is_unsafe_ = file_path_.ReferencesParent() || file_path_.IsAbsolute() || - base::StartsWith(file_path_in_original_encoding_, "/"); - - // Whether the file is encrypted is bit 0 of the flag. - is_encrypted_ = raw_file_info.flag & 1; - - // Construct the last modified time. The timezone info is not present in - // zip files, so we construct the time as local time. - base::Time::Exploded exploded_time = {}; // Zero-clear. - exploded_time.year = raw_file_info.tmu_date.tm_year; - // The month in zip file is 0-based, whereas ours is 1-based. - exploded_time.month = raw_file_info.tmu_date.tm_mon + 1; - exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday; - exploded_time.hour = raw_file_info.tmu_date.tm_hour; - exploded_time.minute = raw_file_info.tmu_date.tm_min; - exploded_time.second = raw_file_info.tmu_date.tm_sec; - exploded_time.millisecond = 0; - - if (!base::Time::FromUTCExploded(exploded_time, &last_modified_)) - last_modified_ = base::Time::UnixEpoch(); - -#if defined(OS_POSIX) - posix_mode_ = - (raw_file_info.external_fa >> 16L) & (S_IRWXU | S_IRWXG | S_IRWXO); -#endif -} - ZipReader::ZipReader() { Reset(); } @@ -165,12 +123,12 @@ ZipReader::~ZipReader() { Close(); } -bool ZipReader::Open(const base::FilePath& zip_file_path) { +bool ZipReader::Open(const base::FilePath& zip_path) { DCHECK(!zip_file_); // Use of "Unsafe" function does not look good, but there is no way to do // this safely on Linux. See file_util.h for details. - zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe()); + zip_file_ = internal::OpenForUnzipping(zip_path.AsUTF8Unsafe()); if (!zip_file_) { return false; } @@ -223,7 +181,7 @@ bool ZipReader::AdvanceToNextEntry() { return false; const int current_entry_index = position.num_of_file; // If we are currently at the last entry, then the next position is the - // end of the zip file, so mark that we reached the end. + // end of the ZIP archive, so mark that we reached the end. if (current_entry_index + 1 == num_entries_) { reached_end_ = true; } else { @@ -232,38 +190,75 @@ bool ZipReader::AdvanceToNextEntry() { return false; } } - current_entry_info_.reset(); + + entry_ = {}; + current_entry_ = nullptr; return true; } bool ZipReader::OpenCurrentEntryInZip() { DCHECK(zip_file_); - unz_file_info raw_file_info = {}; - char file_path_in_zip[internal::kZipMaxPath] = {}; - const int result = unzGetCurrentFileInfo( - zip_file_, &raw_file_info, file_path_in_zip, sizeof(file_path_in_zip) - 1, - nullptr, // extraField. - 0, // extraFieldBufferSize. - nullptr, // szComment. - 0); // commentBufferSize. - if (result != UNZ_OK) + current_entry_ = nullptr; + + // Get entry info. + unz_file_info info = {}; + char path_in_zip[internal::kZipMaxPath] = {}; + if (unzGetCurrentFileInfo(zip_file_, &info, path_in_zip, + sizeof(path_in_zip) - 1, nullptr, 0, nullptr, + 0) != UNZ_OK) { return false; + } + + Entry& entry = entry_; + entry.path_in_original_encoding = path_in_zip; - // Convert file path from original encoding to Unicode. - const base::StringPiece file_path_in_original_encoding(file_path_in_zip); - std::u16string file_path_in_utf16; + // Convert path from original encoding to Unicode. + std::u16string path_in_utf16; const char* const encoding = encoding_.empty() ? "UTF-8" : encoding_.c_str(); - if (!base::CodepageToUTF16(file_path_in_original_encoding, encoding, + if (!base::CodepageToUTF16(entry.path_in_original_encoding, encoding, base::OnStringConversionError::SUBSTITUTE, - &file_path_in_utf16)) { - LOG(ERROR) << "Cannot convert file path from encoding " << encoding; + &path_in_utf16)) { + LOG(ERROR) << "Cannot convert path from encoding " << encoding; return false; } - current_entry_info_.reset(new EntryInfo( - base::FilePath::FromUTF16Unsafe(file_path_in_utf16), - std::string(file_path_in_original_encoding), raw_file_info)); + entry.path = base::FilePath::FromUTF16Unsafe(path_in_utf16); + entry.original_size = info.uncompressed_size; + + // Directory entries in ZIP have a path ending with "/". + entry.is_directory = base::EndsWith(path_in_utf16, u"/"); + + // Check the entry path for directory traversal issues. We consider entry + // paths unsafe if they are absolute or if they contain "..". On Windows, + // IsAbsolute() returns false for paths starting with "/". + entry.is_unsafe = entry.path.ReferencesParent() || entry.path.IsAbsolute() || + base::StartsWith(path_in_utf16, u"/"); + + // The file content of this entry is encrypted if flag bit 0 is set. + entry.is_encrypted = info.flag & 1; + + // Construct the last modified time. The timezone info is not present in ZIP + // archives, so we construct the time as UTC. + base::Time::Exploded exploded_time = {}; + exploded_time.year = info.tmu_date.tm_year; + exploded_time.month = info.tmu_date.tm_mon + 1; // 0-based vs 1-based + exploded_time.day_of_month = info.tmu_date.tm_mday; + exploded_time.hour = info.tmu_date.tm_hour; + exploded_time.minute = info.tmu_date.tm_min; + exploded_time.second = info.tmu_date.tm_sec; + exploded_time.millisecond = 0; + + if (!base::Time::FromUTCExploded(exploded_time, &entry.last_modified)) + entry.last_modified = base::Time::UnixEpoch(); + +#if defined(OS_POSIX) + entry.posix_mode = (info.external_fa >> 16L) & (S_IRWXU | S_IRWXG | S_IRWXO); +#else + entry.posix_mode = 0; +#endif + + current_entry_ = &entry_; return true; } @@ -325,7 +320,7 @@ void ZipReader::ExtractCurrentEntryToFilePathAsync( FailureCallback failure_callback, const ProgressCallback& progress_callback) { DCHECK(zip_file_); - DCHECK(current_entry_info_.get()); + DCHECK(current_entry_); // If this is a directory, just create it and return. if (current_entry_info()->is_directory()) { @@ -427,7 +422,7 @@ bool ZipReader::OpenInternal() { if (num_entries_ < 0) return false; - // We are already at the end if the zip file is empty. + // We are already at the end if the ZIP archive is empty. reached_end_ = (num_entries_ == 0); return true; } @@ -436,7 +431,8 @@ void ZipReader::Reset() { zip_file_ = nullptr; num_entries_ = 0; reached_end_ = false; - current_entry_info_.reset(); + entry_ = {}; + current_entry_ = nullptr; } void ZipReader::ExtractChunk(base::File output_file, diff --git a/google/zip_reader.h b/google/zip_reader.h index 2199357..1b54622 100644 --- a/google/zip_reader.h +++ b/google/zip_reader.h @@ -48,12 +48,12 @@ class WriterDelegate { virtual void SetPosixFilePermissions(int mode) = 0; }; -// This class is used for reading zip files. A typical use case of this -// class is to scan entries in a zip file and extract them. The code will -// look like: +// This class is used for reading ZIP archives. A typical use case of this class +// is to scan entries in a ZIP archive and extract them. The code will look +// like: // // ZipReader reader; -// reader.Open(zip_file_path); +// reader.Open(zip_path); // while (reader.HasMore()) { // reader.OpenCurrentEntryInZip(); // const base::FilePath& entry_path = @@ -76,68 +76,67 @@ class ZipReader { // of bytes that have been processed so far. using ProgressCallback = base::RepeatingCallback<void(int64_t)>; - // This class represents information of an entry (file or directory) in - // a zip file. - class EntryInfo { - public: - EntryInfo(base::FilePath file_path, - std::string file_path_in_original_encoding, - const unz_file_info& raw_file_info); - - EntryInfo(const EntryInfo&) = delete; - EntryInfo& operator=(const EntryInfo&) = delete; - - // Returns the file path. The path is usually relative like - // "foo/bar.txt", but if it's absolute, is_unsafe() returns true. - const base::FilePath& file_path() const { return file_path_; } - - // Returns the file path in its original encoding as it is stored in the ZIP - // archive. The encoding is not specified here. It might not be UTF-8, and - // the caller needs to use other means to determine the encoding if it wants - // to interpret this file path correctly. - const std::string& file_path_in_original_encoding() const { - return file_path_in_original_encoding_; - } - - // Returns the size of the original file (i.e. after uncompressed). - // Returns 0 if the entry is a directory. - // Note: this value should not be trusted, because it is stored as metadata - // in the zip archive and can be different from the real uncompressed size. - int64_t original_size() const { return original_size_; } - - // Returns the last modified time. If the time stored in the zip file was - // not valid, the unix epoch will be returned. + // Information of an entry (file or directory) in a ZIP archive. + struct Entry { + // Path of this entry, in its original encoding as it is stored in the ZIP + // archive. The encoding is not specified here. It might or might not be + // UTF-8, and the caller needs to use other means to determine the encoding + // if it wants to interpret this path correctly. + std::string path_in_original_encoding; + + // Path of the entry, converted to Unicode. This path is usually relative + // (eg "foo/bar.txt"), but it can also be absolute (eg "/foo/bar.txt") or + // parent-relative (eg "../foo/bar.txt"). See also |is_unsafe|. + base::FilePath path; + + // Size of the original uncompressed file, or 0 if the entry is a directory. + // This value should not be trusted, because it is stored as metadata in the + // ZIP archive and can be different from the real uncompressed size. + int64_t original_size; + + // Last modified time. If the timestamp stored in the ZIP archive is not + // valid, the Unix epoch will be returned. + // + // The timestamp stored in the ZIP archive uses the MS-DOS date and time + // format. // - // The time stored in the zip archive uses the MS-DOS date and time format. // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx + // // As such the following limitations apply: - // * only years from 1980 to 2107 can be represented. - // * the time stamp has a 2 second resolution. - // * there's no timezone information, so the time is interpreted as local. - base::Time last_modified() const { return last_modified_; } - - // Returns true if the entry is a directory. - bool is_directory() const { return is_directory_; } - - // Returns true if the entry is unsafe, like having ".." or invalid - // UTF-8 characters in its file name, or the file path is absolute. - bool is_unsafe() const { return is_unsafe_; } - - // Returns true if the entry is encrypted. - bool is_encrypted() const { return is_encrypted_; } - - // Returns the posix file permissions of the entry. - int posix_mode() const { return posix_mode_; } - - private: - base::FilePath file_path_; - std::string file_path_in_original_encoding_; - int64_t original_size_; - base::Time last_modified_; - bool is_directory_; - bool is_unsafe_; - bool is_encrypted_; - int posix_mode_; + // * Only years from 1980 to 2107 can be represented. + // * The timestamp has a 2-second resolution. + // * There is no timezone information, so the time is interpreted as UTC. + base::Time last_modified; + + // True if the entry is a directory. + // False if the entry is a file. + bool is_directory; + + // True if the entry path is considered unsafe, ie if it is absolute or if + // it contains "..". + bool is_unsafe; + + // True if the file content is encrypted. + bool is_encrypted; + + // Entry POSIX permissions (POSIX systems only). + int posix_mode; + }; + + // TODO(crbug.com/1295127) Remove this struct once transition to Entry is + // finished. + struct EntryInfo : Entry { + const Entry& entry() const { return *this; } + const std::string& file_path_in_original_encoding() const { + return entry().path_in_original_encoding; + } + const base::FilePath& file_path() const { return entry().path; } + int64_t original_size() const { return entry().original_size; } + base::Time last_modified() const { return entry().last_modified; } + bool is_directory() const { return entry().is_directory; } + bool is_unsafe() const { return entry().is_unsafe; } + bool is_encrypted() const { return entry().is_encrypted; } + int posix_mode() const { return entry().posix_mode; } }; ZipReader(); @@ -147,11 +146,11 @@ class ZipReader { ~ZipReader(); - // Opens the zip file specified by |zip_file_path|. Returns true on + // Opens the ZIP archive specified by |zip_path|. Returns true on // success. - bool Open(const base::FilePath& zip_file_path); + bool Open(const base::FilePath& zip_path); - // Opens the zip file referred to by the platform file |zip_fd|, without + // Opens the ZIP archive referred to by the platform file |zip_fd|, without // taking ownership of |zip_fd|. Returns true on success. bool OpenFromPlatformFile(base::PlatformFile zip_fd); @@ -160,12 +159,12 @@ class ZipReader { // string until it finishes extracting files. bool OpenFromString(const std::string& data); - // Closes the currently opened zip file. This function is called in the + // Closes the currently opened ZIP archive. This function is called in the // destructor of the class, so you usually don't need to call this. void Close(); - // Sets the encoding of file paths in the ZIP archive. - // By default, file paths are assumed to be in UTF-8. + // Sets the encoding of entry paths in the ZIP archive. + // By default, paths are assumed to be in UTF-8. void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); } // Returns true if there is at least one entry to read. This function is @@ -180,7 +179,7 @@ class ZipReader { // Advances the next entry. Returns true on success. bool AdvanceToNextEntry(); - // Opens the current entry in the zip file. On success, returns true and + // Opens the current entry in the ZIP archive. On success, returns true and // updates the current entry state (i.e. current_entry_info() is updated). // This function should be called before operations over the current entry // like ExtractCurrentEntryToFile(). @@ -224,9 +223,9 @@ class ZipReader { // Returns the current entry info. Returns NULL if the current entry is // not yet opened. OpenCurrentEntryInZip() must be called beforehand. - EntryInfo* current_entry_info() const { return current_entry_info_.get(); } + EntryInfo* current_entry_info() const { return current_entry_; } - // Returns the number of entries in the zip file. + // Returns the number of entries in the ZIP archive. // Open() must be called beforehand. int num_entries() const { return num_entries_; } @@ -249,7 +248,8 @@ class ZipReader { unzFile zip_file_; int num_entries_; bool reached_end_; - std::unique_ptr<EntryInfo> current_entry_info_; + EntryInfo entry_ = {}; + EntryInfo* current_entry_ = nullptr; base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this}; }; diff --git a/google/zip_reader_unittest.cc b/google/zip_reader_unittest.cc index f4ecfbd..b0347e8 100644 --- a/google/zip_reader_unittest.cc +++ b/google/zip_reader_unittest.cc @@ -216,8 +216,8 @@ TEST_F(ZipReaderTest, Open_ExistentButNonZipFile) { ASSERT_FALSE(reader.Open(data_dir_.AppendASCII("create_test_zip.sh"))); } -// Iterate through the contents in the test zip file, and compare that the -// contents collected from the zip reader matches the expected contents. +// Iterate through the contents in the test ZIP archive, and compare that the +// contents collected from the ZipReader matches the expected contents. TEST_F(ZipReaderTest, Iteration) { Paths actual_contents; ZipReader reader; @@ -233,8 +233,8 @@ TEST_F(ZipReaderTest, Iteration) { EXPECT_THAT(actual_contents, ElementsAreArray(test_zip_contents_)); } -// Open the test zip file from a file descriptor, iterate through its contents, -// and compare that they match the expected contents. +// Open the test ZIP archive from a file descriptor, iterate through its +// contents, and compare that they match the expected contents. TEST_F(ZipReaderTest, PlatformFileIteration) { Paths actual_contents; ZipReader reader; @@ -293,7 +293,7 @@ TEST_F(ZipReaderTest, current_entry_info_DotDotFile) { TEST_F(ZipReaderTest, current_entry_info_InvalidUTF8File) { ZipReader reader; ASSERT_TRUE(reader.Open(data_dir_.AppendASCII("evil_via_invalid_utf8.zip"))); - // The evil file is the 2nd file in the zip file. + // The evil file is the 2nd file in the ZIP archive. // We cannot locate by the file name ".\x80.\\evil.txt", // as FilePath may internally convert the string. ASSERT_TRUE(reader.AdvanceToNextEntry()); |