diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-05-11 05:08:18 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-05-11 05:08:18 +0000 |
commit | 96127c99b09ff102fab70f95541d9d433ce10ab8 (patch) | |
tree | 650e1d3a1f5269380163831380aad1e24c6907b6 /google/zip_reader.h | |
parent | 3544c2604fc833c89a68e5fb4e563cf0a231cfb0 (diff) | |
parent | 4b17695e63c6e11735f0f7a3d3754b1cac5da348 (diff) | |
download | zlib-96127c99b09ff102fab70f95541d9d433ce10ab8.tar.gz |
Snap for 8570526 from 4b17695e63c6e11735f0f7a3d3754b1cac5da348 to mainline-scheduling-release (refs: aml_sch_331113000, aml_sch_331111000, android13-mainline-scheduling-release)
Change-Id: I3b08362eee53ba19c76b91d5eb7988594746089d
Diffstat (limited to 'google/zip_reader.h')
-rw-r--r-- | google/zip_reader.h | 336 |
1 files changed, 194 insertions, 142 deletions
diff --git a/google/zip_reader.h b/google/zip_reader.h index d442d42..df7452a 100644 --- a/google/zip_reader.h +++ b/google/zip_reader.h @@ -7,15 +7,15 @@ #include <stddef.h> #include <stdint.h> +#include <limits> #include <memory> #include <string> #include "base/callback.h" #include "base/files/file.h" #include "base/files/file_path.h" -#include "base/files/file_util.h" -#include "base/macros.h" #include "base/memory/weak_ptr.h" +#include "base/numerics/safe_conversions.h" #include "base/time/time.h" #if defined(USE_SYSTEM_MINIZIP) @@ -34,33 +34,47 @@ class WriterDelegate { // Invoked once before any data is streamed out to pave the way (e.g., to open // the output file). Return false on failure to cancel extraction. - virtual bool PrepareOutput() = 0; + virtual bool PrepareOutput() { return true; } // Invoked to write the next chunk of data. Return false on failure to cancel // extraction. - virtual bool WriteBytes(const char* data, int num_bytes) = 0; + virtual bool WriteBytes(const char* data, int num_bytes) { return true; } // Sets the last-modified time of the data. - virtual void SetTimeModified(const base::Time& time) = 0; + virtual void SetTimeModified(const base::Time& time) {} + + // Called with the POSIX file permissions of the data; POSIX implementations + // may apply some of the permissions (for example, the executable bit) to the + // output file. + virtual void SetPosixFilePermissions(int mode) {} + + // Called if an error occurred while extracting the file. The WriterDelegate + // can then remove and clean up the partially extracted data. + virtual void OnError() {} }; -// This class is used for reading zip files. A typical use case of this -// class is to scan entries in a zip file and extract them. The code will -// look like: +// This class is used for reading ZIP archives. A typical use case of this class +// is to scan entries in a ZIP archive and extract them. 
The code will look +// like: // // ZipReader reader; -// reader.Open(zip_file_path); -// while (reader.HasMore()) { -// reader.OpenCurrentEntryInZip(); -// const base::FilePath& entry_path = -// reader.current_entry_info()->file_path(); -// auto writer = CreateFilePathWriterDelegate(extract_dir, entry_path); -// reader.ExtractCurrentEntry(writer, std::numeric_limits<uint64_t>::max()); -// reader.AdvanceToNextEntry(); +// if (!reader.Open(zip_path)) { +// // Cannot open +// return; // } // -// For simplicity, error checking is omitted in the example code above. The -// production code should check return values from all of these functions. +// while (const ZipReader::entry* entry = reader.Next()) { +// auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path); +// if (!reader.ExtractCurrentEntry(writer)) { +// // Cannot extract +// return; +// } +// } +// +// if (!reader.ok()) { +// // Error while enumerating entries +// return; +// } // class ZipReader { public: @@ -72,62 +86,65 @@ class ZipReader { // of bytes that have been processed so far. using ProgressCallback = base::RepeatingCallback<void(int64_t)>; - // This class represents information of an entry (file or directory) in - // a zip file. - class EntryInfo { - public: - EntryInfo(const std::string& filename_in_zip, - const unz_file_info& raw_file_info); - - // Returns the file path. The path is usually relative like - // "foo/bar.txt", but if it's absolute, is_unsafe() returns true. - const base::FilePath& file_path() const { return file_path_; } - - // Returns the size of the original file (i.e. after uncompressed). - // Returns 0 if the entry is a directory. - // Note: this value should not be trusted, because it is stored as metadata - // in the zip archive and can be different from the real uncompressed size. - int64_t original_size() const { return original_size_; } - - // Returns the last modified time. If the time stored in the zip file was - // not valid, the unix epoch will be returned. 
+ // Information of an entry (file or directory) in a ZIP archive. + struct Entry { + // Path of this entry, in its original encoding as it is stored in the ZIP + // archive. The encoding is not specified here. It might or might not be + // UTF-8, and the caller needs to use other means to determine the encoding + // if it wants to interpret this path correctly. + std::string path_in_original_encoding; + + // Path of the entry, converted to Unicode. This path is usually relative + // (eg "foo/bar.txt"), but it can also be absolute (eg "/foo/bar.txt") or + // parent-relative (eg "../foo/bar.txt"). See also |is_unsafe|. + base::FilePath path; + + // Size of the original uncompressed file, or 0 if the entry is a directory. + // This value should not be trusted, because it is stored as metadata in the + // ZIP archive and can be different from the real uncompressed size. + int64_t original_size; + + // Last modified time. If the timestamp stored in the ZIP archive is not + // valid, the Unix epoch will be returned. + // + // The timestamp stored in the ZIP archive uses the MS-DOS date and time + // format. // - // The time stored in the zip archive uses the MS-DOS date and time format. // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx + // // As such the following limitations apply: - // * only years from 1980 to 2107 can be represented. - // * the time stamp has a 2 second resolution. - // * there's no timezone information, so the time is interpreted as local. - base::Time last_modified() const { return last_modified_; } - - // Returns true if the entry is a directory. - bool is_directory() const { return is_directory_; } - - // Returns true if the entry is unsafe, like having ".." or invalid - // UTF-8 characters in its file name, or the file path is absolute. - bool is_unsafe() const { return is_unsafe_; } - - // Returns true if the entry is encrypted. 
- bool is_encrypted() const { return is_encrypted_; } - - private: - const base::FilePath file_path_; - int64_t original_size_; - base::Time last_modified_; - bool is_directory_; - bool is_unsafe_; - bool is_encrypted_; - DISALLOW_COPY_AND_ASSIGN(EntryInfo); + // * Only years from 1980 to 2107 can be represented. + // * The timestamp has a 2-second resolution. + // * There is no timezone information, so the time is interpreted as UTC. + base::Time last_modified; + + // True if the entry is a directory. + // False if the entry is a file. + bool is_directory; + + // True if the entry path is considered unsafe, ie if it is absolute or if + // it contains "..". + bool is_unsafe; + + // True if the file content is encrypted. + bool is_encrypted; + + // Entry POSIX permissions (POSIX systems only). + int posix_mode; }; ZipReader(); + + ZipReader(const ZipReader&) = delete; + ZipReader& operator=(const ZipReader&) = delete; + ~ZipReader(); - // Opens the zip file specified by |zip_file_path|. Returns true on + // Opens the ZIP archive specified by |zip_path|. Returns true on // success. - bool Open(const base::FilePath& zip_file_path); + bool Open(const base::FilePath& zip_path); - // Opens the zip file referred to by the platform file |zip_fd|, without + // Opens the ZIP archive referred to by the platform file |zip_fd|, without // taking ownership of |zip_fd|. Returns true on success. bool OpenFromPlatformFile(base::PlatformFile zip_fd); @@ -136,72 +153,94 @@ class ZipReader { // string until it finishes extracting files. bool OpenFromString(const std::string& data); - // Closes the currently opened zip file. This function is called in the + // Closes the currently opened ZIP archive. This function is called in the // destructor of the class, so you usually don't need to call this. void Close(); - // Returns true if there is at least one entry to read. 
This function is - // used to scan entries with AdvanceToNextEntry(), like: - // - // while (reader.HasMore()) { - // // Do something with the current file here. - // reader.AdvanceToNextEntry(); - // } - bool HasMore(); + // Sets the encoding of entry paths in the ZIP archive. + // By default, paths are assumed to be in UTF-8. + void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); } - // Advances the next entry. Returns true on success. - bool AdvanceToNextEntry(); + // Sets the decryption password that will be used to decrypt encrypted file in + // the ZIP archive. + void SetPassword(std::string password) { password_ = std::move(password); } - // Opens the current entry in the zip file. On success, returns true and - // updates the the current entry state (i.e. current_entry_info() is - // updated). This function should be called before operations over the - // current entry like ExtractCurrentEntryToFile(). + // Gets the next entry. Returns null if there is no more entry, or if an error + // occurred while scanning entries. The returned Entry is owned by this + // ZipReader, and is valid until Next() is called again or until this + // ZipReader is closed. + // + // This function should be called before operations over the current entry + // like ExtractCurrentEntryToFile(). // - // Note that there is no CloseCurrentEntryInZip(). The the current entry - // state is reset automatically as needed. - bool OpenCurrentEntryInZip(); + // while (const ZipReader::Entry* entry = reader.Next()) { + // // Do something with the current entry here. + // ... + // } + // + // // Finished scanning entries. + // // Check if the scanning stopped because of an error. + // if (!reader.ok()) { + // // There was an error. + // ... + // } + const Entry* Next(); + + // Returns true if the enumeration of entries was successful, or false if it + // stopped because of an error. 
+ bool ok() const { return ok_; } // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, - // starting from the beginning of the entry. Return value specifies whether - // the entire file was extracted. + // starting from the beginning of the entry. + // + // Returns true if the entire file was extracted without error. + // + // Precondition: Next() returned a non-null Entry. bool ExtractCurrentEntry(WriterDelegate* delegate, - uint64_t num_bytes_to_extract) const; + uint64_t num_bytes_to_extract = + std::numeric_limits<uint64_t>::max()) const; - // Asynchronously extracts the current entry to the given output file path. - // If the current entry is a directory it just creates the directory - // synchronously instead. OpenCurrentEntryInZip() must be called beforehand. - // success_callback will be called on success and failure_callback will be - // called on failure. progress_callback will be called at least once. + // Asynchronously extracts the current entry to the given output file path. If + // the current entry is a directory it just creates the directory + // synchronously instead. + // + // |success_callback| will be called on success and |failure_callback| will be + // called on failure. |progress_callback| will be called at least once. // Callbacks will be posted to the current MessageLoop in-order. + // + // Precondition: Next() returned a non-null Entry. void ExtractCurrentEntryToFilePathAsync( const base::FilePath& output_file_path, SuccessCallback success_callback, FailureCallback failure_callback, - const ProgressCallback& progress_callback); + ProgressCallback progress_callback); // Extracts the current entry into memory. If the current entry is a - // directory, the |output| parameter is set to the empty string. If the - // current entry is a file, the |output| parameter is filled with its - // contents. OpenCurrentEntryInZip() must be called beforehand. 
Note: the - // |output| parameter can be filled with a big amount of data, avoid passing - // it around by value, but by reference or pointer. Note: the value returned - // by EntryInfo::original_size() cannot be trusted, so the real size of the - // uncompressed contents can be different. |max_read_bytes| limits the ammount - // of memory used to carry the entry. Returns true if the entire content is - // read. If the entry is bigger than |max_read_bytes|, returns false and - // |output| is filled with |max_read_bytes| of data. If an error occurs, - // returns false, and |output| is set to the empty string. + // directory, |*output| is set to the empty string. If the current entry is a + // file, |*output| is filled with its contents. + // + // The value in |Entry::original_size| cannot be trusted, so the real size of + // the uncompressed contents can be different. |max_read_bytes| limits the + // amount of memory used to carry the entry. + // + // Returns true if the entire content is read without error. If the content is + // bigger than |max_read_bytes|, this function returns false and |*output| is + // filled with |max_read_bytes| of data. If an error occurs, this function + // returns false and |*output| contains the content extracted so far, which + // might be garbage data. + // + // Precondition: Next() returned a non-null Entry. bool ExtractCurrentEntryToString(uint64_t max_read_bytes, std::string* output) const; - // Returns the current entry info. Returns NULL if the current entry is - // not yet opened. OpenCurrentEntryInZip() must be called beforehand. - EntryInfo* current_entry_info() const { - return current_entry_info_.get(); + bool ExtractCurrentEntryToString(std::string* output) const { + return ExtractCurrentEntryToString( + base::checked_cast<uint64_t>(output->max_size()), output); } - // Returns the number of entries in the zip file. - // Open() must be called beforehand. + // Returns the number of entries in the ZIP archive. 
+ // + // Precondition: one of the Open() methods returned true. int num_entries() const { return num_entries_; } private: @@ -211,25 +250,35 @@ class ZipReader { // Resets the internal state. void Reset(); + // Opens the current entry in the ZIP archive. On success, returns true and + // updates the current entry state |entry_|. + // + // Note that there is no matching CloseEntry(). The current entry state is + // reset automatically as needed. + bool OpenEntry(); + // Extracts a chunk of the file to the target. Will post a task for the next // chunk and success/failure/progress callbacks as necessary. void ExtractChunk(base::File target_file, SuccessCallback success_callback, FailureCallback failure_callback, - const ProgressCallback& progress_callback, + ProgressCallback progress_callback, const int64_t offset); + std::string encoding_; + std::string password_; unzFile zip_file_; int num_entries_; + int next_index_; bool reached_end_; - std::unique_ptr<EntryInfo> current_entry_info_; + bool ok_; + Entry entry_; base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this}; - - DISALLOW_COPY_AND_ASSIGN(ZipReader); }; -// A writer delegate that writes to a given File. +// A writer delegate that writes to a given File. This file is expected to be +// initially empty. class FileWriterDelegate : public WriterDelegate { public: // Constructs a FileWriterDelegate that manipulates |file|. The delegate will @@ -238,14 +287,14 @@ class FileWriterDelegate : public WriterDelegate { explicit FileWriterDelegate(base::File* file); // Constructs a FileWriterDelegate that takes ownership of |file|. - explicit FileWriterDelegate(std::unique_ptr<base::File> file); + explicit FileWriterDelegate(base::File owned_file); - // Truncates the file to the number of bytes written. 
- ~FileWriterDelegate() override; + FileWriterDelegate(const FileWriterDelegate&) = delete; + FileWriterDelegate& operator=(const FileWriterDelegate&) = delete; - // WriterDelegate methods: + ~FileWriterDelegate() override; - // Seeks to the beginning of the file, returning false if the seek fails. + // Returns true if the file handle passed to the constructor is valid. bool PrepareOutput() override; // Writes |num_bytes| bytes of |data| to the file, returning false on error or @@ -255,45 +304,48 @@ class FileWriterDelegate : public WriterDelegate { // Sets the last-modified time of the data. void SetTimeModified(const base::Time& time) override; - // Return the actual size of the file. - int64_t file_length() { return file_length_; } + // On POSIX systems, sets the file to be executable if the source file was + // executable. + void SetPosixFilePermissions(int mode) override; - private: - // The file the delegate modifies. - base::File* file_; + // Empties the file to avoid leaving garbage data in it. + void OnError() override; + + // Gets the number of bytes written into the file. + int64_t file_length() { return file_length_; } + protected: // The delegate can optionally own the file it modifies, in which case // owned_file_ is set and file_ is an alias for owned_file_. - std::unique_ptr<base::File> owned_file_; + base::File owned_file_; - int64_t file_length_ = 0; + // The file the delegate modifies. + base::File* const file_ = &owned_file_; - DISALLOW_COPY_AND_ASSIGN(FileWriterDelegate); + int64_t file_length_ = 0; }; -// A writer delegate that writes a file at a given path. -class FilePathWriterDelegate : public WriterDelegate { +// A writer delegate that creates and writes a file at a given path. This does +// not overwrite any existing file. 
+class FilePathWriterDelegate : public FileWriterDelegate { public: - explicit FilePathWriterDelegate(const base::FilePath& output_file_path); - ~FilePathWriterDelegate() override; + explicit FilePathWriterDelegate(base::FilePath output_file_path); - // WriterDelegate methods: + FilePathWriterDelegate(const FilePathWriterDelegate&) = delete; + FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete; - // Creates the output file and any necessary intermediate directories. - bool PrepareOutput() override; + ~FilePathWriterDelegate() override; - // Writes |num_bytes| bytes of |data| to the file, returning false if not all - // bytes could be written. - bool WriteBytes(const char* data, int num_bytes) override; + // Creates the output file and any necessary intermediate directories. Does + // not overwrite any existing file, and returns false if the output file + // cannot be created because another file conflicts with it. + bool PrepareOutput() override; - // Sets the last-modified time of the data. - void SetTimeModified(const base::Time& time) override; + // Deletes the output file. + void OnError() override; private: - base::FilePath output_file_path_; - base::File file_; - - DISALLOW_COPY_AND_ASSIGN(FilePathWriterDelegate); + const base::FilePath output_file_path_; }; } // namespace zip |