summaryrefslogtreecommitdiff
path: root/google/zip_reader.h
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-05-11 05:08:18 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-05-11 05:08:18 +0000
commit96127c99b09ff102fab70f95541d9d433ce10ab8 (patch)
tree650e1d3a1f5269380163831380aad1e24c6907b6 /google/zip_reader.h
parent3544c2604fc833c89a68e5fb4e563cf0a231cfb0 (diff)
parent4b17695e63c6e11735f0f7a3d3754b1cac5da348 (diff)
downloadzlib-96127c99b09ff102fab70f95541d9d433ce10ab8.tar.gz
Snap for 8570526 from 4b17695e63c6e11735f0f7a3d3754b1cac5da348 to mainline-scheduling-release (refs: aml_sch_331113000, aml_sch_331111000, android13-mainline-scheduling-release)
Change-Id: I3b08362eee53ba19c76b91d5eb7988594746089d
Diffstat (limited to 'google/zip_reader.h')
-rw-r--r--google/zip_reader.h336
1 files changed, 194 insertions, 142 deletions
diff --git a/google/zip_reader.h b/google/zip_reader.h
index d442d42..df7452a 100644
--- a/google/zip_reader.h
+++ b/google/zip_reader.h
@@ -7,15 +7,15 @@
#include <stddef.h>
#include <stdint.h>
+#include <limits>
#include <memory>
#include <string>
#include "base/callback.h"
#include "base/files/file.h"
#include "base/files/file_path.h"
-#include "base/files/file_util.h"
-#include "base/macros.h"
#include "base/memory/weak_ptr.h"
+#include "base/numerics/safe_conversions.h"
#include "base/time/time.h"
#if defined(USE_SYSTEM_MINIZIP)
@@ -34,33 +34,47 @@ class WriterDelegate {
// Invoked once before any data is streamed out to pave the way (e.g., to open
// the output file). Return false on failure to cancel extraction.
- virtual bool PrepareOutput() = 0;
+ virtual bool PrepareOutput() { return true; }
// Invoked to write the next chunk of data. Return false on failure to cancel
// extraction.
- virtual bool WriteBytes(const char* data, int num_bytes) = 0;
+ virtual bool WriteBytes(const char* data, int num_bytes) { return true; }
// Sets the last-modified time of the data.
- virtual void SetTimeModified(const base::Time& time) = 0;
+ virtual void SetTimeModified(const base::Time& time) {}
+
+ // Called with the POSIX file permissions of the data; POSIX implementations
+ // may apply some of the permissions (for example, the executable bit) to the
+ // output file.
+ virtual void SetPosixFilePermissions(int mode) {}
+
+ // Called if an error occurred while extracting the file. The WriterDelegate
+ // can then remove and clean up the partially extracted data.
+ virtual void OnError() {}
};
-// This class is used for reading zip files. A typical use case of this
-// class is to scan entries in a zip file and extract them. The code will
-// look like:
+// This class is used for reading ZIP archives. A typical use case of this class
+// is to scan entries in a ZIP archive and extract them. The code will look
+// like:
//
// ZipReader reader;
-// reader.Open(zip_file_path);
-// while (reader.HasMore()) {
-// reader.OpenCurrentEntryInZip();
-// const base::FilePath& entry_path =
-// reader.current_entry_info()->file_path();
-// auto writer = CreateFilePathWriterDelegate(extract_dir, entry_path);
-// reader.ExtractCurrentEntry(writer, std::numeric_limits<uint64_t>::max());
-// reader.AdvanceToNextEntry();
+// if (!reader.Open(zip_path)) {
+// // Cannot open
+// return;
// }
//
-// For simplicity, error checking is omitted in the example code above. The
-// production code should check return values from all of these functions.
+// while (const ZipReader::Entry* entry = reader.Next()) {
+// auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path);
+// if (!reader.ExtractCurrentEntry(writer)) {
+// // Cannot extract
+// return;
+// }
+// }
+//
+// if (!reader.ok()) {
+// // Error while enumerating entries
+// return;
+// }
//
class ZipReader {
public:
@@ -72,62 +86,65 @@ class ZipReader {
// of bytes that have been processed so far.
using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
- // This class represents information of an entry (file or directory) in
- // a zip file.
- class EntryInfo {
- public:
- EntryInfo(const std::string& filename_in_zip,
- const unz_file_info& raw_file_info);
-
- // Returns the file path. The path is usually relative like
- // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
- const base::FilePath& file_path() const { return file_path_; }
-
- // Returns the size of the original file (i.e. after uncompressed).
- // Returns 0 if the entry is a directory.
- // Note: this value should not be trusted, because it is stored as metadata
- // in the zip archive and can be different from the real uncompressed size.
- int64_t original_size() const { return original_size_; }
-
- // Returns the last modified time. If the time stored in the zip file was
- // not valid, the unix epoch will be returned.
+ // Information of an entry (file or directory) in a ZIP archive.
+ struct Entry {
+ // Path of this entry, in its original encoding as it is stored in the ZIP
+ // archive. The encoding is not specified here. It might or might not be
+ // UTF-8, and the caller needs to use other means to determine the encoding
+ // if it wants to interpret this path correctly.
+ std::string path_in_original_encoding;
+
+ // Path of the entry, converted to Unicode. This path is usually relative
+ // (eg "foo/bar.txt"), but it can also be absolute (eg "/foo/bar.txt") or
+ // parent-relative (eg "../foo/bar.txt"). See also |is_unsafe|.
+ base::FilePath path;
+
+ // Size of the original uncompressed file, or 0 if the entry is a directory.
+ // This value should not be trusted, because it is stored as metadata in the
+ // ZIP archive and can be different from the real uncompressed size.
+ int64_t original_size;
+
+ // Last modified time. If the timestamp stored in the ZIP archive is not
+ // valid, the Unix epoch will be returned.
+ //
+ // The timestamp stored in the ZIP archive uses the MS-DOS date and time
+ // format.
//
- // The time stored in the zip archive uses the MS-DOS date and time format.
// http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
+ //
// As such the following limitations apply:
- // * only years from 1980 to 2107 can be represented.
- // * the time stamp has a 2 second resolution.
- // * there's no timezone information, so the time is interpreted as local.
- base::Time last_modified() const { return last_modified_; }
-
- // Returns true if the entry is a directory.
- bool is_directory() const { return is_directory_; }
-
- // Returns true if the entry is unsafe, like having ".." or invalid
- // UTF-8 characters in its file name, or the file path is absolute.
- bool is_unsafe() const { return is_unsafe_; }
-
- // Returns true if the entry is encrypted.
- bool is_encrypted() const { return is_encrypted_; }
-
- private:
- const base::FilePath file_path_;
- int64_t original_size_;
- base::Time last_modified_;
- bool is_directory_;
- bool is_unsafe_;
- bool is_encrypted_;
- DISALLOW_COPY_AND_ASSIGN(EntryInfo);
+ // * Only years from 1980 to 2107 can be represented.
+ // * The timestamp has a 2-second resolution.
+ // * There is no timezone information, so the time is interpreted as UTC.
+ base::Time last_modified;
+
+ // True if the entry is a directory.
+ // False if the entry is a file.
+ bool is_directory;
+
+ // True if the entry path is considered unsafe, ie if it is absolute or if
+ // it contains "..".
+ bool is_unsafe;
+
+ // True if the file content is encrypted.
+ bool is_encrypted;
+
+ // Entry POSIX permissions (POSIX systems only).
+ int posix_mode;
};
ZipReader();
+
+ ZipReader(const ZipReader&) = delete;
+ ZipReader& operator=(const ZipReader&) = delete;
+
~ZipReader();
- // Opens the zip file specified by |zip_file_path|. Returns true on
+ // Opens the ZIP archive specified by |zip_path|. Returns true on
// success.
- bool Open(const base::FilePath& zip_file_path);
+ bool Open(const base::FilePath& zip_path);
- // Opens the zip file referred to by the platform file |zip_fd|, without
+ // Opens the ZIP archive referred to by the platform file |zip_fd|, without
// taking ownership of |zip_fd|. Returns true on success.
bool OpenFromPlatformFile(base::PlatformFile zip_fd);
@@ -136,72 +153,94 @@ class ZipReader {
// string until it finishes extracting files.
bool OpenFromString(const std::string& data);
- // Closes the currently opened zip file. This function is called in the
+ // Closes the currently opened ZIP archive. This function is called in the
// destructor of the class, so you usually don't need to call this.
void Close();
- // Returns true if there is at least one entry to read. This function is
- // used to scan entries with AdvanceToNextEntry(), like:
- //
- // while (reader.HasMore()) {
- // // Do something with the current file here.
- // reader.AdvanceToNextEntry();
- // }
- bool HasMore();
+ // Sets the encoding of entry paths in the ZIP archive.
+ // By default, paths are assumed to be in UTF-8.
+ void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); }
- // Advances the next entry. Returns true on success.
- bool AdvanceToNextEntry();
+ // Sets the decryption password that will be used to decrypt encrypted files
+ // in the ZIP archive.
+ void SetPassword(std::string password) { password_ = std::move(password); }
- // Opens the current entry in the zip file. On success, returns true and
- // updates the the current entry state (i.e. current_entry_info() is
- // updated). This function should be called before operations over the
- // current entry like ExtractCurrentEntryToFile().
+ // Gets the next entry. Returns null if there are no more entries, or if an
+ // error occurred while scanning entries. The returned Entry is owned by this
+ // ZipReader, and is valid until Next() is called again or until this
+ // ZipReader is closed.
+ //
+ // This function should be called before operations over the current entry
+ // like ExtractCurrentEntry().
//
- // Note that there is no CloseCurrentEntryInZip(). The the current entry
- // state is reset automatically as needed.
- bool OpenCurrentEntryInZip();
+ // while (const ZipReader::Entry* entry = reader.Next()) {
+ // // Do something with the current entry here.
+ // ...
+ // }
+ //
+ // // Finished scanning entries.
+ // // Check if the scanning stopped because of an error.
+ // if (!reader.ok()) {
+ // // There was an error.
+ // ...
+ // }
+ const Entry* Next();
+
+ // Returns true if the enumeration of entries was successful, or false if it
+ // stopped because of an error.
+ bool ok() const { return ok_; }
// Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
- // starting from the beginning of the entry. Return value specifies whether
- // the entire file was extracted.
+ // starting from the beginning of the entry.
+ //
+ // Returns true if the entire file was extracted without error.
+ //
+ // Precondition: Next() returned a non-null Entry.
bool ExtractCurrentEntry(WriterDelegate* delegate,
- uint64_t num_bytes_to_extract) const;
+ uint64_t num_bytes_to_extract =
+ std::numeric_limits<uint64_t>::max()) const;
- // Asynchronously extracts the current entry to the given output file path.
- // If the current entry is a directory it just creates the directory
- // synchronously instead. OpenCurrentEntryInZip() must be called beforehand.
- // success_callback will be called on success and failure_callback will be
- // called on failure. progress_callback will be called at least once.
+ // Asynchronously extracts the current entry to the given output file path. If
+ // the current entry is a directory it just creates the directory
+ // synchronously instead.
+ //
+ // |success_callback| will be called on success and |failure_callback| will be
+ // called on failure. |progress_callback| will be called at least once.
// Callbacks will be posted to the current MessageLoop in-order.
+ //
+ // Precondition: Next() returned a non-null Entry.
void ExtractCurrentEntryToFilePathAsync(
const base::FilePath& output_file_path,
SuccessCallback success_callback,
FailureCallback failure_callback,
- const ProgressCallback& progress_callback);
+ ProgressCallback progress_callback);
// Extracts the current entry into memory. If the current entry is a
- // directory, the |output| parameter is set to the empty string. If the
- // current entry is a file, the |output| parameter is filled with its
- // contents. OpenCurrentEntryInZip() must be called beforehand. Note: the
- // |output| parameter can be filled with a big amount of data, avoid passing
- // it around by value, but by reference or pointer. Note: the value returned
- // by EntryInfo::original_size() cannot be trusted, so the real size of the
- // uncompressed contents can be different. |max_read_bytes| limits the ammount
- // of memory used to carry the entry. Returns true if the entire content is
- // read. If the entry is bigger than |max_read_bytes|, returns false and
- // |output| is filled with |max_read_bytes| of data. If an error occurs,
- // returns false, and |output| is set to the empty string.
+ // directory, |*output| is set to the empty string. If the current entry is a
+ // file, |*output| is filled with its contents.
+ //
+ // The value in |Entry::original_size| cannot be trusted, so the real size of
+ // the uncompressed contents can be different. |max_read_bytes| limits the
+ // amount of memory used to carry the entry.
+ //
+ // Returns true if the entire content is read without error. If the content is
+ // bigger than |max_read_bytes|, this function returns false and |*output| is
+ // filled with |max_read_bytes| of data. If an error occurs, this function
+ // returns false and |*output| contains the content extracted so far, which
+ // might be garbage data.
+ //
+ // Precondition: Next() returned a non-null Entry.
bool ExtractCurrentEntryToString(uint64_t max_read_bytes,
std::string* output) const;
- // Returns the current entry info. Returns NULL if the current entry is
- // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
- EntryInfo* current_entry_info() const {
- return current_entry_info_.get();
+ bool ExtractCurrentEntryToString(std::string* output) const {
+ return ExtractCurrentEntryToString(
+ base::checked_cast<uint64_t>(output->max_size()), output);
}
- // Returns the number of entries in the zip file.
- // Open() must be called beforehand.
+ // Returns the number of entries in the ZIP archive.
+ //
+ // Precondition: one of the Open() methods returned true.
int num_entries() const { return num_entries_; }
private:
@@ -211,25 +250,35 @@ class ZipReader {
// Resets the internal state.
void Reset();
+ // Opens the current entry in the ZIP archive. On success, returns true and
+ // updates the current entry state |entry_|.
+ //
+ // Note that there is no matching CloseEntry(). The current entry state is
+ // reset automatically as needed.
+ bool OpenEntry();
+
// Extracts a chunk of the file to the target. Will post a task for the next
// chunk and success/failure/progress callbacks as necessary.
void ExtractChunk(base::File target_file,
SuccessCallback success_callback,
FailureCallback failure_callback,
- const ProgressCallback& progress_callback,
+ ProgressCallback progress_callback,
const int64_t offset);
+ std::string encoding_;
+ std::string password_;
unzFile zip_file_;
int num_entries_;
+ int next_index_;
bool reached_end_;
- std::unique_ptr<EntryInfo> current_entry_info_;
+ bool ok_;
+ Entry entry_;
base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
-
- DISALLOW_COPY_AND_ASSIGN(ZipReader);
};
-// A writer delegate that writes to a given File.
+// A writer delegate that writes to a given File. This file is expected to be
+// initially empty.
class FileWriterDelegate : public WriterDelegate {
public:
// Constructs a FileWriterDelegate that manipulates |file|. The delegate will
@@ -238,14 +287,14 @@ class FileWriterDelegate : public WriterDelegate {
explicit FileWriterDelegate(base::File* file);
// Constructs a FileWriterDelegate that takes ownership of |file|.
- explicit FileWriterDelegate(std::unique_ptr<base::File> file);
+ explicit FileWriterDelegate(base::File owned_file);
- // Truncates the file to the number of bytes written.
- ~FileWriterDelegate() override;
+ FileWriterDelegate(const FileWriterDelegate&) = delete;
+ FileWriterDelegate& operator=(const FileWriterDelegate&) = delete;
- // WriterDelegate methods:
+ ~FileWriterDelegate() override;
- // Seeks to the beginning of the file, returning false if the seek fails.
+ // Returns true if the file handle passed to the constructor is valid.
bool PrepareOutput() override;
// Writes |num_bytes| bytes of |data| to the file, returning false on error or
@@ -255,45 +304,48 @@ class FileWriterDelegate : public WriterDelegate {
// Sets the last-modified time of the data.
void SetTimeModified(const base::Time& time) override;
- // Return the actual size of the file.
- int64_t file_length() { return file_length_; }
+ // On POSIX systems, sets the file to be executable if the source file was
+ // executable.
+ void SetPosixFilePermissions(int mode) override;
- private:
- // The file the delegate modifies.
- base::File* file_;
+ // Empties the file to avoid leaving garbage data in it.
+ void OnError() override;
+
+ // Gets the number of bytes written into the file.
+ int64_t file_length() { return file_length_; }
+ protected:
// The delegate can optionally own the file it modifies, in which case
// owned_file_ is set and file_ is an alias for owned_file_.
- std::unique_ptr<base::File> owned_file_;
+ base::File owned_file_;
- int64_t file_length_ = 0;
+ // The file the delegate modifies.
+ base::File* const file_ = &owned_file_;
- DISALLOW_COPY_AND_ASSIGN(FileWriterDelegate);
+ int64_t file_length_ = 0;
};
-// A writer delegate that writes a file at a given path.
-class FilePathWriterDelegate : public WriterDelegate {
+// A writer delegate that creates and writes a file at a given path. This does
+// not overwrite any existing file.
+class FilePathWriterDelegate : public FileWriterDelegate {
public:
- explicit FilePathWriterDelegate(const base::FilePath& output_file_path);
- ~FilePathWriterDelegate() override;
+ explicit FilePathWriterDelegate(base::FilePath output_file_path);
- // WriterDelegate methods:
+ FilePathWriterDelegate(const FilePathWriterDelegate&) = delete;
+ FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete;
- // Creates the output file and any necessary intermediate directories.
- bool PrepareOutput() override;
+ ~FilePathWriterDelegate() override;
- // Writes |num_bytes| bytes of |data| to the file, returning false if not all
- // bytes could be written.
- bool WriteBytes(const char* data, int num_bytes) override;
+ // Creates the output file and any necessary intermediate directories. Does
+ // not overwrite any existing file, and returns false if the output file
+ // cannot be created because another file conflicts with it.
+ bool PrepareOutput() override;
- // Sets the last-modified time of the data.
- void SetTimeModified(const base::Time& time) override;
+ // Deletes the output file.
+ void OnError() override;
private:
- base::FilePath output_file_path_;
- base::File file_;
-
- DISALLOW_COPY_AND_ASSIGN(FilePathWriterDelegate);
+ const base::FilePath output_file_path_;
};
} // namespace zip