diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-05-09 20:34:50 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-05-09 20:34:50 +0000 |
commit | 16e47be66cef502b1b74deec5a9e71a3b50f6d9b (patch) | |
tree | 16a3fc37ea924893d22684c3e834c4522cdc624b | |
parent | fc9fbc4e4c1d1b3de875b28a6d10ac3cde4d8196 (diff) | |
parent | 5dc80516a0f4f01939560f770c93e5dd9c8fedb4 (diff) | |
download | puffin-16e47be66cef502b1b74deec5a9e71a3b50f6d9b.tar.gz |
Snap for 8562061 from 5dc80516a0f4f01939560f770c93e5dd9c8fedb4 to mainline-media-releaseaml_med_331911000aml_med_331712010aml_med_331612000aml_med_331511000aml_med_331410000aml_med_331318000aml_med_331115000aml_med_331012020android13-mainline-media-release
Change-Id: Ifa7756b21183d5b2ed48bc9786a6f07f5eb2acf1
28 files changed, 802 insertions, 141 deletions
@@ -50,8 +50,10 @@ cc_library_static { "puffin/src/puffin.proto", "src/bit_reader.cc", "src/bit_writer.cc", + "src/brotli_util.cc", "src/huffer.cc", "src/huffman_table.cc", + "src/memory_stream.cc", "src/puff_reader.cc", "src/puff_writer.cc", "src/puffer.cc", @@ -61,6 +63,9 @@ cc_library_static { static_libs: [ "libbspatch", ], + whole_static_libs: [ + "libzucchini", + ], proto: { type: "lite", export_proto_headers: true, @@ -72,12 +77,12 @@ cc_library_static { defaults: ["puffin_defaults"], srcs: [ "src/file_stream.cc", - "src/memory_stream.cc", "src/puffdiff.cc", "src/utils.cc", ], static_libs: [ "libbsdiff", + "libzucchini", "libpuffpatch", ], } @@ -95,6 +100,7 @@ cc_binary { static_libs: [ "libbsdiff", "libbspatch", + "libzucchini", "libdivsufsort", "libdivsufsort64", "libpuffdiff", @@ -109,7 +115,9 @@ cc_test { cflags: ["-Wno-sign-compare"], srcs: [ "src/bit_io_unittest.cc", + "src/brotli_util_unittest.cc", "src/extent_stream.cc", + "src/integration_test.cc", "src/patching_unittest.cc", "src/puff_io_unittest.cc", "src/puffin_unittest.cc", @@ -124,6 +132,7 @@ cc_test { static_libs: [ "libbsdiff", "libbspatch", + "libzucchini", "libdivsufsort", "libdivsufsort64", "libpuffdiff", @@ -1,3 +1,9 @@ set noparent -ahassani@google.com + +# Android senj@google.com +xunchang@google.com +zhangkelvin@google.com + +# Chrome OS +kimjae@google.com
\ No newline at end of file diff --git a/PRESUBMIT.cfg b/PRESUBMIT.cfg index 254479a..9993474 100644 --- a/PRESUBMIT.cfg +++ b/PRESUBMIT.cfg @@ -1,6 +1,6 @@ [Hook Scripts] hook0=../../../../chromite/bin/cros lint ${PRESUBMIT_FILES} -hook1=../../../platform2/common-mk/gyplint.py ${PRESUBMIT_FILES} +hook1=../../../platform2/common-mk/gnlint.py ${PRESUBMIT_FILES} [Hook Overrides] clang_format_check: true @@ -1,27 +1,231 @@ -# Puffin - -Source code for Puffin: A utility for deterministic DEFLATE recompression. - -TODO(ahassani): Describe the directory structure and how-tos. - -## Glossary - -* __Alphabet__ A value that occurs in the input stream. It can be either a - literal:[0..255], and end of block sign [256], a length[257..285], or a - distance [0..29]. - -* __Huffman code__ A variable length code representing the Huffman encoded of an - alphabet. Huffman codes can be created uniquely using Huffman code length - array. -* __Huffman code array__ An array which an array index identifies a Huffman code - and the array element in that index represents the corresponding - alphabet. Throughout the code, Huffman code arrays are identified by - vectors with postfix `hcodes_`. -* __Huffman reverse code array__ An array which an array index identifies an - alphabet and the array element in that index contains the Huffman code of - the alphabet. Throughout the code, The Huffman reverse code arrays are - identified by vectors with postfix `rcodes_`. -* __Huffman code length__ The number of bits in a Huffman code. -* __Huffman code length array__ An array of Huffman code lengths with the array - index as the alphabet. Throughout the code, Huffman code length arrays - are identified by vectors with postfix `lens_`. +# Puffin: A deterministic deflate re-compressor (for patching purposes) + +## Table of Contents + +[TOC] + +## Puffin + +Puffin is a deterministic deflate recompressor. It is mainly used for patching +deflate compressed images (zip, gzip, etc.) because patches created from deflate +files/streams are usually large (deflate has a bit-aligned format, hence, +changing one byte in the raw data can cause the entire deflate stream to change +drastically.) + +Puffin has two tools (operations): `puffdiff` and `puffpatch` (shown +[here](puffin).) The purpose of `puffdiff` operation is to create a patch +between a source and a target file with one or both of them having some deflate +streams. This patch is used in the `puffpatch` operation to generate the target +file from the source file deterministically. The patch itself is created by +`bsdiff` library (but can be replaced by any other diffing mechanism). But, +before it uses `bsdiff` to create the patch, we have to transform both the +source and target files into a specific format so `bsdiff` can produce smaller +patches. We define `puff` operation to perform such a transformation. The +resulting stream is called a `puff` stream. The reverse transformation (from +`puff` stream to deflate stream) is called a `huff` operation. `huff` is used in +the client to transform the `puff` stream back into its original deflate stream +deterministically. + +![puffin](images/puffin.png "Puffin architecture") + +For information about deflate format see [RFC 1951]. + +## puff and huff + +`puff` is an operation that decompresses only the Huffman part of the deflate +stream and keeps the structure of the LZ77 coding unchanged. This is roughly +equivalent of decompressing ‘half way’. + +`huff` is the exact opposite of `puff` and it deterministically converts the +`puff` stream back to its original deflate stream. This deterministic conversion +is based on two facts: + +* There is no need to perform LZ77 algorithm. This means the deflate stream + could be built by any LZ77 algorithm. +* The dynamic Huffman tables can be recreated uniquely from the code length + array stored inside the `puff` stream. This means the deflate stream can be + reconstructed deterministically using the Huffman tables. + +The inclusion of Huffman tables in the `puff` stream has minuscule space burden +(on average maximum 320 bytes for each block. There is roughly one block per +32KB of uncompressed data). + +`bsdiff` of two `puffed` streams has much smaller patch in comparison to their +deflate streams, but it is larger than uncompressed streams. + +**The major benefits** + +* Size of the patch is smaller than deflate stream’s patch. +* `puff` and `huff` are deterministic operations. +* `huff` is in order of 10X faster than full recompression. It is even faster + than Huffman algorithm because it already has the Huffman tables and does + not need to reconstruct it. +* Both algorithms have very low memory footprint. +* The underlying LZ77 algorithm can be anything (as long as it is deflate + compatible). This includes google’s [zopfli] + +**The drawbacks** + +* The need to define a file format for the puffed stream and stay with this + format forever. If format needs to be changed in the future, then some + versioning mechanism should be there to handle it and backward compatibility + should be maintained. +* The need to define a payload format and stay with it forever. Similarly + needs to be versioned if required later change. +* Does not reduces the patch size as full recompression. + + +## puffdiff and puffpatch + +A deflate compressed file (gzip, zip, etc.) contains multiple deflate streams at +different locations. When this file is puffed, the resulting `puff` stream +contains both puffs and the raw data that existed between the deflates in the +compressed file. When performing `huff` operation, the location of puffs in the +`puff` stream and deflates in the deflate stream should be passed upon. This is +necessary as `huff` operation has to know exactly where the locations of both +puffs and deflates are. (See the following [image](puffin-stream)) + +![puffin-stream](images/puffin-stream.png "Puffin stream") + +Similarly `puffpatch` requires deflates and puffs locations in both the source +and target streams. These location information is saved using Protobufs in the +patch generated by `bsdiff`. One requirement for these two operations are that +`puffpatch` has to be efficient in the client. Client devices have limited +memory and CPU bandwidth and it is necessary that each of these operations are +performed with the most efficiency available. In order to achieve this +efficiency a new operation can be added to `bspatch`, that reads and writes into +a `puff` streams using special interfaces for puffing and huffing on the fly. + + +## Puffin Patch Format + +* Magic (4 bytes) - The string literal "PUF1". +* Header Length (4 bytes) - The size of the header (length of the generated + Protobuf). +* Header - Lengths and locations of deflate and puffs streams in source and + target files in a Protobuf format. See [puffin.proto]. +* Patch - This is a binary array directly generated by the `bsdiff` program. + +## Puffin Stream Format + +* [Block Header](#block-header-format) (3+ bytes) - Defines the type of the + block. +* Data - A mix of literals list and copy length/distances. +* End of Block (2 bytes) - The end of block symbol. Similar to the symbols for + copy length/distance but without the distance bytes. The actual copy length + value is 259 (0x81FF). + +### Block Header Format + +* Length (2 Bytes) - The size of the block header excluding the two Length + bytes itself - 1. +* Final Block (1 Bit) - Whether the block is the last one or not. + * 1 - Final block + * 0 - Middle block +* Type (2 Bits) + * 0 - Uncompressed. Immediately after the header, zero or one literals + list is present which defines the content of the uncompressed blocks. + * 1 - Fixed Huffman table. The list of literals and length/distances comes + immediately after the header. Fixed Huffman table is defined the deflate + specification and will not change. + * 2 - Dynamic Huffman table. The dynamic Huffman table comes at the end of + the block header. + * 3 - Undefined. This is an error. +* Skip Bits (5 Bits) - Used only for uncompressed blocks (For other types its + value is zero). In an uncompressed block, the [RFC 1951] skips any bits + after reading final block and type bits until the byte boundary in the + input. However, it does not define what the value of these bits should + be. Most implementations assume 0, but some implementations may use any + value. This segment contains those bits as a five-bits integer. When writing + the block header back to the deflate format, the actual number of bits which + where skipped will be calculated easily. +* Huffman Table - It only comes for dynamic Huffman table. + +#### Dynamic Huffman Table Format + +Depending on the scheme for storing the Huffman tables, the payload size can +change. We discovered that the following scheme does not produce the smallest +payload, but it is the most deterministic one. In a deflate stream, Huffman +tables for literals/length and distances are themselves Huffman coded. In this +format, we also `puff` the Huffman tables into the `puff` stream instead of +completely decompressing them. + +There are three tables stored in this structure very similar to the one defined +in [RFC 1951]. A Huffman table can be defined as an array of unsigned integer +code length values. Three Puffed Huffman tables appear like the following +scheme. The first table (codes) is the Huffman table used to decode the next two +Huffman tables. The second Huffman table is used to decode literals and lengths, +and the third Huffman table is used to decode distances. + + +* Literal/Length Count (1 byte) - Number of alphabets used for literal/length + Huffman codes - 257. +* Distance Count (1 byte) - Number of alphabets used for distance Huffman + codes - 1. +* Alphabet Count (1 byte) - Number of alphabets for coding two previous + Huffman tables - 4. +* Code Lengths ((Alphabet Count + 1) / 2 bytes) - A list of codes for reading + the next two Huffman tables. Each byte contains two codes. If the number of + codes is odd, the last four Bits will be zero. +* Literal/Length/Distance Code Lengths - List of code lengths for encoding + literals/lengths/distance followed The encoding is as follows: + * [0..15] - Represent code [0..15] + * [16..19] - Copy the last code length [3..6] times. + * [20..27] - Repeat code length of 0 for [3..10] times. + * [28..155] - Repeat code length of 0 for [11..138] times. + +### Literals List +Literals lists are constructed by a “length” value followed by “length” bytes of +literals. The Puffer should try to merge adjacent literals lists as much as +possible into one literals list in the `puff` stream. This Is a length value +followed by length bytes of literals (Even if there is only one literal.) + +* Tag (1 Bit) - The value is 0 indicating that this is a list of literals (not + a copy length/distance). +* Length - The number of literals that would follow in the list. + * (7 Bits) Length = [1 .. 127], The value is: Length - 1 + * (7 Bits + 2 Bytes) Length = [128 .. 65663], The values are: 127, + Length - 127. + Conserves size by using only one byte if the number of upcoming + literals is smaller or equal to 127 (About 99% of literals length in a + normal deflate stream fit into this category.) We should never have zero + length literals. Otherwise it will use three bytes. +* Literals (Length Bytes) - A sequence of Length number of literals. + +### Copy Length/Distance + +This Is a Length value followed by a Distance value. + +* Tag (1 Bit) - The value is 1 indicating that this is a copy length/distance + field. +* Length - Conserves size by using only one byte if the length value is + smaller or equal to 129. Otherwise two bytes are used. + * (7 Bits) - Length = [3 .. 129], The value is: Length - 3 + * (7 Bites + 1 Byte) Length = [130 .. 258], The value is: 127, Length - + 130: +* Distance (2 Bytes) - Distance = [1 .. 32768], The value is: + Distance - 1. The distance value as an unsigned integer. + +## Building Puffin +Currently Puffin is being used in both Android and Chrome OS and is built +differently in each of them. There is also a Makefile build, but it is not +comprehensive. + +## Usage + +Puffin builds an executable `puffin` which can be used to perform diffing and +patching algorithms using Puffin format. To get the list of options available +run: + +```shell +puffin --help +``` + +It can also be used as a library (currently used by update_engine) that provides +different APIs. + +## References + +[RFC 1951]: https://www.ietf.org/rfc/rfc1951.txt +[puffin.proto]: /src/puffin.proto +[zopfli]: https://en.wikipedia.org/wiki/Zopfli diff --git a/TEST_MAPPING b/TEST_MAPPING index ee8f0ca..d78c7bd 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -3,5 +3,10 @@ { "name": "puffin_unittest" } + ], + "hwasan-postsubmit": [ + { + "name": "puffin_unittest" + } ] } diff --git a/images/puffin-stream.png b/images/puffin-stream.png Binary files differnew file mode 100644 index 0000000..a4a654b --- /dev/null +++ b/images/puffin-stream.png diff --git a/images/puffin.png b/images/puffin.png Binary files differnew file mode 100644 index 0000000..a0601e6 --- /dev/null +++ b/images/puffin.png diff --git a/src/brotli_util.cc b/src/brotli_util.cc new file mode 100644 index 0000000..2de5578 --- /dev/null +++ b/src/brotli_util.cc @@ -0,0 +1,112 @@ +// Copyright 2021 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "puffin/src/include/puffin/brotli_util.h" + +#include "brotli/decode.h" +#include "brotli/encode.h" +#include "puffin/src/logging.h" +#include "puffin/memory_stream.h" + +namespace puffin { + +namespace { + +constexpr auto kBufferSize = 32768; +constexpr auto kDefaultParamQuality = 9; +constexpr auto kDefaultParamLgwin = 20; +} // namespace + +bool BrotliEncode(const uint8_t* input, + size_t input_size, + UniqueStreamPtr output_stream, + int quality) { + std::unique_ptr<BrotliEncoderState, decltype(&BrotliEncoderDestroyInstance)> + encoder(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr), + BrotliEncoderDestroyInstance); + TEST_AND_RETURN_FALSE(encoder != nullptr); + + BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_QUALITY, + quality); + BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_LGWIN, + kDefaultParamLgwin); + + size_t available_in = input_size; + while (available_in != 0 || !BrotliEncoderIsFinished(encoder.get())) { + const uint8_t* next_in = input + input_size - available_in; + // Set up the output buffer + uint8_t buffer[kBufferSize]; + uint8_t* next_out = buffer; + size_t available_out = kBufferSize; + + BrotliEncoderOperation op = + available_in == 0 ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS; + + if (!BrotliEncoderCompressStream(encoder.get(), op, &available_in, &next_in, + &available_out, &next_out, nullptr)) { + LOG(ERROR) << "Failed to compress " << input_size << " bytes with brotli"; + return false; + } + + size_t bytes_consumed = kBufferSize - available_out; + output_stream->Write(buffer, bytes_consumed); + } + + return true; +} + +bool BrotliEncode(const uint8_t* input, + size_t input_size, + UniqueStreamPtr output_stream) { + return BrotliEncode(input, input_size, std::move(output_stream), + kDefaultParamQuality); +} + +bool BrotliEncode(const uint8_t* input, + size_t input_size, + std::vector<uint8_t>* output) { + TEST_AND_RETURN_FALSE(output != nullptr); + return BrotliEncode(input, input_size, MemoryStream::CreateForWrite(output)); +} + +bool BrotliDecode(const uint8_t* input, + size_t input_size, + UniqueStreamPtr output_stream) { + std::unique_ptr<BrotliDecoderState, decltype(&BrotliDecoderDestroyInstance)> + decoder(BrotliDecoderCreateInstance(nullptr, nullptr, nullptr), + BrotliDecoderDestroyInstance); + TEST_AND_RETURN_FALSE(decoder != nullptr); + + size_t available_in = input_size; + while (available_in != 0 || !BrotliDecoderIsFinished(decoder.get())) { + const uint8_t* next_in = input + input_size - available_in; + // Set up the output buffer + uint8_t buffer[kBufferSize]; + uint8_t* next_out = buffer; + size_t available_out = kBufferSize; + + BrotliDecoderResult result = + BrotliDecoderDecompressStream(decoder.get(), &available_in, &next_in, + &available_out, &next_out, nullptr); + if (result == BROTLI_DECODER_RESULT_ERROR || + result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + LOG(ERROR) << "Failed to decompress " << input_size + << " bytes with brotli, result " << result; + return false; + } + + size_t bytes_consumed = kBufferSize - available_out; + output_stream->Write(buffer, bytes_consumed); + } + return true; +} + +bool BrotliDecode(const uint8_t* input, + size_t input_size, + std::vector<uint8_t>* output) { + TEST_AND_RETURN_FALSE(output != nullptr); + return BrotliDecode(input, input_size, MemoryStream::CreateForWrite(output)); +} + +} // namespace puffin diff --git a/src/brotli_util_unittest.cc b/src/brotli_util_unittest.cc new file mode 100644 index 0000000..4ce8763 --- /dev/null +++ b/src/brotli_util_unittest.cc @@ -0,0 +1,32 @@ +// Copyright 2021 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gtest/gtest.h" + +#include "puffin/src/include/puffin/brotli_util.h" +#include "puffin/memory_stream.h" +#include "puffin/src/puffin_stream.h" + +namespace puffin { + +namespace { + +// echo "puffin test" | xxd -i +const Buffer kTestString = {0x70, 0x75, 0x66, 0x66, 0x69, 0x6e, + 0x20, 0x74, 0x65, 0x73, 0x74, 0x0a}; +} // namespace + +TEST(BrotliUtilTest, CompressAndDecompressTest) { + Buffer compressed; + ASSERT_TRUE(BrotliEncode(kTestString.data(), kTestString.size(), + MemoryStream::CreateForWrite(&compressed))); + ASSERT_FALSE(compressed.empty()); + + Buffer decompressed; + ASSERT_TRUE(BrotliDecode(compressed.data(), compressed.size(), + MemoryStream::CreateForWrite(&decompressed))); + ASSERT_EQ(kTestString, decompressed); +} + +} // namespace puffin diff --git a/src/file_stream.cc b/src/file_stream.cc index 0abde31..de03723 100644 --- a/src/file_stream.cc +++ b/src/file_stream.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "puffin/src/file_stream.h" +#include "puffin/file_stream.h" #include <fcntl.h> #include <unistd.h> diff --git a/src/fuzzer_puffpatch.cc b/src/fuzzer_puffpatch.cc index 28ee5bc..a27c3b9 100644 --- a/src/fuzzer_puffpatch.cc +++ b/src/fuzzer_puffpatch.cc @@ -9,7 +9,7 @@ #include "puffin/src/include/puffin/common.h" #include "puffin/src/include/puffin/puffpatch.h" -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" using puffin::BitExtent; using puffin::Buffer; diff --git a/src/include/puffin/brotli_util.h b/src/include/puffin/brotli_util.h new file mode 100644 index 0000000..cee84ba --- /dev/null +++ b/src/include/puffin/brotli_util.h @@ -0,0 +1,44 @@ +// Copyright 2021 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SRC_INCLUDE_PUFFIN_BROTLI_UTIL_H_ +#define SRC_INCLUDE_PUFFIN_BROTLI_UTIL_H_ + +#include <stdint.h> + +#include <vector> + +#include "puffin/stream.h" + +namespace puffin { + +// Use brotli to compress |input_size| bytes of data, and write the result to +// |output_stream|. +bool BrotliEncode(const uint8_t* input, + size_t input_size, + UniqueStreamPtr output_stream); +// Similar to above function, and writes to a output buffer. +bool BrotliEncode(const uint8_t* input, + size_t input_size, + std::vector<uint8_t>* output); + +// Similar to the above, but quality controls how well the compression is +// |quality| should be between 0 and 11 +bool BrotliEncode(const uint8_t* input, + size_t input_size, + UniqueStreamPtr output_stream, + int quality); + +// Decompress |input_size| bytes of data with brotli, and write the result to +// |output_stream|. +bool BrotliDecode(const uint8_t* input, + size_t input_size, + UniqueStreamPtr output_stream); +// Similar to above function, and writes to a output buffer. +bool BrotliDecode(const uint8_t* input, + size_t input_size, + std::vector<uint8_t>* output); +} // namespace puffin + +#endif // SRC_INCLUDE_PUFFIN_BROTLI_UTIL_H_ diff --git a/src/include/puffin/common.h b/src/include/puffin/common.h index 954b7d9..61e9eb2 100644 --- a/src/include/puffin/common.h +++ b/src/include/puffin/common.h @@ -31,24 +31,37 @@ using Buffer = std::vector<uint8_t>; // defined an extra class so the users of puffin do not have to include // puffin.pb.h and deal with its use. struct ByteExtent { - ByteExtent(uint64_t offset, uint64_t length) + constexpr ByteExtent(uint64_t offset, uint64_t length) : offset(offset), length(length) {} - bool operator==(const ByteExtent& other) const { + constexpr bool operator==(const ByteExtent& other) const { return this->length == other.length && this->offset == other.offset; } + constexpr bool operator<(const ByteExtent& other) const { + if (offset != other.offset) { + return offset < other.offset; + } + return length < other.length; + } + uint64_t offset; uint64_t length; }; struct BitExtent { - BitExtent(uint64_t offset, uint64_t length) + constexpr BitExtent(uint64_t offset, uint64_t length) : offset(offset), length(length) {} - bool operator==(const BitExtent& other) const { + constexpr bool operator==(const BitExtent& other) const { return this->length == other.length && this->offset == other.offset; } + constexpr bool operator<(const BitExtent& other) const { + if (offset != other.offset) { + return offset < other.offset; + } + return length < other.length; + } uint64_t offset; uint64_t length; diff --git a/src/file_stream.h b/src/include/puffin/file_stream.h index 626d7c6..626d7c6 100644 --- a/src/file_stream.h +++ b/src/include/puffin/file_stream.h diff --git a/src/memory_stream.h b/src/include/puffin/memory_stream.h index a338e7f..a338e7f 100644 --- a/src/memory_stream.h +++ b/src/include/puffin/memory_stream.h diff --git a/src/include/puffin/puffdiff.h b/src/include/puffin/puffdiff.h index c8883bf..ad8a106 100644 --- a/src/include/puffin/puffdiff.h +++ b/src/include/puffin/puffdiff.h @@ -15,14 +15,22 @@ namespace puffin { +enum class PatchAlgorithm { + kBsdiff = 0, + kZucchini = 1, +}; + // Performs a diff operation between input deflate streams and creates a patch // that is used in the client to recreate the |dst| from |src|. // |src| IN Source deflate stream. // |dst| IN Destination deflate stream. // |src_deflates| IN Deflate locations in |src|. // |dst_deflates| IN Deflate locations in |dst|. -// |compressors| IN Compressors to use in the underlying bsdiff, e.g. bz2, +// |compressors| IN Compressors to use in the underlying bsdiff, e.g. +// bz2, // brotli. +// |patchAlgorithm| IN The patchAlgorithm used to create patches between +// uncompressed bytes, e.g. bsdiff, zucchini. // |tmp_filepath| IN A path to a temporary file. The caller has the // responsibility of unlinking the file after the call to // |PuffDiff| finishes. @@ -32,6 +40,16 @@ bool PuffDiff(UniqueStreamPtr src, const std::vector<BitExtent>& src_deflates, const std::vector<BitExtent>& dst_deflates, const std::vector<bsdiff::CompressorType>& compressors, + PatchAlgorithm patchAlgorithm, + const std::string& tmp_filepath, + Buffer* patch); + +// This function uses bsdiff as the patch algorithm. +bool PuffDiff(UniqueStreamPtr src, + UniqueStreamPtr dst, + const std::vector<BitExtent>& src_deflates, + const std::vector<BitExtent>& dst_deflates, + const std::vector<bsdiff::CompressorType>& compressors, const std::string& tmp_filepath, Buffer* patch); diff --git a/src/integration_test.cc b/src/integration_test.cc new file mode 100644 index 0000000..9e03768 --- /dev/null +++ b/src/integration_test.cc @@ -0,0 +1,88 @@ +// Copyright 2021 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <string> +#include <vector> + +#include "gtest/gtest.h" + +#include "puffin/src/include/puffin/puffdiff.h" +#include "puffin/src/include/puffin/puffpatch.h" +#include "puffin/src/include/puffin/utils.h" +#include "puffin/memory_stream.h" +#include "puffin/src/puffin_stream.h" +#include "puffin/src/unittest_common.h" + +namespace puffin { + +namespace { +// xxd -i <name>.zip +const Buffer kTestZipA = { + 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x79, + 0x0d, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x1c, 0x00, 0x31, 0x55, 0x54, 0x09, 0x00, 0x03, + 0x5c, 0xed, 0x16, 0x61, 0x5c, 0xed, 0x16, 0x61, 0x75, 0x78, 0x0b, 0x00, + 0x01, 0x04, 0x8f, 0x66, 0x05, 0x00, 0x04, 0x53, 0x5f, 0x01, 0x00, 0x50, + 0x4b, 0x01, 0x02, 0x1e, 0x03, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, + 0x79, 0x0d, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xa4, 0x81, 0x00, 0x00, 0x00, 0x00, 0x31, 0x55, 0x54, + 0x05, 0x00, 0x03, 0x5c, 0xed, 0x16, 0x61, 0x75, 0x78, 0x0b, 0x00, 0x01, + 0x04, 0x8f, 0x66, 0x05, 0x00, 0x04, 0x53, 0x5f, 0x01, 0x00, 0x50, 0x4b, + 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x47, 0x00, + 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00}; + +const Buffer kTestZipB = { + 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x79, + 0x0d, 0x53, 0x4e, 0x81, 0x88, 0x47, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x1c, 0x00, 0x32, 0x55, 0x54, 0x09, 0x00, 0x03, + 0x88, 0xed, 0x16, 0x61, 0x88, 0xed, 0x16, 0x61, 0x75, 0x78, 0x0b, 0x00, + 0x01, 0x04, 0x8f, 0x66, 0x05, 0x00, 0x04, 0x53, 0x5f, 0x01, 0x00, 0x61, + 0x62, 0x63, 0x0a, 0x50, 0x4b, 0x01, 0x02, 0x1e, 0x03, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x26, 0x79, 0x0d, 0x53, 0x4e, 0x81, 0x88, 0x47, 0x04, + 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xa4, 0x81, 0x00, 0x00, 0x00, + 0x00, 0x32, 0x55, 0x54, 0x05, 0x00, 0x03, 0x88, 0xed, 0x16, 0x61, 0x75, + 0x78, 0x0b, 0x00, 0x01, 0x04, 0x8f, 0x66, 0x05, 0x00, 0x04, 0x53, 0x5f, + 0x01, 0x00, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x01, 0x00, 0x47, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00}; + +} // namespace + +class PuffinIntegrationTest : public testing::TestWithParam<PatchAlgorithm> { + protected: + PatchAlgorithm getPatchType() { return GetParam(); } +}; + +TEST_P(PuffinIntegrationTest, PuffinDiffPatchTest) { + std::vector<BitExtent> src_deflates; + ASSERT_TRUE(LocateDeflatesInZipArchive(kTestZipA, &src_deflates)); + + std::vector<BitExtent> dst_deflates; + ASSERT_TRUE(LocateDeflatesInZipArchive(kTestZipB, &dst_deflates)); + + std::string tmp_file; + ASSERT_TRUE(MakeTempFile(&tmp_file, nullptr)); + Buffer patch; + ASSERT_TRUE(PuffDiff(MemoryStream::CreateForRead(kTestZipA), + MemoryStream::CreateForRead(kTestZipB), src_deflates, + dst_deflates, {bsdiff::CompressorType::kBrotli}, + getPatchType(), tmp_file, &patch)); + + Buffer patched; + auto src_stream = MemoryStream::CreateForRead(kTestZipA); + auto dst_stream = MemoryStream::CreateForWrite(&patched); + ASSERT_TRUE(PuffPatch(MemoryStream::CreateForRead(kTestZipA), + MemoryStream::CreateForWrite(&patched), patch.data(), + patch.size())); + + ASSERT_EQ(kTestZipB, patched); +} + +INSTANTIATE_TEST_CASE_P(TestWithPatchType, + PuffinIntegrationTest, + testing::Values(PatchAlgorithm::kBsdiff, + PatchAlgorithm::kZucchini)); + +} // namespace puffin
\ No newline at end of file diff --git a/src/logging.h b/src/logging.h index b48fcd5..8b80153 100644 --- a/src/logging.h +++ b/src/logging.h @@ -5,7 +5,10 @@ #ifndef SRC_LOGGING_H_ #define SRC_LOGGING_H_ -#ifdef USE_BRILLO +#if defined(BASE_VER) && BASE_VER >= 822064 +#include "base/check.h" // CHECK-related macros are defined in base/check.h on Chrome OS. +#include "base/logging.h" +#elif USE_BRILLO #include "base/logging.h" #else #include "glog/logging.h" diff --git a/src/main.cc b/src/main.cc index 20f0948..0c1bdf8 100644 --- a/src/main.cc +++ b/src/main.cc @@ -13,8 +13,8 @@ #include "gflags/gflags.h" #endif +#include "puffin/file_stream.h" #include "puffin/src/extent_stream.h" -#include "puffin/src/file_stream.h" #include "puffin/src/include/puffin/common.h" #include "puffin/src/include/puffin/huffer.h" #include "puffin/src/include/puffin/puffdiff.h" @@ -22,7 +22,7 @@ #include "puffin/src/include/puffin/puffpatch.h" #include "puffin/src/include/puffin/utils.h" #include "puffin/src/logging.h" -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" #include "puffin/src/puffin_stream.h" using puffin::BitExtent; @@ -154,44 +154,46 @@ bool LocateDeflatesBasedOnFileType(const UniqueStreamPtr& stream, } // namespace -#define SETUP_FLAGS \ - DEFINE_string(src_file, "", "Source file"); \ - DEFINE_string(dst_file, "", "Target file"); \ - DEFINE_string(patch_file, "", "patch file"); \ - DEFINE_string( \ - src_deflates_byte, "", \ - "Source deflate byte locations in the format offset:length,..."); \ - DEFINE_string( \ - dst_deflates_byte, "", \ - "Target deflate byte locations in the format offset:length,..."); \ - DEFINE_string( \ - src_deflates_bit, "", \ - "Source deflate bit locations in the format offset:length,..."); \ - DEFINE_string( \ - dst_deflates_bit, "", \ - "Target deflatebit locations in the format offset:length,..."); \ - DEFINE_string(src_puffs, "", \ - "Source puff locations in the format offset:length,..."); \ - DEFINE_string(dst_puffs, "", \ - "Target puff locations in the format offset:length,..."); \ - DEFINE_string(src_extents, "", \ - "Source extents in the format of offset:length,..."); \ - DEFINE_string(dst_extents, "", \ - "Target extents in the format of offset:length,..."); \ - DEFINE_string(operation, "", \ - "Type of the operation: puff, huff, puffdiff, puffpatch, " \ - "puffhuff"); \ - DEFINE_string(src_file_type, "", \ - "Type of the input source file: deflate, gzip, " \ - "zlib or zip"); \ - DEFINE_string(dst_file_type, "", \ - "Same as src_file_type but for the target file"); \ - DEFINE_bool(verbose, false, \ - "Logs all the given parameters including internally " \ - "generated ones"); \ - DEFINE_uint64(cache_size, kDefaultPuffCacheSize, \ - "Maximum size to cache the puff stream. Used in puffpatch"); - +#define SETUP_FLAGS \ + DEFINE_string(src_file, "", "Source file"); \ + DEFINE_string(dst_file, "", "Target file"); \ + DEFINE_string(patch_file, "", "patch file"); \ + DEFINE_string( \ + src_deflates_byte, "", \ + "Source deflate byte locations in the format offset:length,..."); \ + DEFINE_string( \ + dst_deflates_byte, "", \ + "Target deflate byte locations in the format offset:length,..."); \ + DEFINE_string( \ + src_deflates_bit, "", \ + "Source deflate bit locations in the format offset:length,..."); \ + DEFINE_string( \ + dst_deflates_bit, "", \ + "Target deflatebit locations in the format offset:length,..."); \ + DEFINE_string(src_puffs, "", \ + "Source puff locations in the format offset:length,..."); \ + DEFINE_string(dst_puffs, "", \ + "Target puff locations in the format offset:length,..."); \ + DEFINE_string(src_extents, "", \ + "Source extents in the format of offset:length,..."); \ + DEFINE_string(dst_extents, "", \ + "Target extents in the format of offset:length,..."); \ + DEFINE_string(operation, "", \ + "Type of the operation: puff, huff, puffdiff, puffpatch, " \ + "puffhuff"); \ + DEFINE_string(src_file_type, "", \ + "Type of the input source file: deflate, gzip, " \ + "zlib or zip"); \ + DEFINE_string(dst_file_type, "", \ + "Same as src_file_type but for the target file"); \ + DEFINE_bool(verbose, false, \ + "Logs all the given parameters including internally " \ + "generated ones"); \ + DEFINE_uint64(cache_size, kDefaultPuffCacheSize, \ + "Maximum size to cache the puff stream. Used in puffpatch"); \ + DEFINE_int32(patch_algorithm, 0, \ + "Type of raw diff algorithm to use. The current supported " \ + "ones are 0: bsdiff, 1: zucchini."); #ifndef USE_BRILLO SETUP_FLAGS; #endif @@ -343,12 +345,18 @@ bool Main(int argc, char** argv) { &dst_deflates_bit)); } + if (FLAGS_patch_algorithm != 0 && FLAGS_patch_algorithm != 1) { + LOG(ERROR) + << "The supported patch algorithms are 0: bsdiff, 1: zucchini."; + return false; + } // TODO(xunchang) add flags to select the bsdiff compressors. Buffer puffdiff_delta; TEST_AND_RETURN_FALSE(puffin::PuffDiff( std::move(src_stream), std::move(dst_stream), src_deflates_bit, dst_deflates_bit, {bsdiff::CompressorType::kBZ2, bsdiff::CompressorType::kBrotli}, + static_cast<puffin::PatchAlgorithm>(FLAGS_patch_algorithm), "/tmp/patch.tmp", &puffdiff_delta)); if (FLAGS_verbose) { LOG(INFO) << "patch_size: " << puffdiff_delta.size(); diff --git a/src/memory_stream.cc b/src/memory_stream.cc index 706f4a3..8a2afdd 100644 --- a/src/memory_stream.cc +++ b/src/memory_stream.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" #include <fcntl.h> #include <unistd.h> diff --git a/src/patching_unittest.cc b/src/patching_unittest.cc index 134f137..fec2ecc 100644 --- a/src/patching_unittest.cc +++ b/src/patching_unittest.cc @@ -12,7 +12,7 @@ #include "puffin/src/include/puffin/puffpatch.h" #include "puffin/src/include/puffin/utils.h" #include "puffin/src/logging.h" -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" #include "puffin/src/puffin_stream.h" #include "puffin/src/unittest_common.h" diff --git a/src/puffdiff.cc b/src/puffdiff.cc index 15d0fa6..2588379 100644 --- a/src/puffdiff.cc +++ b/src/puffdiff.cc @@ -13,14 +13,18 @@ #include "bsdiff/bsdiff.h" #include "bsdiff/patch_writer_factory.h" +#include "zucchini/buffer_view.h" +#include "zucchini/patch_writer.h" +#include "zucchini/zucchini.h" -#include "puffin/src/file_stream.h" +#include "puffin/file_stream.h" +#include "puffin/src/include/puffin/brotli_util.h" #include "puffin/src/include/puffin/common.h" #include "puffin/src/include/puffin/puffer.h" #include "puffin/src/include/puffin/puffpatch.h" #include "puffin/src/include/puffin/utils.h" #include "puffin/src/logging.h" -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" #include "puffin/src/puffin.pb.h" #include "puffin/src/puffin_stream.h" @@ -47,15 +51,16 @@ void CopyVectorToRpf( // Structure of a puffin patch // +-------+------------------+-------------+--------------+ -// |P|U|F|1| PatchHeader Size | PatchHeader | bsdiff_patch | +// |P|U|F|1| PatchHeader Size | PatchHeader | raw patch | // +-------+------------------+-------------+--------------+ -bool CreatePatch(const Buffer& bsdiff_patch, +bool CreatePatch(const Buffer& raw_patch, const vector<BitExtent>& src_deflates, const vector<BitExtent>& dst_deflates, const vector<ByteExtent>& src_puffs, const vector<ByteExtent>& dst_puffs, uint64_t src_puff_size, uint64_t dst_puff_size, + PatchAlgorithm patchAlgorithm, Buffer* patch) { metadata::PatchHeader header; header.set_version(1); @@ -67,6 +72,7 @@ bool CreatePatch(const Buffer& bsdiff_patch, header.mutable_src()->set_puff_length(src_puff_size); header.mutable_dst()->set_puff_length(dst_puff_size); + header.set_type(static_cast<metadata::PatchHeader_PatchType>(patchAlgorithm)); const size_t header_size_long = header.ByteSizeLong(); TEST_AND_RETURN_FALSE(header_size_long <= UINT32_MAX); @@ -74,7 +80,7 @@ bool CreatePatch(const Buffer& bsdiff_patch, uint64_t offset = 0; patch->resize(kMagicLength + sizeof(header_size) + header_size + - bsdiff_patch.size()); + raw_patch.size()); memcpy(patch->data() + offset, kMagic, kMagicLength); offset += kMagicLength; @@ -88,9 +94,9 @@ bool CreatePatch(const Buffer& bsdiff_patch, header.SerializeToArray(patch->data() + offset, header_size)); offset += header_size; - memcpy(patch->data() + offset, bsdiff_patch.data(), bsdiff_patch.size()); + memcpy(patch->data() + offset, raw_patch.data(), raw_patch.size()); - if (bsdiff_patch.size() > patch->size()) { + if (raw_patch.size() > patch->size()) { LOG(ERROR) << "Puffin patch is invalid"; } return true; @@ -103,6 +109,7 @@ bool PuffDiff(UniqueStreamPtr src, const vector<BitExtent>& src_deflates, const vector<BitExtent>& dst_deflates, const vector<bsdiff::CompressorType>& compressors, + PatchAlgorithm patchAlgorithm, const string& tmp_filepath, Buffer* patch) { auto puffer = std::make_shared<Puffer>(); @@ -130,29 +137,70 @@ bool PuffDiff(UniqueStreamPtr src, TEST_AND_RETURN_FALSE(puff_deflate_stream(std::move(dst), dst_deflates, &dst_puff_buffer, &dst_puffs)); - auto bsdiff_patch_writer = bsdiff::CreateBSDF2PatchWriter( - tmp_filepath, compressors, kBrotliCompressionQuality); - - TEST_AND_RETURN_FALSE( - 0 == bsdiff::bsdiff(src_puff_buffer.data(), src_puff_buffer.size(), - dst_puff_buffer.data(), dst_puff_buffer.size(), - bsdiff_patch_writer.get(), nullptr)); - - auto bsdiff_patch = FileStream::Open(tmp_filepath, true, false); - TEST_AND_RETURN_FALSE(bsdiff_patch); - uint64_t patch_size; - TEST_AND_RETURN_FALSE(bsdiff_patch->GetSize(&patch_size)); - Buffer bsdiff_patch_buf(patch_size); - TEST_AND_RETURN_FALSE( - bsdiff_patch->Read(bsdiff_patch_buf.data(), bsdiff_patch_buf.size())); - TEST_AND_RETURN_FALSE(bsdiff_patch->Close()); + if (patchAlgorithm == PatchAlgorithm::kBsdiff) { + auto bsdiff_patch_writer = bsdiff::CreateBSDF2PatchWriter( + tmp_filepath, compressors, kBrotliCompressionQuality); + + TEST_AND_RETURN_FALSE( + 0 == bsdiff::bsdiff(src_puff_buffer.data(), src_puff_buffer.size(), + dst_puff_buffer.data(), dst_puff_buffer.size(), + bsdiff_patch_writer.get(), nullptr)); + + auto bsdiff_patch = FileStream::Open(tmp_filepath, true, false); + TEST_AND_RETURN_FALSE(bsdiff_patch); + uint64_t patch_size; + TEST_AND_RETURN_FALSE(bsdiff_patch->GetSize(&patch_size)); + Buffer bsdiff_patch_buf(patch_size); + TEST_AND_RETURN_FALSE( + bsdiff_patch->Read(bsdiff_patch_buf.data(), bsdiff_patch_buf.size())); + TEST_AND_RETURN_FALSE(bsdiff_patch->Close()); + + TEST_AND_RETURN_FALSE(CreatePatch( + bsdiff_patch_buf, src_deflates, dst_deflates, src_puffs, dst_puffs, + src_puff_buffer.size(), dst_puff_buffer.size(), patchAlgorithm, patch)); + } else if (patchAlgorithm == PatchAlgorithm::kZucchini) { + zucchini::ConstBufferView src_bytes(src_puff_buffer.data(), + src_puff_buffer.size()); + zucchini::ConstBufferView dst_bytes(dst_puff_buffer.data(), + dst_puff_buffer.size()); + + zucchini::EnsemblePatchWriter patch_writer(src_bytes, dst_bytes); + auto status = zucchini::GenerateBuffer(src_bytes, dst_bytes, &patch_writer); + TEST_AND_RETURN_FALSE(status == zucchini::status::kStatusSuccess); + + Buffer zucchini_patch_buf(patch_writer.SerializedSize()); + patch_writer.SerializeInto( + {zucchini_patch_buf.data(), zucchini_patch_buf.size()}); + + // Use brotli to compress the zucchini patch. + // TODO(197361113) respect the CompressorType parameter for zucchini. + Buffer compressed_patch; + TEST_AND_RETURN_FALSE(BrotliEncode(zucchini_patch_buf.data(), + zucchini_patch_buf.size(), + &compressed_patch)); + + TEST_AND_RETURN_FALSE(CreatePatch( + compressed_patch, src_deflates, dst_deflates, src_puffs, dst_puffs, + src_puff_buffer.size(), dst_puff_buffer.size(), patchAlgorithm, patch)); + } else { + LOG(ERROR) << "unsupported type " << static_cast<int>(patchAlgorithm); + return false; + } - TEST_AND_RETURN_FALSE(CreatePatch( - bsdiff_patch_buf, src_deflates, dst_deflates, src_puffs, dst_puffs, - src_puff_buffer.size(), dst_puff_buffer.size(), patch)); return true; } +bool PuffDiff(UniqueStreamPtr src, + UniqueStreamPtr dst, + const std::vector<BitExtent>& src_deflates, + const std::vector<BitExtent>& dst_deflates, + const std::vector<bsdiff::CompressorType>& compressors, + const std::string& tmp_filepath, + Buffer* patch) { + return PuffDiff(std::move(src), std::move(dst), src_deflates, dst_deflates, + compressors, PatchAlgorithm::kBsdiff, tmp_filepath, patch); +} + bool PuffDiff(const Buffer& src, const Buffer& dst, const vector<BitExtent>& src_deflates, @@ -162,7 +210,7 @@ bool PuffDiff(const Buffer& src, Buffer* patch) { return PuffDiff(MemoryStream::CreateForRead(src), MemoryStream::CreateForRead(dst), src_deflates, dst_deflates, - compressors, tmp_filepath, patch); + compressors, PatchAlgorithm::kBsdiff, tmp_filepath, patch); } bool PuffDiff(const Buffer& src, diff --git a/src/puffin.proto b/src/puffin.proto index 7651bc0..7ed5bc7 100644 --- a/src/puffin.proto +++ b/src/puffin.proto @@ -19,8 +19,15 @@ message StreamInfo { } message PatchHeader { + enum PatchType { + BSDIFF = 0; + ZUCCHINI = 1; + } + int32 version = 1; StreamInfo src = 2; StreamInfo dst = 3; // The bsdiff patch is installed right after this protobuf. + + PatchType type = 4; }
\ No newline at end of file diff --git a/src/puffin_unittest.cc b/src/puffin_unittest.cc index 0279813..3ac0173 100644 --- a/src/puffin_unittest.cc +++ b/src/puffin_unittest.cc @@ -15,7 +15,7 @@ #include "puffin/src/include/puffin/puffer.h" #include "puffin/src/include/puffin/utils.h" #include "puffin/src/logging.h" -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" #include "puffin/src/puff_reader.h" #include "puffin/src/puff_writer.h" #include "puffin/src/puffin_stream.h" diff --git a/src/puffpatch.cc b/src/puffpatch.cc index 0b4ffcb..e9e4a32 100644 --- a/src/puffpatch.cc +++ b/src/puffpatch.cc @@ -14,12 +14,16 @@ #include "bsdiff/bspatch.h" #include "bsdiff/file_interface.h" +#include "zucchini/patch_reader.h" +#include "zucchini/zucchini.h" +#include "puffin/src/include/puffin/brotli_util.h" #include "puffin/src/include/puffin/common.h" #include "puffin/src/include/puffin/huffer.h" #include "puffin/src/include/puffin/puffer.h" #include "puffin/src/include/puffin/stream.h" #include "puffin/src/logging.h" +#include "puffin/memory_stream.h" #include "puffin/src/puffin.pb.h" #include "puffin/src/puffin_stream.h" @@ -92,8 +96,6 @@ class BsdiffStream : public bsdiff::FileInterface { DISALLOW_COPY_AND_ASSIGN(BsdiffStream); }; -} // namespace - bool DecodePatch(const uint8_t* patch, size_t patch_length, size_t* bsdiff_patch_offset, @@ -103,7 +105,8 @@ bool DecodePatch(const uint8_t* patch, vector<ByteExtent>* src_puffs, vector<ByteExtent>* dst_puffs, uint64_t* src_puff_size, - uint64_t* dst_puff_size) { + uint64_t* dst_puff_size, + metadata::PatchHeader_PatchType* patch_type) { size_t offset = 0; uint32_t header_size; TEST_AND_RETURN_FALSE(patch_length >= (kMagicLength + sizeof(header_size))); @@ -135,43 +138,104 @@ bool DecodePatch(const uint8_t* patch, *bsdiff_patch_offset = offset; *bsdiff_patch_size = patch_length - offset; + + *patch_type = header.type(); + return true; +} + +bool ApplyZucchiniPatch(UniqueStreamPtr src_stream, + size_t src_size, + const uint8_t* patch_start, + size_t patch_size, + UniqueStreamPtr dst_stream) { + // Read the source data + Buffer puffed_src(src_size); + Buffer buffer(1024 * 1024); + uint64_t bytes_wrote = 0; + while (bytes_wrote < src_size) { + auto write_size = + std::min(static_cast<uint64_t>(buffer.size()), src_size - bytes_wrote); + TEST_AND_RETURN_FALSE(src_stream->Read(buffer.data(), write_size)); + std::copy(buffer.data(), buffer.data() + write_size, + puffed_src.data() + bytes_wrote); + bytes_wrote += write_size; + } + // Read the patch + Buffer zucchini_patch; + TEST_AND_RETURN_FALSE(BrotliDecode(patch_start, patch_size, &zucchini_patch)); + auto patch_reader = zucchini::EnsemblePatchReader::Create( + {zucchini_patch.data(), zucchini_patch.size()}); + if (!patch_reader.has_value()) { + LOG(ERROR) << "Failed to parse the zucchini patch."; + return false; + } + + // TODO(197361113) Stream the patched result once zucchini supports it. So we + // can save some memory when applying patch on device. + Buffer patched_data(patch_reader->header().new_size); + auto status = zucchini::ApplyBuffer( + {puffed_src.data(), puffed_src.size()}, *patch_reader, + {patched_data.data(), patched_data.size()}); + if (status != zucchini::status::kStatusSuccess) { + LOG(ERROR) << "Failed to parse the zucchini patch: " << status; + return false; + } + + TEST_AND_RETURN_FALSE( + dst_stream->Write(patched_data.data(), patched_data.size())); return true; } +} // namespace + bool PuffPatch(UniqueStreamPtr src, UniqueStreamPtr dst, const uint8_t* patch, size_t patch_length, size_t max_cache_size) { - size_t bsdiff_patch_offset; // bsdiff offset in |patch|. - size_t bsdiff_patch_size = 0; + size_t patch_offset; // raw patch offset in puffin |patch|. + size_t raw_patch_size = 0; vector<BitExtent> src_deflates, dst_deflates; vector<ByteExtent> src_puffs, dst_puffs; uint64_t src_puff_size, dst_puff_size; - // Decode the patch and get the bsdiff_patch. - TEST_AND_RETURN_FALSE(DecodePatch(patch, patch_length, &bsdiff_patch_offset, - &bsdiff_patch_size, &src_deflates, - &dst_deflates, &src_puffs, &dst_puffs, - &src_puff_size, &dst_puff_size)); + metadata::PatchHeader_PatchType patch_type; + + // Decode the patch and get the raw patch (e.g. bsdiff, zucchini). + TEST_AND_RETURN_FALSE( + DecodePatch(patch, patch_length, &patch_offset, &raw_patch_size, + &src_deflates, &dst_deflates, &src_puffs, &dst_puffs, + &src_puff_size, &dst_puff_size, &patch_type)); auto puffer = std::make_shared<Puffer>(); auto huffer = std::make_shared<Huffer>(); - // For reading from source. - auto reader = BsdiffStream::Create( + auto src_stream = PuffinStream::CreateForPuff(std::move(src), puffer, src_puff_size, - src_deflates, src_puffs, max_cache_size)); - TEST_AND_RETURN_FALSE(reader); - - // For writing into destination. - auto writer = BsdiffStream::Create(PuffinStream::CreateForHuff( - std::move(dst), huffer, dst_puff_size, dst_deflates, dst_puffs)); - TEST_AND_RETURN_FALSE(writer); - - // Running bspatch itself. - TEST_AND_RETURN_FALSE(0 == bspatch(reader, writer, - &patch[bsdiff_patch_offset], - bsdiff_patch_size)); + src_deflates, src_puffs, max_cache_size); + TEST_AND_RETURN_FALSE(src_stream); + auto dst_stream = PuffinStream::CreateForHuff( + std::move(dst), huffer, dst_puff_size, dst_deflates, dst_puffs); + TEST_AND_RETURN_FALSE(dst_stream); + + if (patch_type == metadata::PatchHeader_PatchType_BSDIFF) { + // For reading from source. + auto reader = BsdiffStream::Create(std::move(src_stream)); + TEST_AND_RETURN_FALSE(reader); + // For writing into destination. + auto writer = BsdiffStream::Create(std::move(dst_stream)); + TEST_AND_RETURN_FALSE(writer); + + // Running bspatch itself. + TEST_AND_RETURN_FALSE( + 0 == bspatch(reader, writer, &patch[patch_offset], raw_patch_size)); + } else if (patch_type == metadata::PatchHeader_PatchType_ZUCCHINI) { + TEST_AND_RETURN_FALSE(ApplyZucchiniPatch( + std::move(src_stream), src_puff_size, patch + patch_offset, + raw_patch_size, std::move(dst_stream))); + } else { + LOG(ERROR) << "Unsupported patch type " << patch_type; + return false; + } return true; } diff --git a/src/stream_unittest.cc b/src/stream_unittest.cc index 5c1f984..ef3503d 100644 --- a/src/stream_unittest.cc +++ b/src/stream_unittest.cc @@ -6,11 +6,11 @@ #include "gtest/gtest.h" +#include "puffin/file_stream.h" #include "puffin/src/extent_stream.h" -#include "puffin/src/file_stream.h" #include "puffin/src/include/puffin/huffer.h" #include "puffin/src/include/puffin/puffer.h" -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" #include "puffin/src/puffin_stream.h" #include "puffin/src/unittest_common.h" diff --git a/src/utils.cc b/src/utils.cc index b9b06c1..23d8479 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -12,12 +12,12 @@ #include <string> #include <vector> +#include "puffin/file_stream.h" #include "puffin/src/bit_reader.h" -#include "puffin/src/file_stream.h" #include "puffin/src/include/puffin/common.h" #include "puffin/src/include/puffin/puffer.h" #include "puffin/src/logging.h" -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" #include "puffin/src/puff_writer.h" using std::set; @@ -185,7 +185,7 @@ bool IsValidGzipHeader(const uint8_t* header, size_t size) { // 0 1 0x1F // 1 1 0x8B // 2 1 compression method (8 denotes deflate) - static const uint8_t magic[] = {0x1F, 0x8B, 8}; + static constexpr uint8_t magic[] = {0x1F, 0x8B, 8}; return size >= 10 && std::equal(std::begin(magic), std::end(magic), header); } } // namespace @@ -240,10 +240,10 @@ bool LocateDeflatesInGzip(const Buffer& data, vector<BitExtent>* deflates) { offset += compressed_size; // Ignore CRC32 and uncompressed size. - TEST_AND_RETURN_FALSE(offset + 8 <= data.size()); offset += 8; member_start = offset; - } while (IsValidGzipHeader(&data[member_start], data.size() - member_start)); + } while (member_start < data.size() && + IsValidGzipHeader(&data[member_start], data.size() - member_start)); return true; } diff --git a/src/utils_unittest.cc b/src/utils_unittest.cc index bbeecbf..7876c53 100644 --- a/src/utils_unittest.cc +++ b/src/utils_unittest.cc @@ -8,10 +8,10 @@ #include "gtest/gtest.h" -#include "puffin/src/file_stream.h" +#include "puffin/file_stream.h" #include "puffin/src/include/puffin/common.h" #include "puffin/src/include/puffin/utils.h" -#include "puffin/src/memory_stream.h" +#include "puffin/memory_stream.h" #include "puffin/src/unittest_common.h" using std::string; |