aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-05-11 05:10:07 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-05-11 05:10:07 +0000
commit47de8acc90fd9371a64a05f05384ea6d57c22fae (patch)
tree16a3fc37ea924893d22684c3e834c4522cdc624b
parent36ec8d2e399811a221241a6c7473d3b780e9ef6d (diff)
parent5dc80516a0f4f01939560f770c93e5dd9c8fedb4 (diff)
downloadpuffin-android13-mainline-networking-release.tar.gz
Change-Id: I80f9573affb2328a75be1d04714c33e4b7d3bc6d
-rw-r--r--Android.bp11
-rw-r--r--OWNERS8
-rw-r--r--PRESUBMIT.cfg2
-rw-r--r--README.md258
-rw-r--r--TEST_MAPPING5
-rw-r--r--images/puffin-stream.pngbin0 -> 15099 bytes
-rw-r--r--images/puffin.pngbin0 -> 106009 bytes
-rw-r--r--src/brotli_util.cc112
-rw-r--r--src/brotli_util_unittest.cc32
-rw-r--r--src/file_stream.cc2
-rw-r--r--src/fuzzer_puffpatch.cc2
-rw-r--r--src/include/puffin/brotli_util.h44
-rw-r--r--src/include/puffin/common.h21
-rw-r--r--src/include/puffin/file_stream.h (renamed from src/file_stream.h)0
-rw-r--r--src/include/puffin/memory_stream.h (renamed from src/memory_stream.h)0
-rw-r--r--src/include/puffin/puffdiff.h20
-rw-r--r--src/integration_test.cc88
-rw-r--r--src/logging.h5
-rw-r--r--src/main.cc88
-rw-r--r--src/memory_stream.cc2
-rw-r--r--src/patching_unittest.cc2
-rw-r--r--src/puffdiff.cc102
-rw-r--r--src/puffin.proto7
-rw-r--r--src/puffin_unittest.cc2
-rw-r--r--src/puffpatch.cc112
-rw-r--r--src/stream_unittest.cc4
-rw-r--r--src/utils.cc10
-rw-r--r--src/utils_unittest.cc4
28 files changed, 802 insertions, 141 deletions
diff --git a/Android.bp b/Android.bp
index f49c69b..022be2e 100644
--- a/Android.bp
+++ b/Android.bp
@@ -50,8 +50,10 @@ cc_library_static {
"puffin/src/puffin.proto",
"src/bit_reader.cc",
"src/bit_writer.cc",
+ "src/brotli_util.cc",
"src/huffer.cc",
"src/huffman_table.cc",
+ "src/memory_stream.cc",
"src/puff_reader.cc",
"src/puff_writer.cc",
"src/puffer.cc",
@@ -61,6 +63,9 @@ cc_library_static {
static_libs: [
"libbspatch",
],
+ whole_static_libs: [
+ "libzucchini",
+ ],
proto: {
type: "lite",
export_proto_headers: true,
@@ -72,12 +77,12 @@ cc_library_static {
defaults: ["puffin_defaults"],
srcs: [
"src/file_stream.cc",
- "src/memory_stream.cc",
"src/puffdiff.cc",
"src/utils.cc",
],
static_libs: [
"libbsdiff",
+ "libzucchini",
"libpuffpatch",
],
}
@@ -95,6 +100,7 @@ cc_binary {
static_libs: [
"libbsdiff",
"libbspatch",
+ "libzucchini",
"libdivsufsort",
"libdivsufsort64",
"libpuffdiff",
@@ -109,7 +115,9 @@ cc_test {
cflags: ["-Wno-sign-compare"],
srcs: [
"src/bit_io_unittest.cc",
+ "src/brotli_util_unittest.cc",
"src/extent_stream.cc",
+ "src/integration_test.cc",
"src/patching_unittest.cc",
"src/puff_io_unittest.cc",
"src/puffin_unittest.cc",
@@ -124,6 +132,7 @@ cc_test {
static_libs: [
"libbsdiff",
"libbspatch",
+ "libzucchini",
"libdivsufsort",
"libdivsufsort64",
"libpuffdiff",
diff --git a/OWNERS b/OWNERS
index e27504b..de970a0 100644
--- a/OWNERS
+++ b/OWNERS
@@ -1,3 +1,9 @@
set noparent
-ahassani@google.com
+
+# Android
senj@google.com
+xunchang@google.com
+zhangkelvin@google.com
+
+# Chrome OS
+kimjae@google.com \ No newline at end of file
diff --git a/PRESUBMIT.cfg b/PRESUBMIT.cfg
index 254479a..9993474 100644
--- a/PRESUBMIT.cfg
+++ b/PRESUBMIT.cfg
@@ -1,6 +1,6 @@
[Hook Scripts]
hook0=../../../../chromite/bin/cros lint ${PRESUBMIT_FILES}
-hook1=../../../platform2/common-mk/gyplint.py ${PRESUBMIT_FILES}
+hook1=../../../platform2/common-mk/gnlint.py ${PRESUBMIT_FILES}
[Hook Overrides]
clang_format_check: true
diff --git a/README.md b/README.md
index 88cc22f..0523e89 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,231 @@
-# Puffin
-
-Source code for Puffin: A utility for deterministic DEFLATE recompression.
-
-TODO(ahassani): Describe the directory structure and how-tos.
-
-## Glossary
-
-* __Alphabet__ A value that occurs in the input stream. It can be either a
- literal:[0..255], and end of block sign [256], a length[257..285], or a
- distance [0..29].
-
-* __Huffman code__ A variable length code representing the Huffman encoded of an
- alphabet. Huffman codes can be created uniquely using Huffman code length
- array.
-* __Huffman code array__ An array which an array index identifies a Huffman code
- and the array element in that index represents the corresponding
- alphabet. Throughout the code, Huffman code arrays are identified by
- vectors with postfix `hcodes_`.
-* __Huffman reverse code array__ An array which an array index identifies an
- alphabet and the array element in that index contains the Huffman code of
- the alphabet. Throughout the code, The Huffman reverse code arrays are
- identified by vectors with postfix `rcodes_`.
-* __Huffman code length__ The number of bits in a Huffman code.
-* __Huffman code length array__ An array of Huffman code lengths with the array
- index as the alphabet. Throughout the code, Huffman code length arrays
- are identified by vectors with postfix `lens_`.
+# Puffin: A deterministic deflate re-compressor (for patching purposes)
+
+## Table of Contents
+
+[TOC]
+
+## Puffin
+
+Puffin is a deterministic deflate recompressor. It is mainly used for patching
+deflate compressed images (zip, gzip, etc.) because patches created from deflate
+files/streams are usually large (deflate has a bit-aligned format, hence,
+changing one byte in the raw data can cause the entire deflate stream to change
+drastically.)
+
+Puffin has two tools (operations): `puffdiff` and `puffpatch` (shown
+[here](puffin).) The purpose of `puffdiff` operation is to create a patch
+between a source and a target file with one or both of them having some deflate
+streams. This patch is used in the `puffpatch` operation to generate the target
+file from the source file deterministically. The patch itself is created by
+`bsdiff` library (but can be replaced by any other diffing mechanism). But,
+before it uses `bsdiff` to create the patch, we have to transform both the
+source and target files into a specific format so `bsdiff` can produce smaller
+patches. We define `puff` operation to perform such a transformation. The
+resulting stream is called a `puff` stream. The reverse transformation (from
+`puff` stream to deflate stream) is called a `huff` operation. `huff` is used in
+the client to transform the `puff` stream back into its original deflate stream
+deterministically.
+
+![puffin](images/puffin.png "Puffin architecture")
+
+For information about deflate format see [RFC 1951].
+
+## puff and huff
+
+`puff` is an operation that decompresses only the Huffman part of the deflate
+stream and keeps the structure of the LZ77 coding unchanged. This is roughly
+equivalent of decompressing ‘half way’.
+
+`huff` is the exact opposite of `puff` and it deterministically converts the
+`puff` stream back to its original deflate stream. This deterministic conversion
+is based on two facts:
+
+* There is no need to perform LZ77 algorithm. This means the deflate stream
+ could be built by any LZ77 algorithm.
+* The dynamic Huffman tables can be recreated uniquely from the code length
+ array stored inside the `puff` stream. This means the deflate stream can be
+ reconstructed deterministically using the Huffman tables.
+
+The inclusion of Huffman tables in the `puff` stream has minuscule space burden
+(on average maximum 320 bytes for each block. There is roughly one block per
+32KB of uncompressed data).
+
+`bsdiff` of two `puffed` streams has much smaller patch in comparison to their
+deflate streams, but it is larger than uncompressed streams.
+
+**The major benefits**
+
+* Size of the patch is smaller than deflate stream’s patch.
+* `puff` and `huff` are deterministic operations.
+* `huff` is in order of 10X faster than full recompression. It is even faster
+ than Huffman algorithm because it already has the Huffman tables and does
+ not need to reconstruct it.
+* Both algorithms have very low memory footprint.
+* The underlying LZ77 algorithm can be anything (as long as it is deflate
+ compatible). This includes google’s [zopfli]
+
+**The drawbacks**
+
+* The need to define a file format for the puffed stream and stay with this
+ format forever. If format needs to be changed in the future, then some
+ versioning mechanism should be there to handle it and backward compatibility
+ should be maintained.
+* The need to define a payload format and stay with it forever. Similarly
+ needs to be versioned if required later change.
+* Does not reduces the patch size as full recompression.
+
+
+## puffdiff and puffpatch
+
+A deflate compressed file (gzip, zip, etc.) contains multiple deflate streams at
+different locations. When this file is puffed, the resulting `puff` stream
+contains both puffs and the raw data that existed between the deflates in the
+compressed file. When performing `huff` operation, the location of puffs in the
+`puff` stream and deflates in the deflate stream should be passed upon. This is
+necessary as `huff` operation has to know exactly where the locations of both
+puffs and deflates are. (See the following [image](puffin-stream))
+
+![puffin-stream](images/puffin-stream.png "Puffin stream")
+
+Similarly `puffpatch` requires deflates and puffs locations in both the source
+and target streams. These location information is saved using Protobufs in the
+patch generated by `bsdiff`. One requirement for these two operations are that
+`puffpatch` has to be efficient in the client. Client devices have limited
+memory and CPU bandwidth and it is necessary that each of these operations are
+performed with the most efficiency available. In order to achieve this
+efficiency a new operation can be added to `bspatch`, that reads and writes into
+a `puff` streams using special interfaces for puffing and huffing on the fly.
+
+
+## Puffin Patch Format
+
+* Magic (4 bytes) - The string literal "PUF1".
+* Header Length (4 bytes) - The size of the header (length of the generated
+ Protobuf).
+* Header - Lengths and locations of deflate and puffs streams in source and
+ target files in a Protobuf format. See [puffin.proto].
+* Patch - This is a binary array directly generated by the `bsdiff` program.
+
+## Puffin Stream Format
+
+* [Block Header](#block-header-format) (3+ bytes) - Defines the type of the
+ block.
+* Data - A mix of literals list and copy length/distances.
+* End of Block (2 bytes) - The end of block symbol. Similar to the symbols for
+ copy length/distance but without the distance bytes. The actual copy length
+ value is 259 (0x81FF).
+
+### Block Header Format
+
+* Length (2 Bytes) - The size of the block header excluding the two Length
+ bytes itself - 1.
+* Final Block (1 Bit) - Whether the block is the last one or not.
+ * 1 - Final block
+ * 0 - Middle block
+* Type (2 Bits)
+ * 0 - Uncompressed. Immediately after the header, zero or one literals
+ list is present which defines the content of the uncompressed blocks.
+ * 1 - Fixed Huffman table. The list of literals and length/distances comes
+ immediately after the header. Fixed Huffman table is defined the deflate
+ specification and will not change.
+ * 2 - Dynamic Huffman table. The dynamic Huffman table comes at the end of
+ the block header.
+ * 3 - Undefined. This is an error.
+* Skip Bits (5 Bits) - Used only for uncompressed blocks (For other types its
+ value is zero). In an uncompressed block, the [RFC 1951] skips any bits
+ after reading final block and type bits until the byte boundary in the
+ input. However, it does not define what the value of these bits should
+ be. Most implementations assume 0, but some implementations may use any
+ value. This segment contains those bits as a five-bits integer. When writing
+ the block header back to the deflate format, the actual number of bits which
+ where skipped will be calculated easily.
+* Huffman Table - It only comes for dynamic Huffman table.
+
+#### Dynamic Huffman Table Format
+
+Depending on the scheme for storing the Huffman tables, the payload size can
+change. We discovered that the following scheme does not produce the smallest
+payload, but it is the most deterministic one. In a deflate stream, Huffman
+tables for literals/length and distances are themselves Huffman coded. In this
+format, we also `puff` the Huffman tables into the `puff` stream instead of
+completely decompressing them.
+
+There are three tables stored in this structure very similar to the one defined
+in [RFC 1951]. A Huffman table can be defined as an array of unsigned integer
+code length values. Three Puffed Huffman tables appear like the following
+scheme. The first table (codes) is the Huffman table used to decode the next two
+Huffman tables. The second Huffman table is used to decode literals and lengths,
+and the third Huffman table is used to decode distances.
+
+
+* Literal/Length Count (1 byte) - Number of alphabets used for literal/length
+ Huffman codes - 257.
+* Distance Count (1 byte) - Number of alphabets used for distance Huffman
+ codes - 1.
+* Alphabet Count (1 byte) - Number of alphabets for coding two previous
+ Huffman tables - 4.
+* Code Lengths ((Alphabet Count + 1) / 2 bytes) - A list of codes for reading
+ the next two Huffman tables. Each byte contains two codes. If the number of
+ codes is odd, the last four Bits will be zero.
+* Literal/Length/Distance Code Lengths - List of code lengths for encoding
+ literals/lengths/distance followed The encoding is as follows:
+ * [0..15] - Represent code [0..15]
+ * [16..19] - Copy the last code length [3..6] times.
+ * [20..27] - Repeat code length of 0 for [3..10] times.
+ * [28..155] - Repeat code length of 0 for [11..138] times.
+
+### Literals List
+Literals lists are constructed by a “length” value followed by “length” bytes of
+literals. The Puffer should try to merge adjacent literals lists as much as
+possible into one literals list in the `puff` stream. This Is a length value
+followed by length bytes of literals (Even if there is only one literal.)
+
+* Tag (1 Bit) - The value is 0 indicating that this is a list of literals (not
+ a copy length/distance).
+* Length - The number of literals that would follow in the list.
+ * (7 Bits) Length = [1 .. 127], The value is: Length - 1
+ * (7 Bits + 2 Bytes) Length = [128 .. 65663], The values are: 127,
+ Length - 127.
+ Conserves size by using only one byte if the number of upcoming
+ literals is smaller or equal to 127 (About 99% of literals length in a
+ normal deflate stream fit into this category.) We should never have zero
+ length literals. Otherwise it will use three bytes.
+* Literals (Length Bytes) - A sequence of Length number of literals.
+
+### Copy Length/Distance
+
+This Is a Length value followed by a Distance value.
+
+* Tag (1 Bit) - The value is 1 indicating that this is a copy length/distance
+ field.
+* Length - Conserves size by using only one byte if the length value is
+ smaller or equal to 129. Otherwise two bytes are used.
+ * (7 Bits) - Length = [3 .. 129], The value is: Length - 3
+ * (7 Bites + 1 Byte) Length = [130 .. 258], The value is: 127, Length -
+ 130:
+* Distance (2 Bytes) - Distance = [1 .. 32768], The value is:
+ Distance - 1. The distance value as an unsigned integer.
+
+## Building Puffin
+Currently Puffin is being used in both Android and Chrome OS and is built
+differently in each of them. There is also a Makefile build, but it is not
+comprehensive.
+
+## Usage
+
+Puffin builds an executable `puffin` which can be used to perform diffing and
+patching algorithms using Puffin format. To get the list of options available
+run:
+
+```shell
+puffin --help
+```
+
+It can also be used as a library (currently used by update_engine) that provides
+different APIs.
+
+## References
+
+[RFC 1951]: https://www.ietf.org/rfc/rfc1951.txt
+[puffin.proto]: /src/puffin.proto
+[zopfli]: https://en.wikipedia.org/wiki/Zopfli
diff --git a/TEST_MAPPING b/TEST_MAPPING
index ee8f0ca..d78c7bd 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -3,5 +3,10 @@
{
"name": "puffin_unittest"
}
+ ],
+ "hwasan-postsubmit": [
+ {
+ "name": "puffin_unittest"
+ }
]
}
diff --git a/images/puffin-stream.png b/images/puffin-stream.png
new file mode 100644
index 0000000..a4a654b
--- /dev/null
+++ b/images/puffin-stream.png
Binary files differ
diff --git a/images/puffin.png b/images/puffin.png
new file mode 100644
index 0000000..a0601e6
--- /dev/null
+++ b/images/puffin.png
Binary files differ
diff --git a/src/brotli_util.cc b/src/brotli_util.cc
new file mode 100644
index 0000000..2de5578
--- /dev/null
+++ b/src/brotli_util.cc
@@ -0,0 +1,112 @@
+// Copyright 2021 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "puffin/src/include/puffin/brotli_util.h"
+
+#include "brotli/decode.h"
+#include "brotli/encode.h"
+#include "puffin/src/logging.h"
+#include "puffin/memory_stream.h"
+
+namespace puffin {
+
+namespace {
+
+constexpr auto kBufferSize = 32768;
+constexpr auto kDefaultParamQuality = 9;
+constexpr auto kDefaultParamLgwin = 20;
+} // namespace
+
+bool BrotliEncode(const uint8_t* input,
+ size_t input_size,
+ UniqueStreamPtr output_stream,
+ int quality) {
+ std::unique_ptr<BrotliEncoderState, decltype(&BrotliEncoderDestroyInstance)>
+ encoder(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr),
+ BrotliEncoderDestroyInstance);
+ TEST_AND_RETURN_FALSE(encoder != nullptr);
+
+ BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_QUALITY,
+ quality);
+ BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_LGWIN,
+ kDefaultParamLgwin);
+
+ size_t available_in = input_size;
+ while (available_in != 0 || !BrotliEncoderIsFinished(encoder.get())) {
+ const uint8_t* next_in = input + input_size - available_in;
+ // Set up the output buffer
+ uint8_t buffer[kBufferSize];
+ uint8_t* next_out = buffer;
+ size_t available_out = kBufferSize;
+
+ BrotliEncoderOperation op =
+ available_in == 0 ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS;
+
+ if (!BrotliEncoderCompressStream(encoder.get(), op, &available_in, &next_in,
+ &available_out, &next_out, nullptr)) {
+ LOG(ERROR) << "Failed to compress " << input_size << " bytes with brotli";
+ return false;
+ }
+
+ size_t bytes_consumed = kBufferSize - available_out;
+ output_stream->Write(buffer, bytes_consumed);
+ }
+
+ return true;
+}
+
+bool BrotliEncode(const uint8_t* input,
+ size_t input_size,
+ UniqueStreamPtr output_stream) {
+ return BrotliEncode(input, input_size, std::move(output_stream),
+ kDefaultParamQuality);
+}
+
+bool BrotliEncode(const uint8_t* input,
+ size_t input_size,
+ std::vector<uint8_t>* output) {
+ TEST_AND_RETURN_FALSE(output != nullptr);
+ return BrotliEncode(input, input_size, MemoryStream::CreateForWrite(output));
+}
+
+bool BrotliDecode(const uint8_t* input,
+ size_t input_size,
+ UniqueStreamPtr output_stream) {
+ std::unique_ptr<BrotliDecoderState, decltype(&BrotliDecoderDestroyInstance)>
+ decoder(BrotliDecoderCreateInstance(nullptr, nullptr, nullptr),
+ BrotliDecoderDestroyInstance);
+ TEST_AND_RETURN_FALSE(decoder != nullptr);
+
+ size_t available_in = input_size;
+ while (available_in != 0 || !BrotliDecoderIsFinished(decoder.get())) {
+ const uint8_t* next_in = input + input_size - available_in;
+ // Set up the output buffer
+ uint8_t buffer[kBufferSize];
+ uint8_t* next_out = buffer;
+ size_t available_out = kBufferSize;
+
+ BrotliDecoderResult result =
+ BrotliDecoderDecompressStream(decoder.get(), &available_in, &next_in,
+ &available_out, &next_out, nullptr);
+ if (result == BROTLI_DECODER_RESULT_ERROR ||
+ result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+ LOG(ERROR) << "Failed to decompress " << input_size
+ << " bytes with brotli, result " << result;
+ return false;
+ }
+
+ size_t bytes_consumed = kBufferSize - available_out;
+ output_stream->Write(buffer, bytes_consumed);
+ }
+ return true;
+}
+
+bool BrotliDecode(const uint8_t* input,
+ size_t input_size,
+ std::vector<uint8_t>* output) {
+ TEST_AND_RETURN_FALSE(output != nullptr);
+ return BrotliDecode(input, input_size, MemoryStream::CreateForWrite(output));
+}
+
+} // namespace puffin
diff --git a/src/brotli_util_unittest.cc b/src/brotli_util_unittest.cc
new file mode 100644
index 0000000..4ce8763
--- /dev/null
+++ b/src/brotli_util_unittest.cc
@@ -0,0 +1,32 @@
+// Copyright 2021 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "gtest/gtest.h"
+
+#include "puffin/src/include/puffin/brotli_util.h"
+#include "puffin/memory_stream.h"
+#include "puffin/src/puffin_stream.h"
+
+namespace puffin {
+
+namespace {
+
+// echo "puffin test" | xxd -i
+const Buffer kTestString = {0x70, 0x75, 0x66, 0x66, 0x69, 0x6e,
+ 0x20, 0x74, 0x65, 0x73, 0x74, 0x0a};
+} // namespace
+
+TEST(BrotliUtilTest, CompressAndDecompressTest) {
+ Buffer compressed;
+ ASSERT_TRUE(BrotliEncode(kTestString.data(), kTestString.size(),
+ MemoryStream::CreateForWrite(&compressed)));
+ ASSERT_FALSE(compressed.empty());
+
+ Buffer decompressed;
+ ASSERT_TRUE(BrotliDecode(compressed.data(), compressed.size(),
+ MemoryStream::CreateForWrite(&decompressed)));
+ ASSERT_EQ(kTestString, decompressed);
+}
+
+} // namespace puffin
diff --git a/src/file_stream.cc b/src/file_stream.cc
index 0abde31..de03723 100644
--- a/src/file_stream.cc
+++ b/src/file_stream.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "puffin/src/file_stream.h"
+#include "puffin/file_stream.h"
#include <fcntl.h>
#include <unistd.h>
diff --git a/src/fuzzer_puffpatch.cc b/src/fuzzer_puffpatch.cc
index 28ee5bc..a27c3b9 100644
--- a/src/fuzzer_puffpatch.cc
+++ b/src/fuzzer_puffpatch.cc
@@ -9,7 +9,7 @@
#include "puffin/src/include/puffin/common.h"
#include "puffin/src/include/puffin/puffpatch.h"
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
using puffin::BitExtent;
using puffin::Buffer;
diff --git a/src/include/puffin/brotli_util.h b/src/include/puffin/brotli_util.h
new file mode 100644
index 0000000..cee84ba
--- /dev/null
+++ b/src/include/puffin/brotli_util.h
@@ -0,0 +1,44 @@
+// Copyright 2021 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SRC_INCLUDE_PUFFIN_BROTLI_UTIL_H_
+#define SRC_INCLUDE_PUFFIN_BROTLI_UTIL_H_
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "puffin/stream.h"
+
+namespace puffin {
+
+// Use brotli to compress |input_size| bytes of data, and write the result to
+// |output_stream|.
+bool BrotliEncode(const uint8_t* input,
+ size_t input_size,
+ UniqueStreamPtr output_stream);
+// Similar to above function, and writes to a output buffer.
+bool BrotliEncode(const uint8_t* input,
+ size_t input_size,
+ std::vector<uint8_t>* output);
+
+// Similar to the above, but quality controls how well the compression is
+// |quality| should be between 0 and 11
+bool BrotliEncode(const uint8_t* input,
+ size_t input_size,
+ UniqueStreamPtr output_stream,
+ int quality);
+
+// Decompress |input_size| bytes of data with brotli, and write the result to
+// |output_stream|.
+bool BrotliDecode(const uint8_t* input,
+ size_t input_size,
+ UniqueStreamPtr output_stream);
+// Similar to above function, and writes to a output buffer.
+bool BrotliDecode(const uint8_t* input,
+ size_t input_size,
+ std::vector<uint8_t>* output);
+} // namespace puffin
+
+#endif // SRC_INCLUDE_PUFFIN_BROTLI_UTIL_H_
diff --git a/src/include/puffin/common.h b/src/include/puffin/common.h
index 954b7d9..61e9eb2 100644
--- a/src/include/puffin/common.h
+++ b/src/include/puffin/common.h
@@ -31,24 +31,37 @@ using Buffer = std::vector<uint8_t>;
// defined an extra class so the users of puffin do not have to include
// puffin.pb.h and deal with its use.
struct ByteExtent {
- ByteExtent(uint64_t offset, uint64_t length)
+ constexpr ByteExtent(uint64_t offset, uint64_t length)
: offset(offset), length(length) {}
- bool operator==(const ByteExtent& other) const {
+ constexpr bool operator==(const ByteExtent& other) const {
return this->length == other.length && this->offset == other.offset;
}
+ constexpr bool operator<(const ByteExtent& other) const {
+ if (offset != other.offset) {
+ return offset < other.offset;
+ }
+ return length < other.length;
+ }
+
uint64_t offset;
uint64_t length;
};
struct BitExtent {
- BitExtent(uint64_t offset, uint64_t length)
+ constexpr BitExtent(uint64_t offset, uint64_t length)
: offset(offset), length(length) {}
- bool operator==(const BitExtent& other) const {
+ constexpr bool operator==(const BitExtent& other) const {
return this->length == other.length && this->offset == other.offset;
}
+ constexpr bool operator<(const BitExtent& other) const {
+ if (offset != other.offset) {
+ return offset < other.offset;
+ }
+ return length < other.length;
+ }
uint64_t offset;
uint64_t length;
diff --git a/src/file_stream.h b/src/include/puffin/file_stream.h
index 626d7c6..626d7c6 100644
--- a/src/file_stream.h
+++ b/src/include/puffin/file_stream.h
diff --git a/src/memory_stream.h b/src/include/puffin/memory_stream.h
index a338e7f..a338e7f 100644
--- a/src/memory_stream.h
+++ b/src/include/puffin/memory_stream.h
diff --git a/src/include/puffin/puffdiff.h b/src/include/puffin/puffdiff.h
index c8883bf..ad8a106 100644
--- a/src/include/puffin/puffdiff.h
+++ b/src/include/puffin/puffdiff.h
@@ -15,14 +15,22 @@
namespace puffin {
+enum class PatchAlgorithm {
+ kBsdiff = 0,
+ kZucchini = 1,
+};
+
// Performs a diff operation between input deflate streams and creates a patch
// that is used in the client to recreate the |dst| from |src|.
// |src| IN Source deflate stream.
// |dst| IN Destination deflate stream.
// |src_deflates| IN Deflate locations in |src|.
// |dst_deflates| IN Deflate locations in |dst|.
-// |compressors| IN Compressors to use in the underlying bsdiff, e.g. bz2,
+// |compressors| IN Compressors to use in the underlying bsdiff, e.g.
+// bz2,
// brotli.
+// |patchAlgorithm| IN The patchAlgorithm used to create patches between
+// uncompressed bytes, e.g. bsdiff, zucchini.
// |tmp_filepath| IN A path to a temporary file. The caller has the
// responsibility of unlinking the file after the call to
// |PuffDiff| finishes.
@@ -32,6 +40,16 @@ bool PuffDiff(UniqueStreamPtr src,
const std::vector<BitExtent>& src_deflates,
const std::vector<BitExtent>& dst_deflates,
const std::vector<bsdiff::CompressorType>& compressors,
+ PatchAlgorithm patchAlgorithm,
+ const std::string& tmp_filepath,
+ Buffer* patch);
+
+// This function uses bsdiff as the patch algorithm.
+bool PuffDiff(UniqueStreamPtr src,
+ UniqueStreamPtr dst,
+ const std::vector<BitExtent>& src_deflates,
+ const std::vector<BitExtent>& dst_deflates,
+ const std::vector<bsdiff::CompressorType>& compressors,
const std::string& tmp_filepath,
Buffer* patch);
diff --git a/src/integration_test.cc b/src/integration_test.cc
new file mode 100644
index 0000000..9e03768
--- /dev/null
+++ b/src/integration_test.cc
@@ -0,0 +1,88 @@
+// Copyright 2021 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "puffin/src/include/puffin/puffdiff.h"
+#include "puffin/src/include/puffin/puffpatch.h"
+#include "puffin/src/include/puffin/utils.h"
+#include "puffin/memory_stream.h"
+#include "puffin/src/puffin_stream.h"
+#include "puffin/src/unittest_common.h"
+
+namespace puffin {
+
+namespace {
+// xxd -i <name>.zip
+const Buffer kTestZipA = {
+ 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x79,
+ 0x0d, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x01, 0x00, 0x1c, 0x00, 0x31, 0x55, 0x54, 0x09, 0x00, 0x03,
+ 0x5c, 0xed, 0x16, 0x61, 0x5c, 0xed, 0x16, 0x61, 0x75, 0x78, 0x0b, 0x00,
+ 0x01, 0x04, 0x8f, 0x66, 0x05, 0x00, 0x04, 0x53, 0x5f, 0x01, 0x00, 0x50,
+ 0x4b, 0x01, 0x02, 0x1e, 0x03, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e,
+ 0x79, 0x0d, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xa4, 0x81, 0x00, 0x00, 0x00, 0x00, 0x31, 0x55, 0x54,
+ 0x05, 0x00, 0x03, 0x5c, 0xed, 0x16, 0x61, 0x75, 0x78, 0x0b, 0x00, 0x01,
+ 0x04, 0x8f, 0x66, 0x05, 0x00, 0x04, 0x53, 0x5f, 0x01, 0x00, 0x50, 0x4b,
+ 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x47, 0x00,
+ 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+const Buffer kTestZipB = {
+ 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x79,
+ 0x0d, 0x53, 0x4e, 0x81, 0x88, 0x47, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x00, 0x00, 0x01, 0x00, 0x1c, 0x00, 0x32, 0x55, 0x54, 0x09, 0x00, 0x03,
+ 0x88, 0xed, 0x16, 0x61, 0x88, 0xed, 0x16, 0x61, 0x75, 0x78, 0x0b, 0x00,
+ 0x01, 0x04, 0x8f, 0x66, 0x05, 0x00, 0x04, 0x53, 0x5f, 0x01, 0x00, 0x61,
+ 0x62, 0x63, 0x0a, 0x50, 0x4b, 0x01, 0x02, 0x1e, 0x03, 0x0a, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x26, 0x79, 0x0d, 0x53, 0x4e, 0x81, 0x88, 0x47, 0x04,
+ 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xa4, 0x81, 0x00, 0x00, 0x00,
+ 0x00, 0x32, 0x55, 0x54, 0x05, 0x00, 0x03, 0x88, 0xed, 0x16, 0x61, 0x75,
+ 0x78, 0x0b, 0x00, 0x01, 0x04, 0x8f, 0x66, 0x05, 0x00, 0x04, 0x53, 0x5f,
+ 0x01, 0x00, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x01, 0x00, 0x47, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+} // namespace
+
+class PuffinIntegrationTest : public testing::TestWithParam<PatchAlgorithm> {
+ protected:
+ PatchAlgorithm getPatchType() { return GetParam(); }
+};
+
+TEST_P(PuffinIntegrationTest, PuffinDiffPatchTest) {
+ std::vector<BitExtent> src_deflates;
+ ASSERT_TRUE(LocateDeflatesInZipArchive(kTestZipA, &src_deflates));
+
+ std::vector<BitExtent> dst_deflates;
+ ASSERT_TRUE(LocateDeflatesInZipArchive(kTestZipB, &dst_deflates));
+
+ std::string tmp_file;
+ ASSERT_TRUE(MakeTempFile(&tmp_file, nullptr));
+ Buffer patch;
+ ASSERT_TRUE(PuffDiff(MemoryStream::CreateForRead(kTestZipA),
+ MemoryStream::CreateForRead(kTestZipB), src_deflates,
+ dst_deflates, {bsdiff::CompressorType::kBrotli},
+ getPatchType(), tmp_file, &patch));
+
+ Buffer patched;
+ auto src_stream = MemoryStream::CreateForRead(kTestZipA);
+ auto dst_stream = MemoryStream::CreateForWrite(&patched);
+ ASSERT_TRUE(PuffPatch(MemoryStream::CreateForRead(kTestZipA),
+ MemoryStream::CreateForWrite(&patched), patch.data(),
+ patch.size()));
+
+ ASSERT_EQ(kTestZipB, patched);
+}
+
+INSTANTIATE_TEST_CASE_P(TestWithPatchType,
+ PuffinIntegrationTest,
+ testing::Values(PatchAlgorithm::kBsdiff,
+ PatchAlgorithm::kZucchini));
+
+} // namespace puffin \ No newline at end of file
diff --git a/src/logging.h b/src/logging.h
index b48fcd5..8b80153 100644
--- a/src/logging.h
+++ b/src/logging.h
@@ -5,7 +5,10 @@
#ifndef SRC_LOGGING_H_
#define SRC_LOGGING_H_
-#ifdef USE_BRILLO
+#if defined(BASE_VER) && BASE_VER >= 822064
+#include "base/check.h" // CHECK-related macros are defined in base/check.h on Chrome OS.
+#include "base/logging.h"
+#elif USE_BRILLO
#include "base/logging.h"
#else
#include "glog/logging.h"
diff --git a/src/main.cc b/src/main.cc
index 20f0948..0c1bdf8 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -13,8 +13,8 @@
#include "gflags/gflags.h"
#endif
+#include "puffin/file_stream.h"
#include "puffin/src/extent_stream.h"
-#include "puffin/src/file_stream.h"
#include "puffin/src/include/puffin/common.h"
#include "puffin/src/include/puffin/huffer.h"
#include "puffin/src/include/puffin/puffdiff.h"
@@ -22,7 +22,7 @@
#include "puffin/src/include/puffin/puffpatch.h"
#include "puffin/src/include/puffin/utils.h"
#include "puffin/src/logging.h"
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
#include "puffin/src/puffin_stream.h"
using puffin::BitExtent;
@@ -154,44 +154,46 @@ bool LocateDeflatesBasedOnFileType(const UniqueStreamPtr& stream,
} // namespace
-#define SETUP_FLAGS \
- DEFINE_string(src_file, "", "Source file"); \
- DEFINE_string(dst_file, "", "Target file"); \
- DEFINE_string(patch_file, "", "patch file"); \
- DEFINE_string( \
- src_deflates_byte, "", \
- "Source deflate byte locations in the format offset:length,..."); \
- DEFINE_string( \
- dst_deflates_byte, "", \
- "Target deflate byte locations in the format offset:length,..."); \
- DEFINE_string( \
- src_deflates_bit, "", \
- "Source deflate bit locations in the format offset:length,..."); \
- DEFINE_string( \
- dst_deflates_bit, "", \
- "Target deflatebit locations in the format offset:length,..."); \
- DEFINE_string(src_puffs, "", \
- "Source puff locations in the format offset:length,..."); \
- DEFINE_string(dst_puffs, "", \
- "Target puff locations in the format offset:length,..."); \
- DEFINE_string(src_extents, "", \
- "Source extents in the format of offset:length,..."); \
- DEFINE_string(dst_extents, "", \
- "Target extents in the format of offset:length,..."); \
- DEFINE_string(operation, "", \
- "Type of the operation: puff, huff, puffdiff, puffpatch, " \
- "puffhuff"); \
- DEFINE_string(src_file_type, "", \
- "Type of the input source file: deflate, gzip, " \
- "zlib or zip"); \
- DEFINE_string(dst_file_type, "", \
- "Same as src_file_type but for the target file"); \
- DEFINE_bool(verbose, false, \
- "Logs all the given parameters including internally " \
- "generated ones"); \
- DEFINE_uint64(cache_size, kDefaultPuffCacheSize, \
- "Maximum size to cache the puff stream. Used in puffpatch");
-
+#define SETUP_FLAGS \
+ DEFINE_string(src_file, "", "Source file"); \
+ DEFINE_string(dst_file, "", "Target file"); \
+ DEFINE_string(patch_file, "", "patch file"); \
+ DEFINE_string( \
+ src_deflates_byte, "", \
+ "Source deflate byte locations in the format offset:length,..."); \
+ DEFINE_string( \
+ dst_deflates_byte, "", \
+ "Target deflate byte locations in the format offset:length,..."); \
+ DEFINE_string( \
+ src_deflates_bit, "", \
+ "Source deflate bit locations in the format offset:length,..."); \
+ DEFINE_string( \
+ dst_deflates_bit, "", \
+ "Target deflatebit locations in the format offset:length,..."); \
+ DEFINE_string(src_puffs, "", \
+ "Source puff locations in the format offset:length,..."); \
+ DEFINE_string(dst_puffs, "", \
+ "Target puff locations in the format offset:length,..."); \
+ DEFINE_string(src_extents, "", \
+ "Source extents in the format of offset:length,..."); \
+ DEFINE_string(dst_extents, "", \
+ "Target extents in the format of offset:length,..."); \
+ DEFINE_string(operation, "", \
+ "Type of the operation: puff, huff, puffdiff, puffpatch, " \
+ "puffhuff"); \
+ DEFINE_string(src_file_type, "", \
+ "Type of the input source file: deflate, gzip, " \
+ "zlib or zip"); \
+ DEFINE_string(dst_file_type, "", \
+ "Same as src_file_type but for the target file"); \
+ DEFINE_bool(verbose, false, \
+ "Logs all the given parameters including internally " \
+ "generated ones"); \
+ DEFINE_uint64(cache_size, kDefaultPuffCacheSize, \
+ "Maximum size to cache the puff stream. Used in puffpatch"); \
+ DEFINE_int32(patch_algorithm, 0, \
+ "Type of raw diff algorithm to use. The current supported " \
+ "ones are 0: bsdiff, 1: zucchini.");
#ifndef USE_BRILLO
SETUP_FLAGS;
#endif
@@ -343,12 +345,18 @@ bool Main(int argc, char** argv) {
&dst_deflates_bit));
}
+ if (FLAGS_patch_algorithm != 0 && FLAGS_patch_algorithm != 1) {
+ LOG(ERROR)
+ << "The supported patch algorithms are 0: bsdiff, 1: zucchini.";
+ return false;
+ }
// TODO(xunchang) add flags to select the bsdiff compressors.
Buffer puffdiff_delta;
TEST_AND_RETURN_FALSE(puffin::PuffDiff(
std::move(src_stream), std::move(dst_stream), src_deflates_bit,
dst_deflates_bit,
{bsdiff::CompressorType::kBZ2, bsdiff::CompressorType::kBrotli},
+ static_cast<puffin::PatchAlgorithm>(FLAGS_patch_algorithm),
"/tmp/patch.tmp", &puffdiff_delta));
if (FLAGS_verbose) {
LOG(INFO) << "patch_size: " << puffdiff_delta.size();
diff --git a/src/memory_stream.cc b/src/memory_stream.cc
index 706f4a3..8a2afdd 100644
--- a/src/memory_stream.cc
+++ b/src/memory_stream.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
#include <fcntl.h>
#include <unistd.h>
diff --git a/src/patching_unittest.cc b/src/patching_unittest.cc
index 134f137..fec2ecc 100644
--- a/src/patching_unittest.cc
+++ b/src/patching_unittest.cc
@@ -12,7 +12,7 @@
#include "puffin/src/include/puffin/puffpatch.h"
#include "puffin/src/include/puffin/utils.h"
#include "puffin/src/logging.h"
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
#include "puffin/src/puffin_stream.h"
#include "puffin/src/unittest_common.h"
diff --git a/src/puffdiff.cc b/src/puffdiff.cc
index 15d0fa6..2588379 100644
--- a/src/puffdiff.cc
+++ b/src/puffdiff.cc
@@ -13,14 +13,18 @@
#include "bsdiff/bsdiff.h"
#include "bsdiff/patch_writer_factory.h"
+#include "zucchini/buffer_view.h"
+#include "zucchini/patch_writer.h"
+#include "zucchini/zucchini.h"
-#include "puffin/src/file_stream.h"
+#include "puffin/file_stream.h"
+#include "puffin/src/include/puffin/brotli_util.h"
#include "puffin/src/include/puffin/common.h"
#include "puffin/src/include/puffin/puffer.h"
#include "puffin/src/include/puffin/puffpatch.h"
#include "puffin/src/include/puffin/utils.h"
#include "puffin/src/logging.h"
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
#include "puffin/src/puffin.pb.h"
#include "puffin/src/puffin_stream.h"
@@ -47,15 +51,16 @@ void CopyVectorToRpf(
// Structure of a puffin patch
// +-------+------------------+-------------+--------------+
-// |P|U|F|1| PatchHeader Size | PatchHeader | bsdiff_patch |
+// |P|U|F|1| PatchHeader Size | PatchHeader | raw patch |
// +-------+------------------+-------------+--------------+
-bool CreatePatch(const Buffer& bsdiff_patch,
+bool CreatePatch(const Buffer& raw_patch,
const vector<BitExtent>& src_deflates,
const vector<BitExtent>& dst_deflates,
const vector<ByteExtent>& src_puffs,
const vector<ByteExtent>& dst_puffs,
uint64_t src_puff_size,
uint64_t dst_puff_size,
+ PatchAlgorithm patchAlgorithm,
Buffer* patch) {
metadata::PatchHeader header;
header.set_version(1);
@@ -67,6 +72,7 @@ bool CreatePatch(const Buffer& bsdiff_patch,
header.mutable_src()->set_puff_length(src_puff_size);
header.mutable_dst()->set_puff_length(dst_puff_size);
+ header.set_type(static_cast<metadata::PatchHeader_PatchType>(patchAlgorithm));
const size_t header_size_long = header.ByteSizeLong();
TEST_AND_RETURN_FALSE(header_size_long <= UINT32_MAX);
@@ -74,7 +80,7 @@ bool CreatePatch(const Buffer& bsdiff_patch,
uint64_t offset = 0;
patch->resize(kMagicLength + sizeof(header_size) + header_size +
- bsdiff_patch.size());
+ raw_patch.size());
memcpy(patch->data() + offset, kMagic, kMagicLength);
offset += kMagicLength;
@@ -88,9 +94,9 @@ bool CreatePatch(const Buffer& bsdiff_patch,
header.SerializeToArray(patch->data() + offset, header_size));
offset += header_size;
- memcpy(patch->data() + offset, bsdiff_patch.data(), bsdiff_patch.size());
+ memcpy(patch->data() + offset, raw_patch.data(), raw_patch.size());
- if (bsdiff_patch.size() > patch->size()) {
+ if (raw_patch.size() > patch->size()) {
LOG(ERROR) << "Puffin patch is invalid";
}
return true;
@@ -103,6 +109,7 @@ bool PuffDiff(UniqueStreamPtr src,
const vector<BitExtent>& src_deflates,
const vector<BitExtent>& dst_deflates,
const vector<bsdiff::CompressorType>& compressors,
+ PatchAlgorithm patchAlgorithm,
const string& tmp_filepath,
Buffer* patch) {
auto puffer = std::make_shared<Puffer>();
@@ -130,29 +137,70 @@ bool PuffDiff(UniqueStreamPtr src,
TEST_AND_RETURN_FALSE(puff_deflate_stream(std::move(dst), dst_deflates,
&dst_puff_buffer, &dst_puffs));
- auto bsdiff_patch_writer = bsdiff::CreateBSDF2PatchWriter(
- tmp_filepath, compressors, kBrotliCompressionQuality);
-
- TEST_AND_RETURN_FALSE(
- 0 == bsdiff::bsdiff(src_puff_buffer.data(), src_puff_buffer.size(),
- dst_puff_buffer.data(), dst_puff_buffer.size(),
- bsdiff_patch_writer.get(), nullptr));
-
- auto bsdiff_patch = FileStream::Open(tmp_filepath, true, false);
- TEST_AND_RETURN_FALSE(bsdiff_patch);
- uint64_t patch_size;
- TEST_AND_RETURN_FALSE(bsdiff_patch->GetSize(&patch_size));
- Buffer bsdiff_patch_buf(patch_size);
- TEST_AND_RETURN_FALSE(
- bsdiff_patch->Read(bsdiff_patch_buf.data(), bsdiff_patch_buf.size()));
- TEST_AND_RETURN_FALSE(bsdiff_patch->Close());
+ if (patchAlgorithm == PatchAlgorithm::kBsdiff) {
+ auto bsdiff_patch_writer = bsdiff::CreateBSDF2PatchWriter(
+ tmp_filepath, compressors, kBrotliCompressionQuality);
+
+ TEST_AND_RETURN_FALSE(
+ 0 == bsdiff::bsdiff(src_puff_buffer.data(), src_puff_buffer.size(),
+ dst_puff_buffer.data(), dst_puff_buffer.size(),
+ bsdiff_patch_writer.get(), nullptr));
+
+ auto bsdiff_patch = FileStream::Open(tmp_filepath, true, false);
+ TEST_AND_RETURN_FALSE(bsdiff_patch);
+ uint64_t patch_size;
+ TEST_AND_RETURN_FALSE(bsdiff_patch->GetSize(&patch_size));
+ Buffer bsdiff_patch_buf(patch_size);
+ TEST_AND_RETURN_FALSE(
+ bsdiff_patch->Read(bsdiff_patch_buf.data(), bsdiff_patch_buf.size()));
+ TEST_AND_RETURN_FALSE(bsdiff_patch->Close());
+
+ TEST_AND_RETURN_FALSE(CreatePatch(
+ bsdiff_patch_buf, src_deflates, dst_deflates, src_puffs, dst_puffs,
+ src_puff_buffer.size(), dst_puff_buffer.size(), patchAlgorithm, patch));
+ } else if (patchAlgorithm == PatchAlgorithm::kZucchini) {
+ zucchini::ConstBufferView src_bytes(src_puff_buffer.data(),
+ src_puff_buffer.size());
+ zucchini::ConstBufferView dst_bytes(dst_puff_buffer.data(),
+ dst_puff_buffer.size());
+
+ zucchini::EnsemblePatchWriter patch_writer(src_bytes, dst_bytes);
+ auto status = zucchini::GenerateBuffer(src_bytes, dst_bytes, &patch_writer);
+ TEST_AND_RETURN_FALSE(status == zucchini::status::kStatusSuccess);
+
+ Buffer zucchini_patch_buf(patch_writer.SerializedSize());
+ patch_writer.SerializeInto(
+ {zucchini_patch_buf.data(), zucchini_patch_buf.size()});
+
+ // Use brotli to compress the zucchini patch.
+ // TODO(197361113) respect the CompressorType parameter for zucchini.
+ Buffer compressed_patch;
+ TEST_AND_RETURN_FALSE(BrotliEncode(zucchini_patch_buf.data(),
+ zucchini_patch_buf.size(),
+ &compressed_patch));
+
+ TEST_AND_RETURN_FALSE(CreatePatch(
+ compressed_patch, src_deflates, dst_deflates, src_puffs, dst_puffs,
+ src_puff_buffer.size(), dst_puff_buffer.size(), patchAlgorithm, patch));
+ } else {
+ LOG(ERROR) << "unsupported type " << static_cast<int>(patchAlgorithm);
+ return false;
+ }
- TEST_AND_RETURN_FALSE(CreatePatch(
- bsdiff_patch_buf, src_deflates, dst_deflates, src_puffs, dst_puffs,
- src_puff_buffer.size(), dst_puff_buffer.size(), patch));
return true;
}
+bool PuffDiff(UniqueStreamPtr src,
+ UniqueStreamPtr dst,
+ const std::vector<BitExtent>& src_deflates,
+ const std::vector<BitExtent>& dst_deflates,
+ const std::vector<bsdiff::CompressorType>& compressors,
+ const std::string& tmp_filepath,
+ Buffer* patch) {
+ return PuffDiff(std::move(src), std::move(dst), src_deflates, dst_deflates,
+ compressors, PatchAlgorithm::kBsdiff, tmp_filepath, patch);
+}
+
bool PuffDiff(const Buffer& src,
const Buffer& dst,
const vector<BitExtent>& src_deflates,
@@ -162,7 +210,7 @@ bool PuffDiff(const Buffer& src,
Buffer* patch) {
return PuffDiff(MemoryStream::CreateForRead(src),
MemoryStream::CreateForRead(dst), src_deflates, dst_deflates,
- compressors, tmp_filepath, patch);
+ compressors, PatchAlgorithm::kBsdiff, tmp_filepath, patch);
}
bool PuffDiff(const Buffer& src,
diff --git a/src/puffin.proto b/src/puffin.proto
index 7651bc0..7ed5bc7 100644
--- a/src/puffin.proto
+++ b/src/puffin.proto
@@ -19,8 +19,15 @@ message StreamInfo {
}
message PatchHeader {
+ enum PatchType {
+ BSDIFF = 0;
+ ZUCCHINI = 1;
+ }
+
int32 version = 1;
StreamInfo src = 2;
StreamInfo dst = 3;
// The bsdiff patch is installed right after this protobuf.
+
+ PatchType type = 4;
} \ No newline at end of file
diff --git a/src/puffin_unittest.cc b/src/puffin_unittest.cc
index 0279813..3ac0173 100644
--- a/src/puffin_unittest.cc
+++ b/src/puffin_unittest.cc
@@ -15,7 +15,7 @@
#include "puffin/src/include/puffin/puffer.h"
#include "puffin/src/include/puffin/utils.h"
#include "puffin/src/logging.h"
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
#include "puffin/src/puff_reader.h"
#include "puffin/src/puff_writer.h"
#include "puffin/src/puffin_stream.h"
diff --git a/src/puffpatch.cc b/src/puffpatch.cc
index 0b4ffcb..e9e4a32 100644
--- a/src/puffpatch.cc
+++ b/src/puffpatch.cc
@@ -14,12 +14,16 @@
#include "bsdiff/bspatch.h"
#include "bsdiff/file_interface.h"
+#include "zucchini/patch_reader.h"
+#include "zucchini/zucchini.h"
+#include "puffin/src/include/puffin/brotli_util.h"
#include "puffin/src/include/puffin/common.h"
#include "puffin/src/include/puffin/huffer.h"
#include "puffin/src/include/puffin/puffer.h"
#include "puffin/src/include/puffin/stream.h"
#include "puffin/src/logging.h"
+#include "puffin/memory_stream.h"
#include "puffin/src/puffin.pb.h"
#include "puffin/src/puffin_stream.h"
@@ -92,8 +96,6 @@ class BsdiffStream : public bsdiff::FileInterface {
DISALLOW_COPY_AND_ASSIGN(BsdiffStream);
};
-} // namespace
-
bool DecodePatch(const uint8_t* patch,
size_t patch_length,
size_t* bsdiff_patch_offset,
@@ -103,7 +105,8 @@ bool DecodePatch(const uint8_t* patch,
vector<ByteExtent>* src_puffs,
vector<ByteExtent>* dst_puffs,
uint64_t* src_puff_size,
- uint64_t* dst_puff_size) {
+ uint64_t* dst_puff_size,
+ metadata::PatchHeader_PatchType* patch_type) {
size_t offset = 0;
uint32_t header_size;
TEST_AND_RETURN_FALSE(patch_length >= (kMagicLength + sizeof(header_size)));
@@ -135,43 +138,104 @@ bool DecodePatch(const uint8_t* patch,
*bsdiff_patch_offset = offset;
*bsdiff_patch_size = patch_length - offset;
+
+ *patch_type = header.type();
+ return true;
+}
+
+bool ApplyZucchiniPatch(UniqueStreamPtr src_stream,
+ size_t src_size,
+ const uint8_t* patch_start,
+ size_t patch_size,
+ UniqueStreamPtr dst_stream) {
+ // Read the source data
+ Buffer puffed_src(src_size);
+ Buffer buffer(1024 * 1024);
+ uint64_t bytes_wrote = 0;
+ while (bytes_wrote < src_size) {
+ auto write_size =
+ std::min(static_cast<uint64_t>(buffer.size()), src_size - bytes_wrote);
+ TEST_AND_RETURN_FALSE(src_stream->Read(buffer.data(), write_size));
+ std::copy(buffer.data(), buffer.data() + write_size,
+ puffed_src.data() + bytes_wrote);
+ bytes_wrote += write_size;
+ }
+ // Read the patch
+ Buffer zucchini_patch;
+ TEST_AND_RETURN_FALSE(BrotliDecode(patch_start, patch_size, &zucchini_patch));
+ auto patch_reader = zucchini::EnsemblePatchReader::Create(
+ {zucchini_patch.data(), zucchini_patch.size()});
+ if (!patch_reader.has_value()) {
+ LOG(ERROR) << "Failed to parse the zucchini patch.";
+ return false;
+ }
+
+ // TODO(197361113) Stream the patched result once zucchini supports it. So we
+ // can save some memory when applying patch on device.
+ Buffer patched_data(patch_reader->header().new_size);
+ auto status = zucchini::ApplyBuffer(
+ {puffed_src.data(), puffed_src.size()}, *patch_reader,
+ {patched_data.data(), patched_data.size()});
+ if (status != zucchini::status::kStatusSuccess) {
+ LOG(ERROR) << "Failed to parse the zucchini patch: " << status;
+ return false;
+ }
+
+ TEST_AND_RETURN_FALSE(
+ dst_stream->Write(patched_data.data(), patched_data.size()));
return true;
}
+} // namespace
+
bool PuffPatch(UniqueStreamPtr src,
UniqueStreamPtr dst,
const uint8_t* patch,
size_t patch_length,
size_t max_cache_size) {
- size_t bsdiff_patch_offset; // bsdiff offset in |patch|.
- size_t bsdiff_patch_size = 0;
+ size_t patch_offset; // raw patch offset in puffin |patch|.
+ size_t raw_patch_size = 0;
vector<BitExtent> src_deflates, dst_deflates;
vector<ByteExtent> src_puffs, dst_puffs;
uint64_t src_puff_size, dst_puff_size;
- // Decode the patch and get the bsdiff_patch.
- TEST_AND_RETURN_FALSE(DecodePatch(patch, patch_length, &bsdiff_patch_offset,
- &bsdiff_patch_size, &src_deflates,
- &dst_deflates, &src_puffs, &dst_puffs,
- &src_puff_size, &dst_puff_size));
+ metadata::PatchHeader_PatchType patch_type;
+
+ // Decode the patch and get the raw patch (e.g. bsdiff, zucchini).
+ TEST_AND_RETURN_FALSE(
+ DecodePatch(patch, patch_length, &patch_offset, &raw_patch_size,
+ &src_deflates, &dst_deflates, &src_puffs, &dst_puffs,
+ &src_puff_size, &dst_puff_size, &patch_type));
auto puffer = std::make_shared<Puffer>();
auto huffer = std::make_shared<Huffer>();
- // For reading from source.
- auto reader = BsdiffStream::Create(
+ auto src_stream =
PuffinStream::CreateForPuff(std::move(src), puffer, src_puff_size,
- src_deflates, src_puffs, max_cache_size));
- TEST_AND_RETURN_FALSE(reader);
-
- // For writing into destination.
- auto writer = BsdiffStream::Create(PuffinStream::CreateForHuff(
- std::move(dst), huffer, dst_puff_size, dst_deflates, dst_puffs));
- TEST_AND_RETURN_FALSE(writer);
-
- // Running bspatch itself.
- TEST_AND_RETURN_FALSE(0 == bspatch(reader, writer,
- &patch[bsdiff_patch_offset],
- bsdiff_patch_size));
+ src_deflates, src_puffs, max_cache_size);
+ TEST_AND_RETURN_FALSE(src_stream);
+ auto dst_stream = PuffinStream::CreateForHuff(
+ std::move(dst), huffer, dst_puff_size, dst_deflates, dst_puffs);
+ TEST_AND_RETURN_FALSE(dst_stream);
+
+ if (patch_type == metadata::PatchHeader_PatchType_BSDIFF) {
+ // For reading from source.
+ auto reader = BsdiffStream::Create(std::move(src_stream));
+ TEST_AND_RETURN_FALSE(reader);
+ // For writing into destination.
+ auto writer = BsdiffStream::Create(std::move(dst_stream));
+ TEST_AND_RETURN_FALSE(writer);
+
+ // Running bspatch itself.
+ TEST_AND_RETURN_FALSE(
+ 0 == bspatch(reader, writer, &patch[patch_offset], raw_patch_size));
+ } else if (patch_type == metadata::PatchHeader_PatchType_ZUCCHINI) {
+ TEST_AND_RETURN_FALSE(ApplyZucchiniPatch(
+ std::move(src_stream), src_puff_size, patch + patch_offset,
+ raw_patch_size, std::move(dst_stream)));
+ } else {
+ LOG(ERROR) << "Unsupported patch type " << patch_type;
+ return false;
+ }
return true;
}
diff --git a/src/stream_unittest.cc b/src/stream_unittest.cc
index 5c1f984..ef3503d 100644
--- a/src/stream_unittest.cc
+++ b/src/stream_unittest.cc
@@ -6,11 +6,11 @@
#include "gtest/gtest.h"
+#include "puffin/file_stream.h"
#include "puffin/src/extent_stream.h"
-#include "puffin/src/file_stream.h"
#include "puffin/src/include/puffin/huffer.h"
#include "puffin/src/include/puffin/puffer.h"
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
#include "puffin/src/puffin_stream.h"
#include "puffin/src/unittest_common.h"
diff --git a/src/utils.cc b/src/utils.cc
index b9b06c1..23d8479 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -12,12 +12,12 @@
#include <string>
#include <vector>
+#include "puffin/file_stream.h"
#include "puffin/src/bit_reader.h"
-#include "puffin/src/file_stream.h"
#include "puffin/src/include/puffin/common.h"
#include "puffin/src/include/puffin/puffer.h"
#include "puffin/src/logging.h"
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
#include "puffin/src/puff_writer.h"
using std::set;
@@ -185,7 +185,7 @@ bool IsValidGzipHeader(const uint8_t* header, size_t size) {
// 0 1 0x1F
// 1 1 0x8B
// 2 1 compression method (8 denotes deflate)
- static const uint8_t magic[] = {0x1F, 0x8B, 8};
+ static constexpr uint8_t magic[] = {0x1F, 0x8B, 8};
return size >= 10 && std::equal(std::begin(magic), std::end(magic), header);
}
} // namespace
@@ -240,10 +240,10 @@ bool LocateDeflatesInGzip(const Buffer& data, vector<BitExtent>* deflates) {
offset += compressed_size;
// Ignore CRC32 and uncompressed size.
- TEST_AND_RETURN_FALSE(offset + 8 <= data.size());
offset += 8;
member_start = offset;
- } while (IsValidGzipHeader(&data[member_start], data.size() - member_start));
+ } while (member_start < data.size() &&
+ IsValidGzipHeader(&data[member_start], data.size() - member_start));
return true;
}
diff --git a/src/utils_unittest.cc b/src/utils_unittest.cc
index bbeecbf..7876c53 100644
--- a/src/utils_unittest.cc
+++ b/src/utils_unittest.cc
@@ -8,10 +8,10 @@
#include "gtest/gtest.h"
-#include "puffin/src/file_stream.h"
+#include "puffin/file_stream.h"
#include "puffin/src/include/puffin/common.h"
#include "puffin/src/include/puffin/utils.h"
-#include "puffin/src/memory_stream.h"
+#include "puffin/memory_stream.h"
#include "puffin/src/unittest_common.h"
using std::string;