aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Huang <huangs@chromium.org>2018-03-13 18:19:34 +0000
committerEdward Lesmes <ehmaldonado@google.com>2021-07-23 21:50:59 +0000
commit06f1ae9aaca969ee95ef840f22b6b461c304542d (patch)
treef1e5c6624e70628e81fbf38d6cd14b974abe5d93
downloadzucchini-06f1ae9aaca969ee95ef840f22b6b461c304542d.tar.gz
[Zucchini] Move Zucchini from /chrome/installer/ to /components/.
(Use "git log --follow" to see older revisions of files). /components/ is the most logical place to put Zucchini, which only depends on /base and /testing/gtest. This move also enables Zucchini to be used by the Component Updater. Details: - Move all files; run the following to change deps and guards: sed 's/chrome\/installer/components/' *.cc *.h -i sed 's/CHROME_INSTALLER/COMPONENTS/' *.cc *.h -i - Sorting works out pretty well! - Change all 'chrome/installer/zucchini' to 'components/zucchini' throughout other parts of the repo; sort if necessary. - Fix 6 'git cl lint' errors. - Change 1 Bind() usage to BindRepeated(). - Update OWNER. Bug: 729154 Change-Id: I50c5a7d411ea85f707b5994ab319dfb2a1acccf7 Reviewed-on: https://chromium-review.googlesource.com/954923 Reviewed-by: Greg Thompson <grt@chromium.org> Reviewed-by: Jochen Eisinger <jochen@chromium.org> Reviewed-by: Samuel Huang <huangs@chromium.org> Commit-Queue: Samuel Huang <huangs@chromium.org> Cr-Commit-Position: refs/heads/master@{#542857} NOKEYCHECK=True GitOrigin-RevId: 577ef6c435e8d43be6e3e60ccbcbd1881780f4ec
-rw-r--r--BUILD.gn195
-rw-r--r--OWNERS5
-rw-r--r--README.md259
-rw-r--r--abs32_utils.cc201
-rw-r--r--abs32_utils.h137
-rw-r--r--abs32_utils_unittest.cc496
-rw-r--r--address_translator.cc254
-rw-r--r--address_translator.h198
-rw-r--r--address_translator_unittest.cc556
-rw-r--r--algorithm.h84
-rw-r--r--algorithm_unittest.cc206
-rw-r--r--binary_data_histogram.cc91
-rw-r--r--binary_data_histogram.h91
-rw-r--r--binary_data_histogram_unittest.cc132
-rw-r--r--buffer_sink.cc11
-rw-r--r--buffer_sink.h68
-rw-r--r--buffer_sink_unittest.cc71
-rw-r--r--buffer_source.cc105
-rw-r--r--buffer_source.h141
-rw-r--r--buffer_source_unittest.cc347
-rw-r--r--buffer_view.h201
-rw-r--r--buffer_view_unittest.cc242
-rw-r--r--crc32.cc43
-rw-r--r--crc32.h17
-rw-r--r--crc32_unittest.cc47
-rw-r--r--disassembler.cc36
-rw-r--r--disassembler.h133
-rw-r--r--disassembler_no_op.cc28
-rw-r--r--disassembler_no_op.h40
-rw-r--r--disassembler_win32.cc392
-rw-r--r--disassembler_win32.h129
-rw-r--r--element_detection.cc84
-rw-r--r--element_detection.h60
-rw-r--r--element_detection_unittest.cc78
-rw-r--r--encoded_view.cc77
-rw-r--r--encoded_view.h182
-rw-r--r--encoded_view_unittest.cc202
-rw-r--r--ensemble_matcher.cc24
-rw-r--r--ensemble_matcher.h62
-rw-r--r--equivalence_map.cc482
-rw-r--r--equivalence_map.h183
-rw-r--r--equivalence_map_unittest.cc446
-rw-r--r--heuristic_ensemble_matcher.cc369
-rw-r--r--heuristic_ensemble_matcher.h39
-rw-r--r--image_index.cc78
-rw-r--r--image_index.h116
-rw-r--r--image_index_unittest.cc131
-rw-r--r--image_utils.h206
-rw-r--r--image_utils_unittest.cc77
-rw-r--r--integration_test.cc104
-rw-r--r--io_utils.cc52
-rw-r--r--io_utils.h146
-rw-r--r--io_utils_unittest.cc161
-rw-r--r--label_manager.cc93
-rw-r--r--label_manager.h113
-rw-r--r--label_manager_unittest.cc137
-rw-r--r--main_utils.cc193
-rw-r--r--main_utils.h35
-rw-r--r--mapped_file.cc70
-rw-r--r--mapped_file.h83
-rw-r--r--mapped_file_unittest.cc61
-rw-r--r--patch_fuzzer.cc19
-rw-r--r--patch_read_write_unittest.cc604
-rw-r--r--patch_reader.cc345
-rw-r--r--patch_reader.h277
-rw-r--r--patch_utils.h152
-rw-r--r--patch_utils_unittest.cc171
-rw-r--r--patch_writer.cc294
-rw-r--r--patch_writer.h276
-rw-r--r--reference_set.cc68
-rw-r--r--reference_set.h66
-rw-r--r--reference_set_unittest.cc51
-rw-r--r--rel32_finder.cc137
-rw-r--r--rel32_finder.h189
-rw-r--r--rel32_finder_unittest.cc353
-rw-r--r--rel32_utils.cc69
-rw-r--r--rel32_utils.h70
-rw-r--r--rel32_utils_unittest.cc128
-rw-r--r--reloc_utils.cc193
-rw-r--r--reloc_utils.h140
-rw-r--r--reloc_utils_unittest.cc273
-rw-r--r--suffix_array.h475
-rw-r--r--suffix_array_unittest.cc331
-rw-r--r--target_pool.cc84
-rw-r--r--target_pool.h77
-rw-r--r--target_pool_unittest.cc64
-rw-r--r--targets_affinity.cc108
-rw-r--r--targets_affinity.h74
-rw-r--r--targets_affinity_unittest.cc131
-rw-r--r--test_disassembler.cc58
-rw-r--r--test_disassembler.h78
-rw-r--r--test_reference_reader.cc20
-rw-r--r--test_reference_reader.h32
-rw-r--r--test_utils.cc26
-rw-r--r--test_utils.h20
-rw-r--r--testdata/chrome64_1.exe.sha11
-rw-r--r--testdata/chrome64_2.exe.sha11
-rw-r--r--testdata/patch_fuzzer/empty.zucbin0 -> 80 bytes
-rw-r--r--testdata/setup1.exe.sha11
-rw-r--r--testdata/setup2.exe.sha11
-rw-r--r--type_win_pe.h188
-rw-r--r--typed_value.h57
-rw-r--r--typed_value_unittest.cc40
-rw-r--r--zucchini.h54
-rw-r--r--zucchini_apply.cc202
-rw-r--r--zucchini_apply.h43
-rw-r--r--zucchini_apply_unittest.cc22
-rw-r--r--zucchini_commands.cc176
-rw-r--r--zucchini_commands.h51
-rw-r--r--zucchini_exe_version.rc.version46
-rw-r--r--zucchini_gen.cc430
-rw-r--r--zucchini_gen.h84
-rw-r--r--zucchini_gen_unittest.cc176
-rw-r--r--zucchini_integration.cc122
-rw-r--r--zucchini_integration.h34
-rw-r--r--zucchini_main.cc54
-rw-r--r--zucchini_tools.cc126
-rw-r--r--zucchini_tools.h38
118 files changed, 16500 insertions, 0 deletions
diff --git a/BUILD.gn b/BUILD.gn
new file mode 100644
index 0000000..47eef3a
--- /dev/null
+++ b/BUILD.gn
@@ -0,0 +1,195 @@
+# Copyright 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//chrome/process_version_rc_template.gni")
+import("//testing/libfuzzer/fuzzer_test.gni")
+import("//testing/test.gni")
+
+static_library("zucchini_lib") {
+ sources = [
+ "abs32_utils.cc",
+ "abs32_utils.h",
+ "address_translator.cc",
+ "address_translator.h",
+ "algorithm.h",
+ "binary_data_histogram.cc",
+ "binary_data_histogram.h",
+ "buffer_sink.cc",
+ "buffer_sink.h",
+ "buffer_source.cc",
+ "buffer_source.h",
+ "buffer_view.h",
+ "crc32.cc",
+ "crc32.h",
+ "disassembler.cc",
+ "disassembler.h",
+ "disassembler_no_op.cc",
+ "disassembler_no_op.h",
+ "disassembler_win32.cc",
+ "disassembler_win32.h",
+ "element_detection.cc",
+ "element_detection.h",
+ "encoded_view.cc",
+ "encoded_view.h",
+ "ensemble_matcher.cc",
+ "ensemble_matcher.h",
+ "equivalence_map.cc",
+ "equivalence_map.h",
+ "heuristic_ensemble_matcher.cc",
+ "heuristic_ensemble_matcher.h",
+ "image_index.cc",
+ "image_index.h",
+ "image_utils.h",
+ "io_utils.cc",
+ "io_utils.h",
+ "label_manager.cc",
+ "label_manager.h",
+ "patch_reader.cc",
+ "patch_reader.h",
+ "patch_utils.h",
+ "patch_writer.cc",
+ "patch_writer.h",
+ "reference_set.cc",
+ "reference_set.h",
+ "rel32_finder.cc",
+ "rel32_finder.h",
+ "rel32_utils.cc",
+ "rel32_utils.h",
+ "reloc_utils.cc",
+ "reloc_utils.h",
+ "suffix_array.h",
+ "target_pool.cc",
+ "target_pool.h",
+ "targets_affinity.cc",
+ "targets_affinity.h",
+ "type_win_pe.h",
+ "typed_value.h",
+ "zucchini.h",
+ "zucchini_apply.cc",
+ "zucchini_apply.h",
+ "zucchini_gen.cc",
+ "zucchini_gen.h",
+ "zucchini_tools.cc",
+ "zucchini_tools.h",
+ ]
+
+ deps = [
+ "//base",
+ ]
+}
+
+static_library("zucchini_io") {
+ sources = [
+ "mapped_file.cc",
+ "mapped_file.h",
+ "zucchini_integration.cc",
+ "zucchini_integration.h",
+ ]
+
+ deps = [
+ ":zucchini_lib",
+ "//base",
+ ]
+}
+
+executable("zucchini") {
+ sources = [
+ "main_utils.cc",
+ "main_utils.h",
+ "zucchini_commands.cc",
+ "zucchini_commands.h",
+ "zucchini_main.cc",
+ ]
+
+ deps = [
+ ":zucchini_io",
+ ":zucchini_lib",
+ "//base",
+ "//build/config:exe_and_shlib_deps",
+ ]
+
+ if (is_win) {
+ deps += [ ":zucchini_exe_version" ]
+ }
+}
+
+if (is_win) {
+ process_version_rc_template("zucchini_exe_version") {
+ template_file = "zucchini_exe_version.rc.version"
+ output = "$target_gen_dir/zucchini_exe_version.rc"
+ }
+}
+
+fuzzer_test("zucchini_patch_fuzzer") {
+ sources = [
+ "patch_fuzzer.cc",
+ ]
+ deps = [
+ ":zucchini_lib",
+ "//base",
+ ]
+ seed_corpus = "testdata/patch_fuzzer"
+}
+
+test("zucchini_unittests") {
+ sources = [
+ "abs32_utils_unittest.cc",
+ "address_translator_unittest.cc",
+ "algorithm_unittest.cc",
+ "binary_data_histogram_unittest.cc",
+ "buffer_sink_unittest.cc",
+ "buffer_source_unittest.cc",
+ "buffer_view_unittest.cc",
+ "crc32_unittest.cc",
+ "element_detection_unittest.cc",
+ "encoded_view_unittest.cc",
+ "equivalence_map_unittest.cc",
+ "image_index_unittest.cc",
+ "image_utils_unittest.cc",
+ "io_utils_unittest.cc",
+ "label_manager_unittest.cc",
+ "mapped_file_unittest.cc",
+ "patch_read_write_unittest.cc",
+ "patch_utils_unittest.cc",
+ "reference_set_unittest.cc",
+ "rel32_finder_unittest.cc",
+ "rel32_utils_unittest.cc",
+ "reloc_utils_unittest.cc",
+ "suffix_array_unittest.cc",
+ "target_pool_unittest.cc",
+ "targets_affinity_unittest.cc",
+ "test_disassembler.cc",
+ "test_disassembler.h",
+ "test_reference_reader.cc",
+ "test_reference_reader.h",
+ "test_utils.cc",
+ "test_utils.h",
+ "typed_value_unittest.cc",
+ "zucchini_apply_unittest.cc",
+ "zucchini_gen_unittest.cc",
+ ]
+
+ deps = [
+ ":zucchini_io",
+ ":zucchini_lib",
+ "//base",
+ "//base/test:run_all_unittests",
+ "//base/test:test_support",
+ "//testing/gtest",
+ ]
+}
+
+test("zucchini_integration_test") {
+ sources = [
+ "integration_test.cc",
+ ]
+
+ deps = [
+ ":zucchini_lib",
+ "//base",
+ "//base/test:run_all_unittests",
+ "//base/test:test_support",
+ "//testing/gtest",
+ ]
+}
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..0c93e58
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,5 @@
+huangs@chromium.org
+grt@chromium.org
+wfh@chromium.org
+
+# COMPONENT: Internals>Installer>Diff
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fe11a0f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,259 @@
+
+## Basic Definitions for Patching
+
+**Binary**: Executable image and data. Binaries may persist in an archive
+(e.g., chrome.7z), and need to be periodically updated. Formats for binaries
+include {PE files EXE / DLL, ELF, DEX}. Architectures for binaries include
+{x86, x64, ARM, AArch64, Dalvik}. A binary is also referred to as an executable
+or an image file.
+
+**Patching**: Sending a "new" file to clients who have an "old" file by
+computing and transmitting a "patch" that can be used to transform "old" into
+"new". Patches are compressed for transmission. A key performance metric is
+patch size, which refers to the size of the compressed patch file. For our
+experiments we use 7z.
+
+**Patch generation**: Computation of a "patch" from "old" and "new". This can be
+expensive (e.g., ~15-20 min for Chrome, using 1 GB of RAM), but since patch
+generation is a run-once step on the server-side when releasing "new" binaries,
+the expense is not too critical.
+
+**Patch application**: Transformation from "old" binaries to "new", using a
+(downloaded) "patch". This is executed on client side on updates, so resource
+constraints (e.g., time, RAM, disk space) are more stringent. Also, fault-
+tolerance is important. This is usually achieved by an update system by having
+a fallback method of directly downloading "new" in case of patching failure.
+
+**Offset**: Position relative to the start of a file.
+
+**Local offset**: An offset relative to the start of a region of a file.
+
+**Element**: A region in a file with associated executable type, represented by
+the tuple (exe_type, offset, length). Every Element in new file is associated
+with an Element in old file and patched independently.
+
+**Reference**: A directed connection between two offsets in a binary. For
+example, consider jump instructions in x86:
+
+ 00401000: E9 3D 00 00 00 jmp 00401042
+
+Here, the 4 bytes `[3D 00 00 00]` starting at address `00401001` point to
+address `00401042` in memory. This forms a reference from `offset(00401001)`
+(length 4) to `offset(00401042)`, where `offset(addr)` indicates the disk
+offset corresponding to `addr`. A reference has a location, length (implicitly
+determined by reference type), body, and target.
+
+**Location**: The starting offset of bytes that store a reference. In the
+preceding example, `offset(00401001)` is a location. Each location is the
+beginning of a reference body.
+
+**Body**: The span of bytes that encodes reference data, i.e.,
+[location, location + length) =
+[location, location + 1, ..., location + length - 1].
+In the preceding example, `length = 4`, so the reference body is
+`[00401001, 00401001 + 4) = [00401001, 00401002, 00401003, 00401004]`.
+No two reference bodies in an image may overlap, and region boundaries are often
+required not to straddle a reference body.
+
+**Target**: The offset that's the destination of a reference. In the preceding
+example, `offset(00401042)` is the target. Different references can share common
+targets. For example, in
+
+ 00401000: E9 3D 00 00 00 jmp 00401042
+ 00401005: EB 3B jmp 00401042
+
+we have two references with different locations and bodies, but same target
+of `00401042`.
+
+Because the bytes that encode a reference depend on its target, and potentially
+on its location, they are more likely to get modified from an old version of a
+binary to a newer version. This is why "naive" patching does not do well on
+binaries.
+
+**Disassembler**: Architecture specific data and operations, used to extract and
+correct references in a binary.
+
+**Type of reference**: The type of a reference determines the binary
+representation used to encode its target. This affects how references are parsed
+and written by a disassembler. There can be many types of references in the same
+binary.
+
+A reference is represented by the tuple (disassembler, location, target, type).
+This tuple contains sufficient information to write the reference in a binary.
+
+**Pool of targets**: Collection of targets that is assumed to have some semantic
+relationship. Each reference type belongs to exactly one reference pool. Targets
+for references in the same pool are shared.
+
+For example, the following describes two pools defined for Dalvik Executable
+format (DEX). Both pools spawn multiple types of references.
+
+1. Index in string table.
+ - From bytecode to string index using 16 bits.
+ - From bytecode to string index using 32 bits.
+ - From field item to string index using 32 bits.
+2. Address in code.
+ - Relative 16 bits pointer.
+ - Relative 32 bits pointer.
+
+Boundaries between different pools can be ambiguous. Having all targets belong
+to the same pool can reduce redundancy, but will use more memory and might
+cause larger corrections to happen, so this is a trade-off that can be resolved
+with benchmarks.
+
+**Abs32 references**: References whose targets are adjusted by the OS during
+program load. In an image, a **relocation table** typically provides locations
+of abs32 references. At each abs32 location, the stored bytes then encode
+semantic information about the target (e.g., as RVA).
+
+**Rel32 references**: References embedded within machine code, in which targets
+are encoded as some delta relative to the reference's location. Typical examples
+of rel32 references are branching instructions and instruction pointer-relative
+memory access.
+
+**Equivalence**: A (src_offset, dst_offset, length) tuple describing a region of
+"old" binary, at an offset of |src_offset|, that is similar to a region of "new"
+binary, at an offset of |dst_offset|.
+
+**Raw delta unit**: Describes a raw modification to apply on the new image, as a
+pair (copy_offset, diff), where copy_offset describes the position in new file
+as an offset in the data that was copied from the old file, and diff is the
+bytewise difference to apply.
+
+**Associated Targets**: A target in "old" binary is associated with a target in
+"new" binary if both targets:
+1. are part of similar regions from the same equivalence, and
+2. have the same local offset (relative to respective start regions), and
+3. are not part of any larger region from a different equivalence.
+Not all targets are necessarily associated with another target.
+
+**Label**: An (offset, index) pair, where |offset| is a target, and |index| is
+an integer used to uniquely identify |offset| in its corresponding pool of
+targets. Labels are created for each Reference in "old" and "new" binary as part
+of generating a patch, and used to alias targets when searching for similar
+regions that will form equivalences. Labels are created such that associated
+targets in old and new binaries share the same |index|, and such that indices in
+a pool are tightly packed. For example, suppose "old" Labels are:
+ - (0x1111, 0), (0x3333, 4), (0x5555, 1), (0x7777, 3)
+and given the following association of targets between "old" and "new":
+ - 0x1111 <=> 0x6666, 0x3333 <=> 0x2222.
+then we could assign indices for "new" Labels as:
+ - (0x2222, 4), (0x4444, 8), (0x6666, 0), (0x8888, 2)
+
+**Encoded Image**: The result of projecting the content of an image to scalar
+values that describe content on a higher level of abstraction, masking away
+undesirable noise in raw content. Notably, the projection encodes references
+based on their associated label.
+
+## Zucchini Ensemble Patch Format
+
+### Types
+
+**int8**: 8-bit signed int.
+
+**uint32**: 32-bit unsigned int, little-endian.
+
+**int32**: 32-bit signed int, little-endian.
+
+**Varints**: This is a generic variable-length encoding for integer quantities
+that strips away leading (most-significant) null bytes.
+The Varints format is borrowed from protocol-buffers, see
+[documentation](https://developers.google.com/protocol-buffers/docs/encoding#varints)
+for more info.
+
+**varuint32**: A uint32 encoded using Varints format.
+
+**varint32**: An int32 encoded using Varints format.
+
+### File Layout
+
+Name | Format | Description
+--- | --- | ---
+header | PatchHeader | The header.
+patch_type | uint32 | Type of this patch, see `enum PatchType`.
+elements_count | uint32 | Number of patch units.
+elements | PatchElement[elements_count] | List of all patch elements.
+
+Position of elements in new file is ascending.
+
+### Structures
+
+**PatchHeader**
+
+Name | Format | Description
+--- | --- | ---
+magic | uint32 = kMagic | Magic value.
+old_size | uint32 | Size of old file in bytes.
+old_crc | uint32 | CRC32 of old file.
+new_size | uint32 | Size of new file in bytes.
+new_crc | uint32 | CRC32 of new file.
+
+**kMagic** == `'Z' | ('u' << 8) | ('c' << 16)`
+
+**PatchElement**
+Contains all the information required to produce a single element in new file.
+
+Name | Format | Description
+--- | --- | ---
+header | PatchElementHeader | The header.
+equivalences | EquivalenceList | List of equivalences.
+raw_deltas | RawDeltaList | List of raw deltas.
+reference_deltas | ReferenceDeltaList | List of reference deltas.
+pool_count | uint32 | Number of pools.
+extra_targets | ExtraTargetList[pool_count] | Lists of extra targets.
+
+**PatchElementHeader**
+Describes a correspondence between an element in old and in new files. Some
+redundancy arises from storing |new_offset|, but it is necessary to make
+PatchElement self-contained.
+
+Name | Format | Description
+--- | --- | ---
+old_offset | uint32 | Starting offset of the element in old file.
+new_offset | uint32 | Starting offset of the element in new file.
+old_length | uint32 | Length of the element in old file.
+new_length | uint32 | Length of the element in new file.
+exe_type | uint32 | Executable type for this unit, see `enum ExecutableType`.
+
+**EquivalenceList**
+Encodes a list of equivalences, where dst offsets (in new image) are ascending.
+
+Name | Format | Description
+--- | --- | ---
+src_skip | Buffer<varint32> | Src offset for each equivalence, delta encoded.
+dst_skip | Buffer<varuint32> | Dst offset for each equivalence, delta encoded.
+copy_count | Buffer<varuint32> | Length for each equivalence.
+
+**RawDeltaList**
+Encodes a list of raw delta units, with ascending copy offsets.
+
+Name | Format | Description
+--- | --- | ---
+raw_delta_skip | Buffer<varuint32> | Copy offset for each delta unit, delta encoded and biased by -1.
+raw_delta_diff | Buffer<int8> | Bytewise difference for each delta unit.
+
+**ReferenceDeltaList**
+Encodes a list of reference deltas, in the order they appear in the new
+image file. A reference delta is a signed integer representing a jump through a
+list of targets.
+
+Name | Format | Description
+--- | --- | ---
+reference_delta | Buffer<varint32> | Vector of reference deltas.
+
+**ExtraTargetList**
+Encodes a list of additional targets in the new image file, in ascending
+order.
+
+Name | Format | Description
+--- | --- | ---
+pool_tag | uint8_t | Unique identifier for this pool of targets.
+extra_targets | Buffer<varuint32> | Additional targets, delta encoded and biased by -1.
+
+**Buffer<T>**
+A generic vector of data.
+
+Name | Format | Description
+--- | --- | ---
+size |uint32 | Size of content in bytes.
+content |T[] | List of integers.
diff --git a/abs32_utils.cc b/abs32_utils.cc
new file mode 100644
index 0000000..b45da7e
--- /dev/null
+++ b/abs32_utils.cc
@@ -0,0 +1,201 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/abs32_utils.h"
+
+#include <algorithm>
+#include <type_traits>
+#include <utility>
+
+#include "base/logging.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+namespace {
+
+// Reads an unsigned integer of type |T| from |image| at |offset| and widens it
+// into |*value|. Returns false, leaving |*value| untouched, if |image| reports
+// that a |T| cannot be accessed at |offset|. Helper for
+// AbsoluteAddress::Read().
+template <typename T>
+bool ReadAbs(ConstBufferView image, offset_t offset, uint64_t* value) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+  const bool accessible = image.can_access<T>(offset);
+  if (accessible)
+    *value = static_cast<uint64_t>(image.read<T>(offset));
+  return accessible;
+}
+
+// Stores |value| as an unsigned integer of type |T| into |*image| at |offset|.
+// Returns false without writing if |*image| reports that a |T| cannot be
+// accessed at |offset|. Helper for AbsoluteAddress::Write().
+template <typename T>
+bool WriteAbs(offset_t offset, T value, MutableBufferView* image) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+  const bool accessible = image->can_access<T>(offset);
+  if (accessible)
+    image->write<T>(offset, value);
+  return accessible;
+}
+
+} // namespace
+
+/******** AbsoluteAddress ********/
+
+AbsoluteAddress::AbsoluteAddress(Bitness bitness, uint64_t image_base)
+ : bitness_(bitness), image_base_(image_base), value_(image_base) {  // |value_| starts at the image base, which ToRva() maps to RVA 0.
+ CHECK(bitness_ == kBit64 || image_base_ < 0x100000000ULL);  // A non-64-bit image base must fit in 32 bits.
+}
+
+AbsoluteAddress::AbsoluteAddress(AbsoluteAddress&&) = default;  // Member-wise move.
+
+AbsoluteAddress::~AbsoluteAddress() = default;
+
+// Sets |value_| to |image_base_| + |rva|. Returns false and leaves |value_|
+// unchanged if |rva| is not below |kRvaBound| or if the sum overflows the
+// address width selected by |bitness_|.
+bool AbsoluteAddress::FromRva(rva_t rva) {
+  if (rva >= kRvaBound)
+    return false;
+  const uint64_t sum = image_base_ + rva;
+  // Overflow shows up as the sum "wrapping around" to something smaller than
+  // |image_base_|. For 32-bit, only the low 32 bits of the sum take part in
+  // that comparison.
+  const uint64_t comparable = (bitness_ == kBit32) ? (sum & 0xFFFFFFFFU) : sum;
+  if (comparable < image_base_)
+    return false;
+  value_ = sum;
+  return true;
+}
+
+// Returns |value_| - |image_base_| as an RVA, or |kInvalidRva| if |value_| lies
+// below the image base or the difference is not below |kRvaBound|.
+rva_t AbsoluteAddress::ToRva() const {
+  if (value_ >= image_base_) {
+    const uint64_t delta = value_ - image_base_;
+    if (delta < kRvaBound)
+      return static_cast<rva_t>(delta);
+  }
+  return kInvalidRva;
+}
+
+// Loads the raw address stored in |image| at |offset| into |value_|, reading 4
+// bytes for kBit32 and 8 bytes otherwise. Returns false if the read is not
+// possible. The loaded |value_| is raw data and is not guaranteed to represent
+// a valid RVA.
+bool AbsoluteAddress::Read(offset_t offset, const ConstBufferView& image) {
+  if (bitness_ != kBit32) {
+    DCHECK_EQ(kBit64, bitness_);
+    return ReadAbs<uint64_t>(image, offset, &value_);
+  }
+  return ReadAbs<uint32_t>(image, offset, &value_);
+}
+
+// Stores |value_| into |*image| at |offset|, truncated to 32 bits for kBit32
+// and as a full 64-bit value otherwise. Returns false if the write is not
+// possible.
+bool AbsoluteAddress::Write(offset_t offset, MutableBufferView* image) {
+  if (bitness_ != kBit32) {
+    DCHECK_EQ(kBit64, bitness_);
+    return WriteAbs<uint64_t>(offset, value_, image);
+  }
+  const uint32_t narrowed = static_cast<uint32_t>(value_);
+  return WriteAbs<uint32_t>(offset, narrowed, image);
+}
+
+/******** Abs32RvaExtractorWin32 ********/
+
+// Sets up iteration over the entries of |abs32_locations| that fall in
+// |[lo, hi)|. CHECK-fails if |lo| > |hi|, or if either bound lands inside the
+// body of the location preceding it. The stored iterators point into
+// |abs32_locations|.
+Abs32RvaExtractorWin32::Abs32RvaExtractorWin32(
+    ConstBufferView image,
+    AbsoluteAddress&& addr,
+    const std::vector<offset_t>& abs32_locations,
+    offset_t lo,
+    offset_t hi)
+    : image_(image), addr_(std::move(addr)) {
+  CHECK_LE(lo, hi);
+  // |addr| was moved into |addr_| by the initializer list, so the reference
+  // width is taken from |addr_| rather than from the moved-from parameter.
+  auto find_and_check = [this](const std::vector<offset_t>& locations,
+                               offset_t offset) {
+    auto it = std::lower_bound(locations.begin(), locations.end(), offset);
+    // Ensure |offset| does not straddle a reference body.
+    CHECK(it == locations.begin() || offset - *(it - 1) >= addr_.width());
+    return it;
+  };
+  cur_abs32_ = find_and_check(abs32_locations, lo);
+  end_abs32_ = find_and_check(abs32_locations, hi);
+}
+
+Abs32RvaExtractorWin32::Abs32RvaExtractorWin32(Abs32RvaExtractorWin32&&) =
+    default;
+
+Abs32RvaExtractorWin32::~Abs32RvaExtractorWin32() = default;
+
+// Advances through the remaining abs32 locations and returns the first one
+// whose stored address can be read from |image_| and converts to a valid RVA.
+// Locations failing either step are consumed and skipped. Returns
+// base::nullopt once the range is exhausted.
+base::Optional<Abs32RvaExtractorWin32::Unit> Abs32RvaExtractorWin32::GetNext() {
+  for (; cur_abs32_ < end_abs32_;) {
+    const offset_t location = *cur_abs32_;
+    ++cur_abs32_;
+    if (addr_.Read(location, image_)) {
+      const rva_t target_rva = addr_.ToRva();
+      if (target_rva != kInvalidRva)
+        return Unit{location, target_rva};
+    }
+  }
+  return base::nullopt;
+}
+
+/******** Abs32ReaderWin32 ********/
+
+Abs32ReaderWin32::Abs32ReaderWin32(Abs32RvaExtractorWin32&& abs32_rva_extractor,
+ const AddressTranslator& translator)
+ : abs32_rva_extractor_(std::move(abs32_rva_extractor)),  // The extractor is moved into this reader.
+ target_rva_to_offset_(translator) {}  // RVA-to-offset cache constructed from |translator|.
+
+Abs32ReaderWin32::~Abs32ReaderWin32() = default;
+
+// Pulls units from |abs32_rva_extractor_| until one yields a usable Reference.
+// Units whose target RVA has no offset, or whose target offset is "marked",
+// are dropped. Returns base::nullopt when the extractor runs dry.
+base::Optional<Reference> Abs32ReaderWin32::GetNext() {
+  while (true) {
+    auto unit = abs32_rva_extractor_.GetNext();
+    if (!unit.has_value())
+      break;
+    const offset_t location = unit->location;
+    const offset_t target = target_rva_to_offset_.Convert(unit->target_rva);
+    if (target == kInvalidOffset)
+      continue;
+    if (!IsMarked(target))
+      return Reference{location, target};
+    // In rare cases, the most significant bit of |target| is set. This
+    // interferes with label marking. A quick fix is to reject these.
+    LOG(WARNING) << "Warning: Skipping mark-aliased PE abs32 target: "
+                 << AsHex<8>(location) << " -> " << AsHex<8>(target) << ".";
+  }
+  return base::nullopt;
+}
+
+/******** Abs32WriterWin32 ********/
+
+Abs32WriterWin32::Abs32WriterWin32(MutableBufferView image,
+ AbsoluteAddress&& addr,
+ const AddressTranslator& translator)
+ : image_(image),  // Destination view that PutNext() writes into.
+ addr_(std::move(addr)),  // Moved in; reused by PutNext() as a scratch address.
+ target_offset_to_rva_(translator) {}  // Offset-to-RVA cache constructed from |translator|.
+
+Abs32WriterWin32::~Abs32WriterWin32() = default;
+
+// Writes the absolute address of |ref.target| into |image_| at |ref.location|.
+// The reference is silently skipped if the target offset has no RVA, or if the
+// RVA cannot be converted to an absolute address.
+void Abs32WriterWin32::PutNext(Reference ref) {
+  const rva_t target_rva = target_offset_to_rva_.Convert(ref.target);
+  if (target_rva == kInvalidRva)
+    return;
+  // FromRva() leaves |addr_| untouched on failure, so writing anyway would
+  // store whatever address a previous call left behind.
+  if (!addr_.FromRva(target_rva))
+    return;
+  addr_.Write(ref.location, &image_);
+}
+
+/******** Exported Functions ********/
+
+// Compacts |*locations| in place so that each kept location is at least
+// WidthOf(|bitness|) past the previously kept one. The first location is
+// always kept. Returns how many entries were dropped.
+size_t RemoveOverlappingAbs32Locations(Bitness bitness,
+                                       std::vector<offset_t>* locations) {
+  if (locations->size() <= 1)
+    return 0;
+
+  uint32_t width = WidthOf(bitness);
+  // |last_kept| is the most recent surviving element; everything up to and
+  // including it is the compacted output so far.
+  auto last_kept = locations->begin();
+  for (auto candidate = locations->begin() + 1; candidate != locations->end();
+       ++candidate) {
+    // A candidate starting inside the body of |last_kept| is discarded.
+    if (*candidate - *last_kept < width)
+      continue;
+    ++last_kept;
+    if (last_kept != candidate)
+      *last_kept = *candidate;
+  }
+  auto new_end = last_kept + 1;
+  size_t num_removed = locations->end() - new_end;
+  locations->erase(new_end, locations->end());
+  return num_removed;
+}
+
+} // namespace zucchini
diff --git a/abs32_utils.h b/abs32_utils.h
new file mode 100644
index 0000000..b1d3ae0
--- /dev/null
+++ b/abs32_utils.h
@@ -0,0 +1,137 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ABS32_UTILS_H_
+#define COMPONENTS_ZUCCHINI_ABS32_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "base/macros.h"
+#include "base/optional.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A class to represent an abs32 address (32-bit or 64-bit). Accessors are
+// provided to translate from / to RVA, and to read / write the represented
+// abs32 address from / to an image. Only a move constructor is declared.
+class AbsoluteAddress {
+ public:
+ AbsoluteAddress(Bitness bitness, uint64_t image_base);
+ AbsoluteAddress(AbsoluteAddress&&);
+ ~AbsoluteAddress();
+
+ // Attempts to translate |rva| to an abs32 address. On success, assigns the
+ // result to |value_| and returns true. On failure (invalid |rva| or
+ // overflow), returns false and leaves |value_| unchanged.
+ bool FromRva(rva_t rva);
+
+ // Returns the RVA for |value_|, or |kInvalidRva| if the represented
+ // address does not correspond to a valid RVA.
+ rva_t ToRva() const;
+
+ // Attempts to read the abs32 address at |image[offset]| into |value_|. On
+ // success, updates |value_| and returns true. On failure (invalid |offset|),
+ // returns false. The value read is raw and may not map to a valid RVA.
+ bool Read(offset_t offset, const ConstBufferView& image);
+
+ // Attempts to write |value_| to |(*image)[offset]|. On success, performs
+ // the write and returns true. On failure (invalid |offset|), returns false.
+ bool Write(offset_t offset, MutableBufferView* image);
+
+ size_t width() const { return WidthOf(bitness_); }  // Width for |bitness_|, as given by WidthOf().
+
+ // Exposing |value_| for testing.
+ uint64_t* mutable_value() { return &value_; }
+
+ private:
+ const Bitness bitness_;
+ const uint64_t image_base_; // Accommodates 32-bit and 64-bit.
+ uint64_t value_; // Accommodates 32-bit and 64-bit.
+};
+
+// A class to extract Win32 abs32 references from |abs32_locations| within
+// |image_| bounded by |[lo, hi)|. GetNext() is used to successively return
+// data as Units, which are locations and (potentially out-of-bound) RVAs.
+// |addr| determines the bitness of abs32 values stored, and mediates all reads.
+class Abs32RvaExtractorWin32 {
+ public:
+ struct Unit {
+ offset_t location;  // Where in the image the abs32 value is stored.
+ rva_t target_rva;  // RVA decoded from the value stored at |location|.
+ };
+
+ // Requires |lo| <= |hi|, and they must not straddle a reference body (with
+ // length |addr.width()|) in |abs32_locations|. Both are enforced with CHECK.
+ Abs32RvaExtractorWin32(ConstBufferView image,
+ AbsoluteAddress&& addr,
+ const std::vector<offset_t>& abs32_locations,
+ offset_t lo,
+ offset_t hi);
+ Abs32RvaExtractorWin32(Abs32RvaExtractorWin32&&);
+ ~Abs32RvaExtractorWin32();
+
+ // Visits given abs32 locations, rejects invalid locations and non-existent
+ // RVAs, and returns reference as Unit, or base::nullopt on completion.
+ base::Optional<Unit> GetNext();
+
+ private:
+ ConstBufferView image_;
+ AbsoluteAddress addr_;
+ std::vector<offset_t>::const_iterator cur_abs32_;  // Next location to visit; points into the caller's |abs32_locations|.
+ std::vector<offset_t>::const_iterator end_abs32_;  // One past the last location to visit; same vector as |cur_abs32_|.
+};
+
+// A reader for Win32 abs32 references that filters and translates results from
+// |abs32_rva_extractor_|, turning each (location, RVA) Unit into a Reference.
+class Abs32ReaderWin32 : public ReferenceReader {
+ public:
+ Abs32ReaderWin32(Abs32RvaExtractorWin32&& abs32_rva_extractor,
+ const AddressTranslator& translator);
+ ~Abs32ReaderWin32() override;
+
+ // ReferenceReader: returns the next usable reference, or base::nullopt.
+ base::Optional<Reference> GetNext() override;
+
+ private:
+ Abs32RvaExtractorWin32 abs32_rva_extractor_;
+ AddressTranslator::RvaToOffsetCache target_rva_to_offset_;  // Converts target RVAs to offsets.
+
+ DISALLOW_COPY_AND_ASSIGN(Abs32ReaderWin32);
+};
+
+// A writer for Win32 abs32 references. |addr| determines the bitness of the
+// abs32 values stored, and mediates all writes.
+class Abs32WriterWin32 : public ReferenceWriter {
+ public:
+ Abs32WriterWin32(MutableBufferView image,
+ AbsoluteAddress&& addr,
+ const AddressTranslator& translator);
+ ~Abs32WriterWin32() override;
+
+ // ReferenceWriter: does nothing if the target of |ref| has no RVA.
+ void PutNext(Reference ref) override;
+
+ private:
+ MutableBufferView image_;
+ AbsoluteAddress addr_;
+ AddressTranslator::OffsetToRvaCache target_offset_to_rva_;  // Converts target offsets to RVAs.
+
+ DISALLOW_COPY_AND_ASSIGN(Abs32WriterWin32);
+};
+
+// Given a sorted list of abs32 |locations|, removes in place all elements whose
+// body overlaps with the body of a previously kept element (|bitness|
+// determines length). Returns the number of elements removed.
+size_t RemoveOverlappingAbs32Locations(Bitness bitness,
+ std::vector<offset_t>* locations);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ABS32_UTILS_H_
diff --git a/abs32_utils_unittest.cc b/abs32_utils_unittest.cc
new file mode 100644
index 0000000..480fea0
--- /dev/null
+++ b/abs32_utils_unittest.cc
@@ -0,0 +1,496 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/abs32_utils.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// A trivial AddressTranslator that applies constant shift.
+class TestAddressTranslator : public AddressTranslator {
+ public:
+ TestAddressTranslator(size_t image_size, rva_t rva_begin) {
+ DCHECK_GE(rva_begin, 0U);
+ CHECK_EQ(AddressTranslator::kSuccess,
+ Initialize({{0, base::checked_cast<offset_t>(image_size),
+ rva_begin, base::checked_cast<rva_t>(image_size)}}));
+ }
+};
+
+// Helper to translate address |value| to RVA. May return |kInvalidRva|.
+rva_t AddrValueToRva(uint64_t value, AbsoluteAddress* addr) {
+ *addr->mutable_value() = value;
+ return addr->ToRva();
+}
+
+} // namespace
+
+TEST(Abs32UtilsTest, AbsoluteAddress32) {
+ std::vector<uint8_t> data32 = ParseHexString(
+ "00 00 32 00 21 43 65 4A 00 00 00 00 FF FF FF FF FF FF 31 00");
+ ConstBufferView image32(data32.data(), data32.size());
+ MutableBufferView mutable_image32(data32.data(), data32.size());
+
+ AbsoluteAddress addr32(kBit32, 0x00320000U);
+ EXPECT_TRUE(addr32.Read(0x0U, image32));
+ EXPECT_EQ(0x00000000U, addr32.ToRva());
+ EXPECT_TRUE(addr32.Read(0x4U, image32));
+ EXPECT_EQ(0x4A334321U, addr32.ToRva());
+ EXPECT_TRUE(addr32.Read(0x8U, image32));
+ EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Underflow.
+ EXPECT_TRUE(addr32.Read(0xCU, image32));
+ EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Translated RVA would be too large.
+ EXPECT_TRUE(addr32.Read(0x10U, image32));
+ EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Underflow (boundary case).
+
+ EXPECT_FALSE(addr32.Read(0x11U, image32));
+ EXPECT_FALSE(addr32.Read(0x14U, image32));
+ EXPECT_FALSE(addr32.Read(0x100000U, image32));
+ EXPECT_FALSE(addr32.Read(0x80000000U, image32));
+ EXPECT_FALSE(addr32.Read(0xFFFFFFFFU, image32));
+
+ EXPECT_TRUE(addr32.FromRva(0x11223344U));
+ EXPECT_TRUE(addr32.Write(0x2U, &mutable_image32));
+ EXPECT_TRUE(addr32.Write(0x10U, &mutable_image32));
+ std::vector<uint8_t> expected_data32 = ParseHexString(
+ "00 00 44 33 54 11 65 4A 00 00 00 00 FF FF FF FF 44 33 54 11");
+ EXPECT_EQ(expected_data32, data32);
+ EXPECT_FALSE(addr32.Write(0x11U, &mutable_image32));
+ EXPECT_FALSE(addr32.Write(0xFFFFFFFFU, &mutable_image32));
+ EXPECT_EQ(expected_data32, data32);
+}
+
+TEST(Abs32UtilsTest, AbsoluteAddress32Overflow) {
+ AbsoluteAddress addr32(kBit32, 0xC0000000U);
+ EXPECT_TRUE(addr32.FromRva(0x00000000U));
+ EXPECT_TRUE(addr32.FromRva(0x11223344U));
+ EXPECT_TRUE(addr32.FromRva(0x3FFFFFFFU));
+ EXPECT_FALSE(addr32.FromRva(0x40000000U));
+ EXPECT_FALSE(addr32.FromRva(0x40000001U));
+ EXPECT_FALSE(addr32.FromRva(0x80000000U));
+ EXPECT_FALSE(addr32.FromRva(0xFFFFFFFFU));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC0000000U, &addr32));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFU, &addr32));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x00000000U, &addr32));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFU, &addr32));
+}
+
+TEST(Abs32UtilsTest, AbsoluteAddress64) {
+ std::vector<uint8_t> data64 = ParseHexString(
+ "00 00 00 00 64 00 00 00 21 43 65 4A 64 00 00 00 "
+ "00 00 00 00 00 00 00 00 FF FF FF FF FF FF FF FF "
+ "00 00 00 00 64 00 00 80 FF FF FF FF 63 00 00 00");
+ ConstBufferView image64(data64.data(), data64.size());
+ MutableBufferView mutable_image64(data64.data(), data64.size());
+
+ AbsoluteAddress addr64(kBit64, 0x0000006400000000ULL);
+ EXPECT_TRUE(addr64.Read(0x0U, image64));
+ EXPECT_EQ(0x00000000U, addr64.ToRva());
+ EXPECT_TRUE(addr64.Read(0x8U, image64));
+ EXPECT_EQ(0x4A654321U, addr64.ToRva());
+ EXPECT_TRUE(addr64.Read(0x10U, image64)); // Succeeds, in spite of value.
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Underflow.
+ EXPECT_TRUE(addr64.Read(0x18U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Translated RVA too large.
+ EXPECT_TRUE(addr64.Read(0x20U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Translated RVA too large.
+ EXPECT_TRUE(addr64.Read(0x28U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Underflow.
+
+ EXPECT_FALSE(addr64.Read(0x29U, image64)); // Extends outside.
+ EXPECT_FALSE(addr64.Read(0x30U, image64)); // Entirely outside (note: hex).
+ EXPECT_FALSE(addr64.Read(0x100000U, image64));
+ EXPECT_FALSE(addr64.Read(0x80000000U, image64));
+ EXPECT_FALSE(addr64.Read(0xFFFFFFFFU, image64));
+
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.Write(0x13U, &mutable_image64));
+ EXPECT_TRUE(addr64.Write(0x20U, &mutable_image64));
+ std::vector<uint8_t> expected_data64 = ParseHexString(
+ "00 00 00 00 64 00 00 00 21 43 65 4A 64 00 00 00 "
+ "00 00 00 44 33 22 11 64 00 00 00 FF FF FF FF FF "
+ "44 33 22 11 64 00 00 00 FF FF FF FF 63 00 00 00");
+ EXPECT_EQ(expected_data64, data64);
+ EXPECT_FALSE(addr64.Write(0x29U, &mutable_image64));
+ EXPECT_FALSE(addr64.Write(0x30U, &mutable_image64));
+ EXPECT_FALSE(addr64.Write(0xFFFFFFFFU, &mutable_image64));
+ EXPECT_EQ(expected_data64, data64);
+
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+}
+
+TEST(Abs32UtilsTest, AbsoluteAddress64Overflow) {
+ {
+ // Counterpart to AbsoluteAddress32Overflow test.
+ AbsoluteAddress addr64(kBit64, 0xFFFFFFFFC0000000ULL);
+ EXPECT_TRUE(addr64.FromRva(0x00000000U));
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFU));
+ EXPECT_FALSE(addr64.FromRva(0x40000000U));
+ EXPECT_FALSE(addr64.FromRva(0x40000001U));
+ EXPECT_FALSE(addr64.FromRva(0x80000000U));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xFFFFFFFFC0000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFFBFFFFFFFU, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x0000000000000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFF00000000U, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFFFFFFFFFU, &addr64));
+ }
+ {
+ // Pseudo-counterpart to AbsoluteAddress32Overflow test: Some now pass.
+ AbsoluteAddress addr64(kBit64, 0xC0000000U);
+ EXPECT_TRUE(addr64.FromRva(0x00000000U));
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFU));
+ EXPECT_TRUE(addr64.FromRva(0x40000000U));
+ EXPECT_TRUE(addr64.FromRva(0x40000001U));
+ EXPECT_FALSE(addr64.FromRva(0x80000000U));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+
+ // ToRva() still fails for the same inputs, though.
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC0000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFU, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x00000000U, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFU, &addr64));
+ }
+ {
+ AbsoluteAddress addr64(kBit64, 0xC000000000000000ULL);
+ EXPECT_TRUE(addr64.FromRva(0x00000000ULL));
+ EXPECT_TRUE(addr64.FromRva(0x11223344ULL));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFULL));
+ EXPECT_TRUE(addr64.FromRva(0x40000000ULL));
+ EXPECT_TRUE(addr64.FromRva(0x40000001ULL));
+ EXPECT_FALSE(addr64.FromRva(0x80000000ULL));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFULL));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC000000000000000ULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFFFFFFFFFULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x0000000000000000ULL, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xC00000003FFFFFFFULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFFFFFFFFFFULL, &addr64));
+ }
+}
+
+TEST(Abs32UtilsTest, Win32Read32) {
+ constexpr uint32_t kImageBase = 0xA0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ struct {
+ std::vector<uint8_t> data32;
+ std::vector<offset_t> abs32_locations; // Assumption: Sorted.
+ offset_t lo; // Assumption: In range, does not straddle |abs32_location|.
+ offset_t hi; // Assumption: Also >= |lo|.
+ std::vector<Reference> expected_refs;
+ } test_cases[] = {
+ // Targets at beginning and end.
+ {ParseHexString("FF FF FF FF 0F 00 C0 A0 00 00 C0 A0 FF FF FF FF"),
+ {0x4U, 0x8U},
+ 0x0U,
+ 0x10U,
+ {{0x4U, 0xFU}, {0x8U, 0x0U}}},
+ // Targets at beginning and end are out of bound: Rejected.
+ {ParseHexString("FF FF FF FF 10 00 C0 A0 FF FF BF A0 FF FF FF FF"),
+ {0x4U, 0x8U},
+ 0x0U,
+ 0x10U,
+ std::vector<Reference>()},
+ // Same with more extreme target values: Rejected.
+ {ParseHexString("FF FF FF FF FF FF FF FF 00 00 00 00 FF FF FF FF"),
+ {0x4U, 0x8U},
+ 0x0U,
+ 0x10U,
+ std::vector<Reference>()},
+ // Locations at beginning and end, plus invalid locations.
+ {ParseHexString("08 00 C0 A0 FF FF FF FF FF FF FF FF 04 00 C0 A0"),
+ {0x0U, 0xCU, 0x10U, 0x1000U, 0x80000000U, 0xFFFFFFFFU},
+ 0x0U,
+ 0x10U,
+ {{0x0U, 0x8U}, {0xCU, 0x4U}}},
+ // Odd size, location, target.
+ {ParseHexString("FF FF FF 09 00 C0 A0 FF FF FF FF FF FF FF FF FF "
+ "FF FF FF"),
+ {0x3U},
+ 0x0U,
+ 0x13U,
+ {{0x3U, 0x9U}}},
+ // No location given.
+ {ParseHexString("FF FF FF FF 0C 00 C0 A0 00 00 C0 A0 FF FF FF FF"),
+ std::vector<offset_t>(), 0x0U, 0x10U, std::vector<Reference>()},
+ // Simple alternation.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x0U,
+ 0x20U,
+ {{0x0U, 0x4U}, {0x8U, 0xCU}, {0x10U, 0x14U}, {0x18U, 0x1CU}}},
+ // Same, with locations limited by |lo| and |hi|. By assumption these must
+ // not cut across Reference body.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x04U,
+ 0x17U,
+ {{0x8U, 0xCU}, {0x10U, 0x14U}}},
+ // Same, with very limiting |lo| and |hi|.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x0CU,
+ 0x10U,
+ std::vector<Reference>()},
+ // Same, |lo| == |hi|.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x14U,
+ 0x14U,
+ std::vector<Reference>()},
+ // Same, |lo| and |hi| at end.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x20U,
+ 0x20U,
+ std::vector<Reference>()},
+ // Mix. Note that targets can overlap.
+ {ParseHexString("FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF "
+ "06 00 C0 A0 2C 00 C0 A0 FF FF C0 A0 2B 00 C0 A0 "
+ "FF 06 00 C0 A0 00 00 C0 A0 FF FF FF FF FF FF FF"),
+ {0x10U, 0x14U, 0x18U, 0x1CU, 0x21U, 0x25U, 0xAAAAU},
+ 0x07U,
+ 0x25U,
+ {{0x10U, 0x6U}, {0x14U, 0x2CU}, {0x1CU, 0x2BU}, {0x21, 0x6U}}},
+ };
+
+ for (const auto& test_case : test_cases) {
+ ConstBufferView image32(test_case.data32.data(), test_case.data32.size());
+ Abs32RvaExtractorWin32 extractor(image32, {kBit32, kImageBase},
+ test_case.abs32_locations, test_case.lo,
+ test_case.hi);
+
+ TestAddressTranslator translator(test_case.data32.size(), kRvaBegin);
+ Abs32ReaderWin32 reader(std::move(extractor), translator);
+
+ // Loop over |expected_refs| to check element-by-element.
+ base::Optional<Reference> ref;
+ for (const auto& expected_ref : test_case.expected_refs) {
+ ref = reader.GetNext();
+ EXPECT_TRUE(ref.has_value());
+ EXPECT_EQ(expected_ref, ref.value());
+ }
+ // Check that nothing is left.
+ ref = reader.GetNext();
+ EXPECT_FALSE(ref.has_value());
+ }
+}
+
+TEST(Abs32UtilsTest, Win32Read64) {
+ constexpr uint64_t kImageBase = 0x31415926A0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ // For simplicity, just test mixed case.
+ std::vector<uint8_t> data64 = ParseHexString(
+ "FF FF FF FF FF FF FF FF 00 00 C0 A0 26 59 41 31 "
+ "06 00 C0 A0 26 59 41 31 02 00 C0 A0 26 59 41 31 "
+ "FF FF FF BF 26 59 41 31 FF FF FF FF FF FF FF FF "
+ "02 00 C0 A0 26 59 41 31 07 00 C0 A0 26 59 41 31");
+ std::vector<offset_t> abs32_locations = {0x8U, 0x10U, 0x18U, 0x20U,
+ 0x28U, 0x30U, 0x38U, 0x40U};
+ offset_t lo = 0x10U;
+ offset_t hi = 0x38U;
+ std::vector<Reference> expected_refs = {
+ {0x10U, 0x06U}, {0x18U, 0x02U}, {0x30U, 0x02U}};
+
+ ConstBufferView image64(data64.data(), data64.size());
+ Abs32RvaExtractorWin32 extractor(image64, {kBit64, kImageBase},
+ abs32_locations, lo, hi);
+ TestAddressTranslator translator(data64.size(), kRvaBegin);
+ Abs32ReaderWin32 reader(std::move(extractor), translator);
+
+ std::vector<Reference> refs;
+ base::Optional<Reference> ref;
+ for (ref = reader.GetNext(); ref.has_value(); ref = reader.GetNext())
+ refs.push_back(ref.value());
+ EXPECT_EQ(expected_refs, refs);
+}
+
+TEST(Abs32UtilsTest, Win32ReadFail) {
+ // Make |bitness| a state to reduce repetition.
+ Bitness bitness = kBit32;
+
+ constexpr uint32_t kImageBase = 0xA0000000U; // Shared for 32-bit and 64-bit.
+ std::vector<uint8_t> data(32U, 0xFFU);
+ ConstBufferView image(data.data(), data.size());
+
+ auto try_make = [&](std::vector<offset_t>&& abs32_locations, offset_t lo,
+ offset_t hi) {
+ Abs32RvaExtractorWin32 extractor(image, {bitness, kImageBase},
+ abs32_locations, lo, hi);
+ extractor.GetNext(); // Dummy call so |extractor| gets used.
+ };
+
+ // 32-bit tests.
+ bitness = kBit32;
+ try_make({8U, 24U}, 0U, 32U);
+ EXPECT_DEATH(try_make({4U, 24U}, 32U, 0U), ""); // |lo| > |hi|.
+ try_make({8U, 24U}, 0U, 12U);
+ try_make({8U, 24U}, 0U, 28U);
+ try_make({8U, 24U}, 8U, 32U);
+ try_make({8U, 24U}, 24U, 32U);
+ EXPECT_DEATH(try_make({8U, 24U}, 0U, 11U), ""); // |hi| straddles.
+ EXPECT_DEATH(try_make({8U, 24U}, 26U, 32U), ""); // |lo| straddles.
+ try_make({8U, 24U}, 12U, 24U);
+
+ // 64-bit tests.
+ bitness = kBit64;
+ try_make({6U, 22U}, 0U, 32U);
+ // |lo| > |hi|.
+ EXPECT_DEATH(try_make(std::vector<offset_t>(), 32U, 31U), "");
+ try_make({6U, 22U}, 0U, 14U);
+ try_make({6U, 22U}, 0U, 30U);
+ try_make({6U, 22U}, 6U, 32U);
+ try_make({6U, 22U}, 22U, 32U);
+ EXPECT_DEATH(try_make({6U, 22U}, 0U, 29U), ""); // |hi| straddles.
+ EXPECT_DEATH(try_make({6U, 22U}, 7U, 32U), ""); // |lo| straddles.
+ try_make({6U, 22U}, 14U, 20U);
+ try_make({16U}, 16U, 24U);
+ EXPECT_DEATH(try_make({16U}, 18U, 18U), ""); // |lo|, |hi| straddle.
+}
+
+TEST(Abs32UtilsTest, Win32Write32) {
+ constexpr uint32_t kImageBase = 0xA0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ std::vector<uint8_t> data32(0x30, 0xFFU);
+ MutableBufferView image32(data32.data(), data32.size());
+ AbsoluteAddress addr(kBit32, kImageBase);
+ TestAddressTranslator translator(data32.size(), kRvaBegin);
+ Abs32WriterWin32 writer(image32, std::move(addr), translator);
+
+ // Successful writes.
+ writer.PutNext({0x02U, 0x10U});
+ writer.PutNext({0x0BU, 0x21U});
+ writer.PutNext({0x16U, 0x10U});
+ writer.PutNext({0x2CU, 0x00U});
+
+ // Invalid data: For simplicity, Abs32WriterWin32 simply ignores bad writes.
+ // Invalid location.
+ writer.PutNext({0x2DU, 0x20U});
+ writer.PutNext({0x80000000U, 0x20U});
+ writer.PutNext({0xFFFFFFFFU, 0x20U});
+ // Invalid target.
+ writer.PutNext({0x1CU, 0x00001111U});
+ writer.PutNext({0x10U, 0xFFFFFF00U});
+
+ std::vector<uint8_t> expected_data32 = ParseHexString(
+ "FF FF 10 00 C0 A0 FF FF FF FF FF 21 00 C0 A0 FF "
+ "FF FF FF FF FF FF 10 00 C0 A0 FF FF FF FF FF FF "
+ "FF FF FF FF FF FF FF FF FF FF FF FF 00 00 C0 A0");
+ EXPECT_EQ(expected_data32, data32);
+}
+
+TEST(Abs32UtilsTest, Win32Write64) {
+ constexpr uint64_t kImageBase = 0x31415926A0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ std::vector<uint8_t> data64(0x30, 0xFFU);
+ MutableBufferView image32(data64.data(), data64.size());
+ AbsoluteAddress addr(kBit64, kImageBase);
+ TestAddressTranslator translator(data64.size(), kRvaBegin);
+ Abs32WriterWin32 writer(image32, std::move(addr), translator);
+
+ // Successful writes.
+ writer.PutNext({0x02U, 0x10U});
+ writer.PutNext({0x0BU, 0x21U});
+ writer.PutNext({0x16U, 0x10U});
+ writer.PutNext({0x28U, 0x00U});
+
+ // Invalid data: For simplicity, Abs32WriterWin32 simply ignores bad writes.
+ // Invalid location.
+ writer.PutNext({0x29U, 0x20U});
+ writer.PutNext({0x80000000U, 0x20U});
+ writer.PutNext({0xFFFFFFFFU, 0x20U});
+ // Invalid target.
+ writer.PutNext({0x1CU, 0x00001111U});
+ writer.PutNext({0x10U, 0xFFFFFF00U});
+
+ std::vector<uint8_t> expected_data64 = ParseHexString(
+ "FF FF 10 00 C0 A0 26 59 41 31 FF 21 00 C0 A0 26 "
+ "59 41 31 FF FF FF 10 00 C0 A0 26 59 41 31 FF FF "
+ "FF FF FF FF FF FF FF FF 00 00 C0 A0 26 59 41 31");
+ EXPECT_EQ(expected_data64, data64);
+}
+
+TEST(Abs32UtilsTest, RemoveOverlappingAbs32Locations) {
+ // Make |bitness| a state to reduce repetition.
+ Bitness bitness = kBit32;
+
+ auto run_test = [&bitness](const std::vector<offset_t>& expected_locations,
+ std::vector<offset_t>&& locations) {
+ ASSERT_TRUE(std::is_sorted(locations.begin(), locations.end()));
+ size_t expected_removals = locations.size() - expected_locations.size();
+ size_t removals = RemoveOverlappingAbs32Locations(bitness, &locations);
+ EXPECT_EQ(expected_removals, removals);
+ EXPECT_EQ(expected_locations, locations);
+ };
+
+ // 32-bit tests.
+ bitness = kBit32;
+ run_test(std::vector<offset_t>(), std::vector<offset_t>());
+ run_test({4U}, {4U});
+ run_test({4U, 10U}, {4U, 10U});
+ run_test({4U, 8U}, {4U, 8U});
+ run_test({4U}, {4U, 7U});
+ run_test({4U}, {4U, 4U});
+ run_test({4U, 8U}, {4U, 7U, 8U});
+ run_test({4U, 10U}, {4U, 7U, 10U});
+ run_test({4U, 9U}, {4U, 9U, 10U});
+ run_test({3U}, {3U, 5U, 6U});
+ run_test({3U, 7U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U});
+ run_test({3U, 7U, 11U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U});
+ run_test({4U, 8U, 12U}, {4U, 6U, 8U, 10U, 12U});
+ run_test({4U, 8U, 12U, 16U}, {4U, 8U, 12U, 16U});
+ run_test({4U, 8U, 12U}, {4U, 8U, 9U, 12U});
+ run_test({4U}, {4U, 4U, 4U, 4U, 4U, 4U});
+ run_test({3U}, {3U, 4U, 4U, 4U, 5U, 5U});
+ run_test({3U, 7U}, {3U, 4U, 4U, 4U, 7U, 7U, 8U});
+ run_test({10U, 20U, 30U, 40U}, {10U, 20U, 22U, 22U, 30U, 40U});
+ run_test({1000000U, 1000004U}, {1000000U, 1000004U});
+ run_test({1000000U}, {1000000U, 1000002U});
+
+ // 64-bit tests.
+ bitness = kBit64;
+ run_test(std::vector<offset_t>(), std::vector<offset_t>());
+ run_test({4U}, {4U});
+ run_test({4U, 20U}, {4U, 20U});
+ run_test({4U, 12U}, {4U, 12U});
+ run_test({4U}, {4U, 11U});
+ run_test({4U}, {4U, 5U});
+ run_test({4U}, {4U, 4U});
+ run_test({4U, 12U, 20U}, {4U, 12U, 20U});
+ run_test({1U, 9U, 17U}, {1U, 9U, 17U});
+ run_test({1U, 17U}, {1U, 8U, 17U});
+ run_test({1U, 10U}, {1U, 10U, 17U});
+ run_test({3U, 11U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U});
+ run_test({4U, 12U}, {4U, 6U, 8U, 10U, 12U});
+ run_test({4U, 12U}, {4U, 12U, 16U});
+ run_test({4U, 12U, 20U, 28U}, {4U, 12U, 20U, 28U});
+ run_test({4U}, {4U, 4U, 4U, 4U, 5U, 5U});
+ run_test({3U, 11U}, {3U, 4U, 4U, 4U, 11U, 11U, 12U});
+ run_test({10U, 20U, 30U, 40U}, {10U, 20U, 22U, 22U, 30U, 40U});
+ run_test({1000000U, 1000008U}, {1000000U, 1000008U});
+ run_test({1000000U}, {1000000U, 1000004U});
+}
+
+} // namespace zucchini
diff --git a/address_translator.cc b/address_translator.cc
new file mode 100644
index 0000000..79e7ba6
--- /dev/null
+++ b/address_translator.cc
@@ -0,0 +1,254 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/address_translator.h"
+
+#include <algorithm>
+#include <utility>
+
+namespace zucchini {
+
+/******** AddressTranslator::OffsetToRvaCache ********/
+
+AddressTranslator::OffsetToRvaCache::OffsetToRvaCache(
+ const AddressTranslator& translator)
+ : translator_(translator) {}
+
+rva_t AddressTranslator::OffsetToRvaCache::Convert(offset_t offset) const {
+ if (offset >= translator_.fake_offset_begin_) {
+ // Rely on |translator_| to handle this special case.
+ return translator_.OffsetToRva(offset);
+ }
+ if (cached_unit_ && cached_unit_->CoversOffset(offset))
+ return cached_unit_->OffsetToRvaUnsafe(offset);
+ const AddressTranslator::Unit* unit = translator_.OffsetToUnit(offset);
+ if (!unit)
+ return kInvalidRva;
+ cached_unit_ = unit;
+ return unit->OffsetToRvaUnsafe(offset);
+}
+
+/******** AddressTranslator::RvaToOffsetCache ********/
+
+AddressTranslator::RvaToOffsetCache::RvaToOffsetCache(
+ const AddressTranslator& translator)
+ : translator_(translator) {}
+
+bool AddressTranslator::RvaToOffsetCache::IsValid(rva_t rva) const {
+ if (!cached_unit_ || !cached_unit_->CoversRva(rva)) {
+ const AddressTranslator::Unit* unit = translator_.RvaToUnit(rva);
+ if (!unit)
+ return false;
+ cached_unit_ = unit;
+ }
+ return true;
+}
+
+offset_t AddressTranslator::RvaToOffsetCache::Convert(rva_t rva) const {
+ if (!cached_unit_ || !cached_unit_->CoversRva(rva)) {
+ const AddressTranslator::Unit* unit = translator_.RvaToUnit(rva);
+ if (!unit)
+ return kInvalidOffset;
+ cached_unit_ = unit;
+ }
+ return cached_unit_->RvaToOffsetUnsafe(rva, translator_.fake_offset_begin_);
+}
+
+/******** AddressTranslator ********/
+
+AddressTranslator::AddressTranslator() = default;
+
+AddressTranslator::~AddressTranslator() = default;
+
+AddressTranslator::Status AddressTranslator::Initialize(
+ std::vector<Unit>&& units) {
+ for (Unit& unit : units) {
+ // Check for overflows and fail if found.
+ if (!RangeIsBounded<offset_t>(unit.offset_begin, unit.offset_size,
+ kOffsetBound) ||
+ !RangeIsBounded<rva_t>(unit.rva_begin, unit.rva_size, kRvaBound)) {
+ return kErrorOverflow;
+ }
+ // If |rva_size < offset_size|: Just shrink |offset_size| to accommodate.
+ unit.offset_size = std::min(unit.offset_size, unit.rva_size);
+ // Now |rva_size >= offset_size|. Note that |rva_size > offset_size| is
+ // allowed; these lead to dangling RVA.
+ }
+
+ // Remove all empty units.
+ units.erase(std::remove_if(units.begin(), units.end(),
+ [](const Unit& unit) { return unit.IsEmpty(); }),
+ units.end());
+
+ // Sort |units| by RVA, then uniquefy.
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) {
+ return std::tie(a.rva_begin, a.rva_size) <
+ std::tie(b.rva_begin, b.rva_size);
+ });
+ units.erase(std::unique(units.begin(), units.end()), units.end());
+
+ // Scan for RVA range overlaps, validate, and merge wherever possible.
+ if (units.size() > 1) {
+ // Traverse with two iterators: |slow| stays behind and modifies Units that
+ // absorb all overlapping (or tangent if suitable) Units; |fast| explores
+ // new Units as candidates for consistency checks and potential merge into
+ // |slow|.
+ auto slow = units.begin();
+
+ // All |it| with |slow| < |it| < |fast| contain garbage.
+ for (auto fast = slow + 1; fast != units.end(); ++fast) {
+ // Comment notation: S = slow offset, F = fast offset, O = overlap offset,
+ // s = slow RVA, f = fast RVA, o = overlap RVA.
+ DCHECK_GE(fast->rva_begin, slow->rva_begin);
+ if (slow->rva_end() < fast->rva_begin) {
+ // ..ssssss..ffffff..: Disjoint: Can advance |slow|.
+ *(++slow) = *fast;
+ continue;
+ }
+
+ // ..ssssffff..: Tangent: Merge is optional.
+ // ..sssooofff.. / ..sssooosss..: Overlap: Merge is required.
+ bool merge_is_optional = slow->rva_end() == fast->rva_begin;
+
+ // Check whether |fast| and |slow| have identical RVA -> offset shift.
+ // If not, then merge cannot be resolved. Examples:
+ // ..ssssffff.. -> ..SSSSFFFF..: Good, can merge.
+ // ..ssssffff.. -> ..SSSS..FFFF..: Non-fatal: don't merge.
+ // ..ssssffff.. -> ..FFFF..SSSS..: Non-fatal: don't merge.
+ // ..ssssffff.. -> ..SSOOFF..: Fatal: Ignore for now (handled later).
+ // ..sssooofff.. -> ..SSSOOOFFF..: Good, can merge.
+ // ..sssooofff.. -> ..SSSSSOFFFFF..: Fatal.
+ // ..sssooofff.. -> ..FFOOOOSS..: Fatal.
+ // ..sssooofff.. -> ..SSSOOOF..: Good, notice |fast| has dangling RVAs.
+ // ..oooooo.. -> ..OOOOOO..: Good, can merge.
+ if (fast->offset_begin < slow->offset_begin ||
+ fast->offset_begin - slow->offset_begin !=
+ fast->rva_begin - slow->rva_begin) {
+ if (merge_is_optional) {
+ *(++slow) = *fast;
+ continue;
+ }
+ return kErrorBadOverlap;
+ }
+
+ // Check whether dangling RVAs (if they exist) are consistent. Examples:
+ // ..sssooofff.. -> ..SSSOOOF..: Good, can merge.
+ // ..sssooosss.. -> ..SSSOOOS..: Good, can merge.
+ // ..sssooofff.. -> ..SSSOO..: Good, can merge.
+ // ..sssooofff.. -> ..SSSOFFF..: Fatal.
+ // ..sssooosss.. -> ..SSSOOFFFF..: Fatal.
+ // ..oooooo.. -> ..OOO..: Good, can merge.
+ // Idea of check: Suppose |fast| has dangling RVA, then
+ // |[fast->rva_begin, fast->rva_begin + fast->offset_size)| ->
+ // |[fast->offset_begin, **fast->offset_end()**)|, with remaining RVA
+ // mapping to fake offsets. This means |fast->offset_end()| must be >=
+ // |slow->offset_end()|, and failure to do so results in error. The
+ // argument for |slow| having dangling RVA is symmetric.
+ if ((fast->HasDanglingRva() && fast->offset_end() < slow->offset_end()) ||
+ (slow->HasDanglingRva() && slow->offset_end() < fast->offset_end())) {
+ if (merge_is_optional) {
+ *(++slow) = *fast;
+ continue;
+ }
+ return kErrorBadOverlapDanglingRva;
+ }
+
+ // Merge |fast| into |slow|.
+ slow->rva_size =
+ std::max(slow->rva_size, fast->rva_end() - slow->rva_begin);
+ slow->offset_size =
+ std::max(slow->offset_size, fast->offset_end() - slow->offset_begin);
+ }
+ ++slow;
+ units.erase(slow, units.end());
+ }
+
+ // After resolving RVA overlaps, any offset overlap would imply error.
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) {
+ return a.offset_begin < b.offset_begin;
+ });
+
+ if (units.size() > 1) {
+ auto previous = units.begin();
+ for (auto current = previous + 1; current != units.end(); ++current) {
+ if (previous->offset_end() > current->offset_begin)
+ return kErrorBadOverlap;
+ previous = current;
+ }
+ }
+
+ // For fake offset heuristics: Compute exclusive upper bounds for offsets
+ // and RVAs.
+ offset_t offset_bound = 0;
+ rva_t rva_bound = 0;
+ for (const Unit& unit : units) {
+ offset_bound = std::max(offset_bound, unit.offset_end());
+ rva_bound = std::max(rva_bound, unit.rva_end());
+ }
+
+ // Compute pessimistic range and see if it still fits within space of valid
+ // offsets. This limits image size to one half of |kOffsetBound|, and is a
+ // main drawback for the current heuristic to convert dangling RVA to fake
+ // offsets.
+ if (!RangeIsBounded(offset_bound, rva_bound, kOffsetBound))
+ return kErrorFakeOffsetBeginTooLarge;
+
+ // Success. Store results. |units| is currently sorted by offset, so assign.
+ units_sorted_by_offset_.assign(units.begin(), units.end());
+
+ // Sort |units| by RVA, and just store it directly
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) {
+ return a.rva_begin < b.rva_begin;
+ });
+ units_sorted_by_rva_ = std::move(units);
+
+ fake_offset_begin_ = offset_bound;
+ return kSuccess;
+}
+
+rva_t AddressTranslator::OffsetToRva(offset_t offset) const {
+ if (offset >= fake_offset_begin_) {
+ // Handle dangling RVA: First shift it to regular RVA space.
+ rva_t rva = offset - fake_offset_begin_;
+ // If result is indeed a dangling RVA, return it; else return |kInvalidRva|.
+ const Unit* unit = RvaToUnit(rva);
+ return (unit && unit->HasDanglingRva() && unit->CoversDanglingRva(rva))
+ ? rva
+ : kInvalidRva;
+ }
+ const Unit* unit = OffsetToUnit(offset);
+ return unit ? unit->OffsetToRvaUnsafe(offset) : kInvalidRva;
+}
+
+offset_t AddressTranslator::RvaToOffset(rva_t rva) const {
+ const Unit* unit = RvaToUnit(rva);
+ // This also handles dangling RVA.
+ return unit ? unit->RvaToOffsetUnsafe(rva, fake_offset_begin_)
+ : kInvalidOffset;
+}
+
+const AddressTranslator::Unit* AddressTranslator::OffsetToUnit(
+ offset_t offset) const {
+ // Finds first Unit with |offset_begin| > |offset|, rewind by 1 to find the
+ // last Unit with |offset_begin| <= |offset| (if it exists).
+ auto it = std::upper_bound(
+ units_sorted_by_offset_.begin(), units_sorted_by_offset_.end(), offset,
+ [](offset_t a, const Unit& b) { return a < b.offset_begin; });
+ if (it == units_sorted_by_offset_.begin())
+ return nullptr;
+ --it;
+ return it->CoversOffset(offset) ? &(*it) : nullptr;
+}
+
+const AddressTranslator::Unit* AddressTranslator::RvaToUnit(rva_t rva) const {
+ auto it = std::upper_bound(
+ units_sorted_by_rva_.begin(), units_sorted_by_rva_.end(), rva,
+ [](rva_t a, const Unit& b) { return a < b.rva_begin; });
+ if (it == units_sorted_by_rva_.begin())
+ return nullptr;
+ --it;
+ return it->CoversRva(rva) ? &(*it) : nullptr;
+}
+
+} // namespace zucchini
diff --git a/address_translator.h b/address_translator.h
new file mode 100644
index 0000000..821b9ad
--- /dev/null
+++ b/address_translator.h
@@ -0,0 +1,198 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
+#define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
+
+#include <stdint.h>
+
+#include <tuple>
+#include <vector>
+
+#include "base/macros.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// There are several ways to reason about addresses in an image:
+// - Offset: Position relative to start of image.
+// - VA (Virtual Address): Virtual memory address of a loaded image. This is
+// subject to relocation by the OS.
+// - RVA (Relative Virtual Address): VA relative to some base address. This is
+// the preferred way to specify pointers in an image.
+//
+// Zucchini is primarily concerned with offsets and RVAs. Executable images like
+// PE and ELF are organized into sections. Each section specifies offset and RVA
+// ranges as:
+// {Offset start, offset size, RVA start, RVA size}.
+// This constitutes a basic unit to translate between offsets and RVAs. Note:
+// |offset size| < |RVA size| is possible. For example, the .bss section can
+// have zero-filled statically-allocated data that have no corresponding bytes
+// on image (to save space). This poses a problem for Zucchini, which stores
+// addresses as offsets: now we'd have "dangling RVAs" that don't map to
+// offsets! Some ways to handle this are:
+// 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means
+// some reference targets would escape detection and processing.
+// 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data
+// must not be read on these fake offsets, which are only valid as target
+// addresses for reference matching.
+// As for |RVA size| < |offset size|, the extra portion just gets ignored.
+//
+// Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to
+// fake offsets by adding a large value. This value can be chosen as an
+// exclusive upper bound of all offsets (i.e., image size). This allows them to
+// be easily detected and processed as a special-case.
+// TODO(huangs): Investigate option (1), now that the refactored code makes
+// experimentation easier.
+// TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t|
+// ranges and create "fake" units to accommodate dangling RVAs. Then
+// AddressTranslator can be simplified.
+
+// Virtual Address relative to some base address (RVA). There's a distinction
+// between "valid RVA" and "existent RVA":
+// - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|.
+// - Existent RVA: An RVA that has semantic meaning in an image, and may
+// translate to an offset in an image or (if a dangling RVA) a fake offset.
+// All existent RVAs are valid RVAs.
+using rva_t = uint32_t;
+// Divide by 2 to match |kOffsetBound|.
+constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2;
+constexpr rva_t kInvalidRva = static_cast<rva_t>(-1);
+
+// A utility to translate between offsets and RVAs in an image.
+class AddressTranslator {
+ public:
+ // A basic unit for address translation, roughly maps to a section, but may
+ // be processed (e.g., merged) as an optimization.
+ struct Unit {
+ offset_t offset_end() const { return offset_begin + offset_size; }
+ rva_t rva_end() const { return rva_begin + rva_size; }
+ bool IsEmpty() const {
+ // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed
+ // yet, and once it is then it's empty.
+ // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but
+ // is not empty.
+ return rva_size == 0;
+ }
+ bool CoversOffset(offset_t offset) const {
+ return RangeCovers(offset_begin, offset_size, offset);
+ }
+ bool CoversRva(rva_t rva) const {
+ return RangeCovers(rva_begin, rva_size, rva);
+ }
+ bool CoversDanglingRva(rva_t rva) const {
+ return CoversRva(rva) && rva - rva_begin >= offset_size;
+ }
+ // Assumes valid |offset| (*cannot* be fake offset).
+ rva_t OffsetToRvaUnsafe(offset_t offset) const {
+ return offset - offset_begin + rva_begin;
+ }
+ // Assumes valid |rva| (*can* be dangling RVA).
+ offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const {
+ rva_t delta = rva - rva_begin;
+ return delta < offset_size ? delta + offset_begin
+ : fake_offset_begin + rva;
+ }
+ bool HasDanglingRva() const { return rva_size > offset_size; }
+ friend bool operator==(const Unit& a, const Unit& b) {
+ return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) ==
+ std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size);
+ }
+
+ offset_t offset_begin;
+ offset_t offset_size;
+ rva_t rva_begin;
+ rva_t rva_size;
+ };
+
+ // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit
+ // found, to reduce the number of OffsetToUnit() calls for clustered queries.
+ class OffsetToRvaCache {
+ public:
+ // Embeds |translator| for use. Now object lifetime is tied to |translator|
+ // lifetime.
+ explicit OffsetToRvaCache(const AddressTranslator& translator);
+
+ rva_t Convert(offset_t offset) const;
+
+ private:
+ const AddressTranslator& translator_;
+ mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
+
+ DISALLOW_COPY_AND_ASSIGN(OffsetToRvaCache);
+ };
+
+ // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit
+ // found, to reduce the number of RvaToUnit() calls for clustered queries.
+ class RvaToOffsetCache {
+ public:
+ // Embeds |translator| for use. Now object lifetime is tied to |translator|
+ // lifetime.
+ explicit RvaToOffsetCache(const AddressTranslator& translator);
+
+ bool IsValid(rva_t rva) const;
+ offset_t Convert(rva_t rva) const;
+
+ private:
+ const AddressTranslator& translator_;
+ mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
+
+ DISALLOW_COPY_AND_ASSIGN(RvaToOffsetCache);
+ };
+
+ enum Status {
+ kSuccess = 0,
+ kErrorOverflow,
+ kErrorBadOverlap,
+ kErrorBadOverlapDanglingRva,
+ kErrorFakeOffsetBeginTooLarge,
+ };
+
+ AddressTranslator();
+ ~AddressTranslator();
+
+ // Consumes |units| to populate data in this class. Performs consistency
+ // checks and merges overlapping Units. Returns Status to indicate success.
+ Status Initialize(std::vector<Unit>&& units);
+
+ // Returns the (possibly dangling) RVA corresponding to |offset|, or
+ // kInvalidRva if not found.
+ rva_t OffsetToRva(offset_t offset) const;
+
+ // Returns the (possibly fake) offset corresponding to |rva|, or
+ // kInvalidOffset if not found (i.e., |rva| is non-existent).
+ offset_t RvaToOffset(rva_t rva) const;
+
+ // For testing.
+ offset_t fake_offset_begin() const { return fake_offset_begin_; }
+
+ const std::vector<Unit>& units_sorted_by_offset() const {
+ return units_sorted_by_offset_;
+ }
+
+ const std::vector<Unit>& units_sorted_by_rva() const {
+ return units_sorted_by_rva_;
+ }
+
+ private:
+ // Helper to find the Unit that contains given |offset| or |rva|. Returns null
+ // if not found.
+ const Unit* OffsetToUnit(offset_t offset) const;
+ const Unit* RvaToUnit(rva_t rva) const;
+
+ // Storage of Units. All offset ranges are non-empty and disjoint. Likewise
+ // for all RVA ranges.
+ std::vector<Unit> units_sorted_by_offset_;
+ std::vector<Unit> units_sorted_by_rva_;
+
+ // Conversion factor to translate between dangling RVAs and fake offsets.
+ offset_t fake_offset_begin_;
+
+ DISALLOW_COPY_AND_ASSIGN(AddressTranslator);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
diff --git a/address_translator_unittest.cc b/address_translator_unittest.cc
new file mode 100644
index 0000000..0aeff77
--- /dev/null
+++ b/address_translator_unittest.cc
@@ -0,0 +1,556 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/address_translator.h"
+
+#include <algorithm>
+#include <string>
+#include <utility>
+
+#include "base/format_macros.h"
+#include "base/strings/stringprintf.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Test case structs. The convention of EXPECT() specifies "expected" value
+// before "actual". However, AddressTranslator interfaces explicitly state "X
+// to Y". So it is clearer in test cases to specify "input" before "expect".
+struct OffsetToRvaTestCase {
+ offset_t input;
+ rva_t expect;
+};
+
+struct RvaToOffsetTestCase {
+ rva_t input;
+ offset_t expect;
+};
+
+class TestAddressTranslator : public AddressTranslator {
+ public:
+  using AddressTranslator::AddressTranslator;
+
+  // Variant of Initialize() where each Unit is drawn as "<offsets>|<RVAs>".
+  // In each half, '.' is filler, '!' marks an empty range, and every other
+  // character stands for one element of the range. For "..AAA...|....aaaa":
+  // - "..AAA...": 2 leading '.', so |offset_begin| = 2.
+  // - "..AAA...": 3 characters are neither '.' nor '!', so |offset_size| = +3.
+  // - "....aaaa": 4 leading '.', so |rva_begin| = 4.
+  // - "....aaaa": 4 characters are neither '.' nor '!', so |rva_size| = +4.
+  // A length-0 range is written as '!': "...!...." has |begin| = 3, |size| = +0.
+  AddressTranslator::Status InitializeWithStrings(
+      const std::vector<std::string>& specs) {
+    // Index of the first character other than '.' (|t.size()| if none).
+    const auto begin_of = [](const std::string& t) {
+      const auto not_dot = [](char ch) { return ch != '.'; };
+      return std::find_if(t.begin(), t.end(), not_dot) - t.begin();
+    };
+    // Number of characters other than '.' and '!'.
+    const auto size_of = [](const std::string& t) {
+      const auto is_body = [](char ch) { return ch != '.' && ch != '!'; };
+      return std::count_if(t.begin(), t.end(), is_body);
+    };
+    std::vector<Unit> parsed;
+    parsed.reserve(specs.size());
+    for (const std::string& spec : specs) {
+      const size_t bar = spec.find('|');
+      CHECK_NE(bar, std::string::npos);
+      const std::string offsets = spec.substr(0, bar);
+      const std::string rvas = spec.substr(bar + 1);
+      parsed.push_back({static_cast<offset_t>(begin_of(offsets)),
+                        static_cast<offset_t>(size_of(offsets)),
+                        static_cast<rva_t>(begin_of(rvas)),
+                        static_cast<rva_t>(size_of(rvas))});
+    }
+    return Initialize(std::move(parsed));
+  }
+};
+
+// Feeds |specs| to a fresh TestAddressTranslator and expects its status to
+// equal |expected|; |case_name| is appended to any failure message.
+void SimpleTest(const std::vector<std::string>& specs,
+                AddressTranslator::Status expected,
+                const std::string& case_name) {
+  TestAddressTranslator under_test;
+  const auto status = under_test.InitializeWithStrings(specs);
+  EXPECT_EQ(expected, status) << case_name;
+}
+
+// Tests AddressTranslator::Initialize's Unit overlap and error checks over
+// multiple test cases. Each case consists of a fixed unit (specified as a
+// string) and a variable unit string taken from a list.
+class TwoUnitOverlapTester {
+ public:
+ struct TestCase {
+ std::string unit_str;
+ AddressTranslator::Status expected;
+ };
+
+ static void RunTest(const std::string& unit_str1,
+ const std::vector<TestCase>& test_cases) {
+ for (size_t i = 0; i < test_cases.size(); ++i) {
+ const auto& test_case = test_cases[i];
+ const std::string& unit_str2 = test_case.unit_str;
+ const std::string str =
+ base::StringPrintf("Case #%" PRIuS ": %s", i, unit_str2.c_str());
+ SimpleTest({unit_str1, unit_str2}, test_case.expected, str);
+ // Switch order. Expect same results.
+ SimpleTest({unit_str2, unit_str1}, test_case.expected, str);
+ }
+ }
+};
+
+} // namespace
+
+TEST(AddressTranslatorTest, Empty) {
+ using AT = AddressTranslator;
+ TestAddressTranslator translator;
+ EXPECT_EQ(AT::kSuccess,
+ translator.Initialize(std::vector<AddressTranslator::Unit>()));
+ offset_t fake_offset_begin = translator.fake_offset_begin();
+
+ // Optimized versions.
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator);
+
+ EXPECT_EQ(kInvalidRva, translator.OffsetToRva(0U));
+ EXPECT_EQ(kInvalidRva, translator.OffsetToRva(100U));
+ EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(0U));
+ EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(100U));
+
+ EXPECT_EQ(kInvalidOffset, translator.RvaToOffset(0U));
+ EXPECT_EQ(kInvalidOffset, translator.RvaToOffset(100U));
+ EXPECT_EQ(kInvalidOffset, rva_to_offset.Convert(0U));
+ EXPECT_EQ(kInvalidOffset, rva_to_offset.Convert(100U));
+
+ EXPECT_EQ(kInvalidRva, translator.OffsetToRva(fake_offset_begin));
+ EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(fake_offset_begin));
+}
+
+TEST(AddressTranslatorTest, Single) {
+ using AT = AddressTranslator;
+ TestAddressTranslator translator;
+ // Offsets to RVA: [10, 30) -> [100, 120).
+ EXPECT_EQ(AT::kSuccess, translator.Initialize({{10U, +20U, 100U, +20U}}));
+ offset_t fake_offset_begin = translator.fake_offset_begin();
+
+ // Optimized versions.
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator);
+ EXPECT_EQ(30U, fake_offset_begin); // Test implementation detail.
+
+ // Offsets to RVAs.
+ OffsetToRvaTestCase test_cases1[] = {
+ {0U, kInvalidRva}, {9U, kInvalidRva}, {10U, 100U},
+ {20U, 110U}, {29U, 119U}, {30U, kInvalidRva},
+ };
+ for (auto& test_case : test_cases1) {
+ EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input));
+ EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input));
+ }
+
+ // RVAs to offsets.
+ RvaToOffsetTestCase test_cases2[] = {
+ {0U, kInvalidOffset}, {99U, kInvalidOffset}, {100U, 10U},
+ {110U, 20U}, {119U, 29U}, {120U, kInvalidOffset},
+ };
+ for (auto& test_case : test_cases2) {
+ EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input));
+ EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input));
+ }
+}
+
+TEST(AddressTranslatorTest, SingleDanglingRva) {
+ using AT = AddressTranslator;
+ TestAddressTranslator translator;
+ // Offsets to RVA: [10, 30) -> [100, 120 + 7), so has dangling RVAs.
+ EXPECT_EQ(AT::kSuccess,
+ translator.Initialize({{10U, +20U, 100U, +20U + 7U}}));
+ offset_t fake_offset_begin = translator.fake_offset_begin();
+
+ EXPECT_EQ(30U, fake_offset_begin); // Test implementation detail.
+
+ // Optimized versions.
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator);
+
+ // Offsets to RVAs.
+ OffsetToRvaTestCase test_cases1[] = {
+ {0U, kInvalidRva},
+ {9U, kInvalidRva},
+ {10U, 100U},
+ {20U, 110U},
+ {29U, 119U},
+ {30U, kInvalidRva},
+ // Fake offsets to dangling RVAs.
+ {fake_offset_begin + 100U, kInvalidRva},
+ {fake_offset_begin + 119U, kInvalidRva},
+ {fake_offset_begin + 120U, 120U},
+ {fake_offset_begin + 126U, 126U},
+ {fake_offset_begin + 127U, kInvalidRva},
+ };
+ for (auto& test_case : test_cases1) {
+ EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input));
+ EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input));
+ }
+
+ // RVAs to offsets.
+ RvaToOffsetTestCase test_cases2[] = {
+ {0U, kInvalidOffset},
+ {99U, kInvalidOffset},
+ {100U, 10U},
+ {110U, 20U},
+ {119U, 29U},
+ // Dangling RVAs to fake offsets.
+ {120U, fake_offset_begin + 120U},
+ {126U, fake_offset_begin + 126U},
+ {127U, kInvalidOffset},
+ };
+ for (auto& test_case : test_cases2) {
+ EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input));
+ EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input));
+ }
+}
+
+TEST(AddressTranslatorTest, BasicUsage) {
+ using AT = AddressTranslator;
+ TestAddressTranslator translator;
+ // Offsets covered: [10, 30), [40, 70), [70, 110).
+ // Map to RVAs: [200, 220 + 5), [300, 330), [100, 140), so has dangling RVAs.
+ auto result = translator.Initialize({
+ {10U, +20U, 200U, +20U + 5U}, // Has dangling RVAs.
+ {40U, +30U, 300U, +20U}, // Extra offset truncated and ignored.
+ {50U, +20U, 310U, +20U}, // Overlap with previous: Merged.
+ {70U, +40U, 100U, +20U}, // Tangent with previous but inconsistent; extra
+ // offset truncated and ignored.
+ {90U, +20U, 120U, +20U}, // Tangent with previous and consistent: Merged.
+ });
+ EXPECT_EQ(AT::kSuccess, result);
+ offset_t fake_offset_begin = translator.fake_offset_begin();
+ EXPECT_EQ(110U, fake_offset_begin); // Test implementation detail.
+
+ // Optimized versions.
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator);
+
+ // Offsets to RVAs.
+ OffsetToRvaTestCase test_cases1[] = {
+ {0U, kInvalidRva},
+ {9U, kInvalidRva},
+ {10U, 200U},
+ {20U, 210U},
+ {29U, 219U},
+ {30U, kInvalidRva},
+ {39U, kInvalidRva},
+ {40U, 300U},
+ {55U, 315U},
+ {69U, 329U},
+ {70U, 100U},
+ {90U, 120U},
+ {109U, 139U},
+ {110U, kInvalidRva},
+ // Fake offsets to dangling RVAs.
+ {fake_offset_begin + 220U, 220U},
+ {fake_offset_begin + 224U, 224U},
+ {fake_offset_begin + 225U, kInvalidRva},
+ };
+ for (auto& test_case : test_cases1) {
+ EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input));
+ EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input));
+ }
+
+ // RVAs to offsets.
+ RvaToOffsetTestCase test_cases2[] = {
+ {0U, kInvalidOffset},
+ {99U, kInvalidOffset},
+ {100U, 70U},
+ {120U, 90U},
+ {139U, 109U},
+ {140U, kInvalidOffset},
+ {199U, kInvalidOffset},
+ {200U, 10U},
+ {210U, 20U},
+ {219U, 29U},
+ {225U, kInvalidOffset},
+ {299U, kInvalidOffset},
+ {300U, 40U},
+ {315U, 55U},
+ {329U, 69U},
+ {330U, kInvalidOffset},
+ // Dangling RVAs to fake offsets.
+ {220U, fake_offset_begin + 220U},
+ {224U, fake_offset_begin + 224U},
+ {225U, kInvalidOffset},
+ };
+ for (auto& test_case : test_cases2) {
+ EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input));
+ EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input));
+ }
+}
+
+TEST(AddressTranslatorTest, Overflow) {
+ using AT = AddressTranslator;
+ // Test assumes offset_t and rva_t to be 32-bit.
+ static_assert(sizeof(offset_t) == 4 && sizeof(rva_t) == 4,
+ "Needs to update test.");
+ {
+ AddressTranslator translator1;
+ EXPECT_EQ(AT::kErrorOverflow,
+ translator1.Initialize({{0, +0xC0000000U, 0, +0xC0000000U}}));
+ }
+ {
+ AddressTranslator translator2;
+ EXPECT_EQ(AT::kErrorOverflow,
+ translator2.Initialize({{0, +0, 0, +0xC0000000U}}));
+ }
+ {
+ // Units are okay, but owing to limitations of the heuristic to convert
+ // dangling RVA to fake offset, AddressTranslator::Initialize() fails.
+ AddressTranslator translator3;
+ EXPECT_EQ(AT::kErrorFakeOffsetBeginTooLarge,
+ translator3.Initialize(
+ {{32, +0, 32, +0x50000000U}, {0x50000000U, +16, 0, +16}}));
+ }
+}
+
+// Sanity test for TestAddressTranslator::InitializeWithStrings();
+TEST(AddressTranslatorTest, AddUnitAsString) {
+ using AT = AddressTranslator;
+ {
+ TestAddressTranslator translator1;
+ EXPECT_EQ(AT::kSuccess, translator1.InitializeWithStrings({"..A..|.aaa."}));
+ AddressTranslator::Unit unit1 = translator1.units_sorted_by_offset()[0];
+ EXPECT_EQ(2U, unit1.offset_begin);
+ EXPECT_EQ(+1U, unit1.offset_size);
+ EXPECT_EQ(1U, unit1.rva_begin);
+ EXPECT_EQ(+3U, unit1.rva_size);
+ }
+ {
+ TestAddressTranslator translator2;
+ EXPECT_EQ(AT::kSuccess,
+ translator2.InitializeWithStrings({".....!...|.bbbbbb..."}));
+ AddressTranslator::Unit unit2 = translator2.units_sorted_by_offset()[0];
+ EXPECT_EQ(5U, unit2.offset_begin);
+ EXPECT_EQ(+0U, unit2.offset_size);
+ EXPECT_EQ(1U, unit2.rva_begin);
+ EXPECT_EQ(+6U, unit2.rva_size);
+ }
+}
+
+// AddressTranslator::Initialize() lists Unit merging examples in comments. The
+// format is different from that used by InitializeWithStrings(), but adapting
+// them is easy, so we may as well do so.
+TEST(AddressTranslatorTest, OverlapFromComment) {
+ using AT = AddressTranslator;
+ constexpr auto OK = AT::kSuccess;
+ struct {
+ const char* rva_str; // RVA comes first in this case.
+ const char* offset_str;
+ AT::Status expected;
+ } test_cases[] = {
+ {"..ssssffff..", "..SSSSFFFF..", OK},
+ {"..ssssffff..", "..SSSS..FFFF..", OK},
+ {"..ssssffff..", "..FFFF..SSSS..", OK},
+ {"..ssssffff..", "..SSOOFF..", AT::kErrorBadOverlap},
+ {"..sssooofff..", "..SSSOOOFFF..", OK},
+ {"..sssooofff..", "..SSSSSOFFFFF..", AT::kErrorBadOverlap},
+ {"..sssooofff..", "..FFOOOOSS..", AT::kErrorBadOverlap},
+ {"..sssooofff..", "..SSSOOOF..", OK},
+ {"..sssooofff..", "..SSSOOOF..", OK},
+ {"..sssooosss..", "..SSSOOOS..", OK},
+ {"..sssooofff..", "..SSSOO..", OK},
+ {"..sssooofff..", "..SSSOFFF..", AT::kErrorBadOverlapDanglingRva},
+ {"..sssooosss..", "..SSSOOSSSS..", AT::kErrorBadOverlapDanglingRva},
+ {"..oooooo..", "..OOO..", OK},
+ };
+
+ auto to_period = [](std::string s, char ch) { // |s| passed by value.
+ std::replace(s.begin(), s.end(), ch, '.');
+ return s;
+ };
+
+ size_t idx = 0;
+ for (const auto& test_case : test_cases) {
+ std::string base_str =
+ std::string(test_case.offset_str) + "|" + test_case.rva_str;
+ std::string unit_str1 = to_period(to_period(base_str, 'S'), 's');
+ std::string unit_str2 = to_period(to_period(base_str, 'F'), 'f');
+ SimpleTest({unit_str1, unit_str2}, test_case.expected,
+ base::StringPrintf("Case #%" PRIuS, idx));
+ ++idx;
+ }
+}
+
+TEST(AddressTranslatorTest, Overlap) {
+ using AT = AddressTranslator;
+ constexpr auto OK = AT::kSuccess;
+ constexpr const char* unit_str1 = "....AAA.......|.....aaa......";
+
+ std::vector<TwoUnitOverlapTester::TestCase> test_cases = {
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {"....BBB.......|.....bbb......", OK},
+ {"..BBB.........|...bbb........", OK},
+ {"......BBB.....|.......bbb....", OK},
+ {"..BBBBBBBBB...|...bbb........", OK}, // Extra offset gets truncated.
+ {"......BBBBBBBB|.......bbb....", OK},
+ {"....BBB.......|.......bbb....", AT::kErrorBadOverlap},
+ {"..BBB.........|.......bbb....", AT::kErrorBadOverlap},
+ {".......BBB....|.......bbb....", AT::kErrorBadOverlap},
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {"....BBB.......|..........bbb.", AT::kErrorBadOverlap},
+ {"..........BBB.|.......bbb....", AT::kErrorBadOverlap},
+ {"......BBB.....|.....bbb......", AT::kErrorBadOverlap},
+ {"......BBB.....|..bbb.........", AT::kErrorBadOverlap},
+ {"......BBB.....|bbb...........", AT::kErrorBadOverlap},
+ {"BBB...........|bbb...........", OK}, // Disjoint.
+ {"........BBB...|.........bbb..", OK}, // Disjoint.
+ {"BBB...........|..........bbb.", OK}, // Disjoint, offset elsewhere.
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {".BBB..........|..bbb.........", OK}, // Tangent.
+ {".......BBB....|........bbb...", OK}, // Tangent.
+ {".BBB..........|........bbb...", OK}, // Tangent, offset elsewhere.
+ {"BBBBBB........|bbb...........", OK}, // Repeat, with extra offsets.
+ {"........BBBB..|.........bbb..", OK},
+ {"BBBBBB........|..........bbb.", OK},
+ {".BBBBBB.......|..bbb.........", OK},
+ {".......BBBBB..|........bbb...", OK},
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {".BBB..........|........bbb...", OK}, // Tangent, offset elsewhere.
+ {"..BBB.........|........bbb...", AT::kErrorBadOverlap},
+ {"...BB.........|....bb........", OK},
+ {"....BB........|.....bb.......", OK},
+ {".......BB.....|........bb....", OK},
+ {"...BBBBBB.....|....bbbbbb....", OK},
+ {"..BBBBBB......|...bbbbbb.....", OK},
+ {"......BBBBBB..|.......bbbbbb.", OK},
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {"BBBBBBBBBBBBBB|bbbbbbbbbbbbbb", AT::kErrorBadOverlap},
+ {"B.............|b.............", OK},
+ {"B.............|.............b", OK},
+ {"....B.........|.....b........", OK},
+ {"....B.........|......b.......", AT::kErrorBadOverlap},
+ {"....B.........|......b.......", AT::kErrorBadOverlap},
+ {"....BBB.......|.....bb.......", OK},
+ {"....BBBB......|.....bbb......", OK},
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {".........BBBBB|.b............", OK},
+ {"....AAA.......|.....!........", OK},
+ {"....!.........|.....!........", OK}, // Empty units get deleted early.
+ {"....!.........|..........!...", OK}, // Forgiving!
+ };
+
+ TwoUnitOverlapTester::RunTest(unit_str1, test_cases);
+}
+
+TEST(AddressTranslatorTest, OverlapOffsetMultiple) {
+ using AT = AddressTranslator;
+ // Simple case. Note that RVA ranges don't get merged.
+ SimpleTest({"A..|a....", //
+ ".A.|..a..", //
+ "..A|....a"},
+ AT::kSuccess, "Case #0");
+
+ // Offset range 1 overlaps 2 and 3, but truncation takes place to trim down
+ // offset ranges, so still successful.
+ SimpleTest({"..A|a....", //
+ ".AA|..a..", //
+ "AAA|....a"},
+ AT::kSuccess, "Case #1");
+
+ // Offset range 2 and 3 overlap, so fail.
+ SimpleTest({"A..|a....", //
+ ".A.|..a..", //
+ ".A.|....a"},
+ AT::kErrorBadOverlap, "Case #2");
+}
+
+TEST(AddressTranslatorTest, OverlapDangling) {
+ using AT = AddressTranslator;
+ constexpr auto OK = AT::kSuccess;
+ // First Unit: offsets [4, 7), RVAs [5, 11), so RVAs [8, 11) are dangling.
+ constexpr const char* unit_str1 = "....AAA.......|.....aaaaaa...";
+
+ std::vector<TwoUnitOverlapTester::TestCase> test_cases = {
+ //....AAA.......|.....aaaaaa... The first Unit. NOLINT
+ {"....BBB.......|.....bbbbbb...", OK},
+ {"....BBB.......|.....bbbbb....", OK},
+ {"....BBB.......|.....bbbb.....", OK},
+ {"....BBB.......|.....bbb......", OK},
+ {".....BBB......|......bbb.....", AT::kErrorBadOverlapDanglingRva},
+ {".....BB.......|......bbb.....", OK},
+ {"....BBB.......|.....bbbbbbbb.", OK},
+ {"..BBBBB.......|...bbbbbbbb...", OK},
+ //....AAA.......|.....aaaaaa... The first Unit. NOLINT
+ {"......!.......|.bbb..........", AT::kErrorBadOverlap},
+ {"..BBBBB.......|...bbbbb......", OK},
+ {".......BBB....|.bbb..........", OK}, // Just tangent: Can go elsewhere.
+ {".......BBB....|.bbbb.........", OK}, // Can be another dangling RVA.
+ {".......!......|.bbbb.........", OK}, // Same with empty.
+ {"......!.......|.......!......", OK}, // Okay, but gets deleted.
+ {"......!.......|.......b......", AT::kErrorBadOverlapDanglingRva},
+ {"......B.......|.......b......", OK},
+ //....AAA.......|.....aaaaaa... The first Unit. NOLINT
+ {"......BBBB....|.......bbbb...", AT::kErrorBadOverlapDanglingRva},
+ {"......BB......|.......bb.....", AT::kErrorBadOverlapDanglingRva},
+ {"......BB......|bb............", AT::kErrorBadOverlap},
+ };
+
+ TwoUnitOverlapTester::RunTest(unit_str1, test_cases);
+}
+
+// Tests implementation since algorithm is tricky.
+TEST(AddressTranslatorTest, Merge) {
+ using AT = AddressTranslator;
+ // Merge a bunch of overlapping Units into one big Unit.
+ std::vector<std::string> test_case1 = {
+ "AAA.......|.aaa......", // Comment to prevent wrap by formatter.
+ "AA........|.aa.......", //
+ "..AAA.....|...aaa....", //
+ "....A.....|.....a....", //
+ ".....AAA..|......aaa.", //
+ "........A.|.........a", //
+ };
+ // Try all 6! permutations.
+ std::sort(test_case1.begin(), test_case1.end());
+ do {
+ TestAddressTranslator translator1;
+ EXPECT_EQ(AT::kSuccess, translator1.InitializeWithStrings(test_case1));
+ EXPECT_EQ(9U, translator1.fake_offset_begin());
+
+ AT::Unit expected{0U, +9U, 1U, +9U};
+ EXPECT_EQ(1U, translator1.units_sorted_by_offset().size());
+ EXPECT_EQ(expected, translator1.units_sorted_by_offset()[0]);
+ EXPECT_EQ(1U, translator1.units_sorted_by_rva().size());
+ EXPECT_EQ(expected, translator1.units_sorted_by_rva()[0]);
+ } while (std::next_permutation(test_case1.begin(), test_case1.end()));
+
+ // Merge RVA-adjacent Units into two Units.
+ std::vector<std::string> test_case2 = {
+ ".....A..|.a......", // First Unit.
+ "......A.|..a.....", //
+ "A.......|...a....", // Second Unit: RVA-adjacent to first Unit, but
+ ".A......|....a...", // offset would become inconsistent, so a new
+ "..A.....|.....a..", // Unit gets created.
+ };
+ // Try all 5! permutations.
+ std::sort(test_case2.begin(), test_case2.end());
+ do {
+ TestAddressTranslator translator2;
+ EXPECT_EQ(AT::kSuccess, translator2.InitializeWithStrings(test_case2));
+ EXPECT_EQ(7U, translator2.fake_offset_begin());
+
+ AT::Unit expected1{0U, +3U, 3U, +3U};
+ AT::Unit expected2{5U, +2U, 1U, +2U};
+ EXPECT_EQ(2U, translator2.units_sorted_by_offset().size());
+ EXPECT_EQ(expected1, translator2.units_sorted_by_offset()[0]);
+ EXPECT_EQ(expected2, translator2.units_sorted_by_offset()[1]);
+ EXPECT_EQ(2U, translator2.units_sorted_by_rva().size());
+ EXPECT_EQ(expected2, translator2.units_sorted_by_rva()[0]);
+ EXPECT_EQ(expected1, translator2.units_sorted_by_rva()[1]);
+ } while (std::next_permutation(test_case2.begin(), test_case2.end()));
+}
+
+} // namespace zucchini
diff --git a/algorithm.h b/algorithm.h
new file mode 100644
index 0000000..7143a95
--- /dev/null
+++ b/algorithm.h
@@ -0,0 +1,84 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ALGORITHM_H_
+#define COMPONENTS_ZUCCHINI_ALGORITHM_H_
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <type_traits>
+#include <vector>
+
+#include "base/logging.h"
+
+// Collection of simple utilities used for low-level computation.
+
+namespace zucchini {
+
+// Overflow-safe test of whether |[begin, begin + size)| fits in |[0, bound)|.
+// Requires |begin < bound|, so the empty range |[bound, bound)| is rejected.
+template <typename T>
+bool RangeIsBounded(T begin, T size, size_t bound) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+  return !(begin >= bound || size > bound - begin);
+}
+
+// Overflow-safe test of whether |value| lies in |[begin, begin + size)|. The
+// result is well-defined even if |begin + size| overflows -- such ranges are
+// pathological, though, and are expected to fail validation elsewhere.
+template <typename T>
+bool RangeCovers(T begin, T size, T value) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+  return !(value < begin || value - begin >= size);
+}
+
+// Returns the integer in the inclusive range |[lo, hi]| closest to |value|.
+// Unlike the usual semi-inclusive ranges, this (1) works for sentinels and
+// (2) always has a valid output. Assumes |lo <= hi|.
+template <class T>
+T InclusiveClamp(T value, T lo, T hi) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+  DCHECK_LE(lo, hi);
+  if (value <= lo) return lo;
+  return value >= hi ? hi : value;
+}
+
+// Rounds |x| up to the nearest multiple of |m|. Assumes |m > 0| and |x| small
+// enough that |x + m - 1| does not overflow.
+template <class T>
+constexpr T ceil(T x, T m) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+  return static_cast<T>((x + (m - 1)) / m) * m;
+}
+
+// Sorts |container|, removes duplicates, and asks to release unused capacity.
+template <class T>
+void SortAndUniquify(std::vector<T>* container) {
+  std::vector<T>& values = *container;
+  std::sort(values.begin(), values.end());
+  values.erase(std::unique(values.begin(), values.end()), values.end());
+  values.shrink_to_fit();
+}
+
+// Sign-extends |v| from bit |pos|: copies that bit into every higher bit and
+// returns the result as the same int type as |v|.
+template <typename T>
+constexpr T SignExtend(int pos, T v) {
+  using SignedT = typename std::make_signed<T>::type;
+  const int shift = static_cast<int>(sizeof(T) * 8) - 1 - pos;
+  return static_cast<SignedT>(v << shift) >> shift;
+}
+
+// Variant of sign extension where bit position |pos| is a template parameter.
+template <int pos, typename T>
+constexpr T SignExtend(T v) {
+  using SignedT = typename std::make_signed<T>::type;
+  constexpr int kShift = static_cast<int>(sizeof(T) * 8) - 1 - pos;
+  return static_cast<SignedT>(v << kShift) >> kShift;
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ALGORITHM_H_
diff --git a/algorithm_unittest.cc b/algorithm_unittest.cc
new file mode 100644
index 0000000..2c685db
--- /dev/null
+++ b/algorithm_unittest.cc
@@ -0,0 +1,206 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/algorithm.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Helpers for spelling signed 8-bit and 16-bit integer constants by their
+// unsigned bit patterns. For example, signed8(0xFF) == int8_t(-1).
+inline int8_t signed8(uint8_t v) {
+  const int8_t& reinterpreted = reinterpret_cast<const int8_t&>(v);
+  return reinterpreted;
+}
+
+// Reinterprets the bit pattern of |v| as a signed 16-bit integer. For example,
+// signed16(0xFFFF) == int16_t(-1).
+inline int16_t signed16(uint16_t v) {
+  return *reinterpret_cast<const int16_t*>(&v);
+}
+
+} // namespace
+
+// Checks RangeIsBounded() on uint8_t and uint32_t inputs, including values at
+// the edge of arithmetic overflow. Cases tagged "// !" pass 0 as the second
+// argument and equal first and third arguments, and are expected to be
+// rejected.
+TEST(AlgorithmTest, RangeIsBounded) {
+ // Basic tests.
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +0U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +10U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(1U, +9U, 10U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(1U, +10U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(8U, +1U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(8U, +2U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(9U, +0U, 10U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(10U, +0U, 10U)); // !
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(100U, +0U, 10U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(100U, +1U, 10U));
+
+ // Test at boundary of overflow.
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(42U, +137U, 255U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +255U, 255U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(1U, +254U, 255U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(1U, +255U, 255U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(254U, +0U, 255U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(254U, +1U, 255U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(255U, +0U, 255U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(255U, +3U, 255U));
+
+ // Test with uint32_t.
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0x1000U, 0x2000U));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0x0FFFU, +0x1000U, 0x2000U));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0x1000U, +0x1000U, 0x2000U));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x1000U, +0x1001U, 0x2000U));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0x1FFFU, +1U, 0x2000U));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x2000U, +0U, 0x2000U)); // !
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x3000U, +0U, 0x2000U));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x3000U, +1U, 0x2000U));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0xFFFFFFFEU, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0xFFFFFFFFU, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(1U, +0xFFFFFFFEU, 0xFFFFFFFFU));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(1U, +0xFFFFFFFFU, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0x80000000U, +0x7FFFFFFFU, 0xFFFFFFFFU));
+ EXPECT_FALSE(
+ RangeIsBounded<uint32_t>(0x80000000U, +0x80000000U, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0xFFFFFFFEU, +1U, 0xFFFFFFFFU));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0xFFFFFFFFU, +0U, 0xFFFFFFFFU)); // !
+ EXPECT_FALSE(
+ RangeIsBounded<uint32_t>(0xFFFFFFFFU, +0xFFFFFFFFU, 0xFFFFFFFFU));
+}
+
+// Checks RangeCovers() on uint8_t and uint32_t inputs: values inside and
+// outside the range, calls whose second argument is 0 (never expected to cover
+// anything), and ranges that end at the largest representable value.
+TEST(AlgorithmTest, RangeCovers) {
+ // Basic tests.
+ EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 0U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 5U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 9U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 10U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 100U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 255U));
+
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 0U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 41U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 42U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 100U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 178U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 179U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 255U));
+
+ // 0-size ranges.
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 41U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 42U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 43U));
+
+ // Test at boundary of overflow.
+ EXPECT_TRUE(RangeCovers<uint8_t>(254U, +1U, 254U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(254U, +1U, 255U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(255U, +0U, 255U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(255U, +1U, 255U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(255U, +5U, 0U));
+
+ // Test with uint32_t.
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0U));
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 1234566U));
+ EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 1234567U));
+ EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 4444444U));
+ EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 8888887U));
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 8888888U));
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0x80000000U));
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0xFFFFFFFFU));
+ EXPECT_FALSE(RangeCovers<uint32_t>(0xFFFFFFFFU, +0, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeCovers<uint32_t>(0xFFFFFFFFU, +1, 0xFFFFFFFFU));
+ EXPECT_FALSE(RangeCovers<uint32_t>(0xFFFFFFFFU, +2, 0));
+}
+
+// Checks InclusiveClamp() for values below, inside, and above |[lo, hi]|, as
+// well as for single-element ranges.
+TEST(AlgorithmTest, InclusiveClamp) {
+  struct TestCase {
+    uint32_t expected;
+    uint32_t value;
+    uint32_t lo;
+    uint32_t hi;
+  };
+  const TestCase kTestCases[] = {
+      {1U, 0U, 1U, 9U},
+      {1U, 1U, 1U, 9U},
+      {5U, 5U, 1U, 9U},
+      {8U, 8U, 1U, 9U},
+      {9U, 9U, 1U, 9U},
+      {9U, 10U, 1U, 9U},
+      {9U, 0xFFFFFFFFU, 1U, 9U},
+      {42U, 0U, 42U, 42U},
+      {42U, 41U, 42U, 42U},
+      {42U, 42U, 42U, 42U},
+      {42U, 43U, 42U, 42U},
+      {0U, 0U, 0U, 0U},
+      {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+  };
+  for (const TestCase& test_case : kTestCases) {
+    EXPECT_EQ(test_case.expected,
+              InclusiveClamp<uint32_t>(test_case.value, test_case.lo,
+                                       test_case.hi));
+  }
+}
+
+// Checks ceil() with an even and an odd multiple, for inputs at, just below,
+// and just above exact multiples.
+TEST(AlgorithmTest, Ceil) {
+  struct TestCase {
+    uint32_t expected;
+    uint32_t x;
+    uint32_t m;
+  };
+  const TestCase kTestCases[] = {
+      {0U, 0U, 2U},    {2U, 1U, 2U},    {2U, 2U, 2U},    {4U, 3U, 2U},
+      {4U, 4U, 2U},    {11U, 10U, 11U}, {11U, 11U, 11U}, {22U, 12U, 11U},
+      {22U, 21U, 11U}, {22U, 22U, 11U}, {33U, 23U, 11U},
+  };
+  for (const TestCase& test_case : kTestCases)
+    EXPECT_EQ(test_case.expected, ceil<uint32_t>(test_case.x, test_case.m));
+}
+
+// Checks the SignExtend(pos, v) overload, which takes the bit position as a
+// function argument, for 8-, 16-, 32- and 64-bit signed and unsigned types.
+TEST(AlgorithmTest, SignExtend) {
+ // 0x6A = 0b0110'1010.
+ // Walk |pos| over all 8 bit positions, alternating unsigned / signed types.
+ EXPECT_EQ(uint8_t(0x00), (SignExtend<uint8_t>(0, 0x6A)));
+ EXPECT_EQ(signed8(0xFE), (SignExtend<int8_t>(1, signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0x02), (SignExtend<uint8_t>(2, 0x6A)));
+ EXPECT_EQ(signed8(0xFA), (SignExtend<int8_t>(3, signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0x0A), (SignExtend<uint8_t>(4, 0x6A)));
+ EXPECT_EQ(signed8(0xEA), (SignExtend<int8_t>(5, signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0xEA), (SignExtend<uint8_t>(6, 0x6A)));
+ EXPECT_EQ(signed8(0x6A), (SignExtend<int8_t>(7, signed8(0x6A))));
+
+ // 16-bit types.
+ EXPECT_EQ(signed16(0xFFFA), (SignExtend<int16_t>(3, 0x6A)));
+ EXPECT_EQ(uint16_t(0x000A), (SignExtend<uint16_t>(4, 0x6A)));
+
+ // 32-bit types.
+ EXPECT_EQ(int32_t(0xFFFF8000), (SignExtend<int32_t>(15, 0x00008000)));
+ EXPECT_EQ(uint32_t(0x00008000U), (SignExtend<uint32_t>(16, 0x00008000)));
+ EXPECT_EQ(int32_t(0xFFFFFC00), (SignExtend<int32_t>(10, 0x00000400)));
+ EXPECT_EQ(uint32_t(0xFFFFFFFFU), (SignExtend<uint32_t>(31, 0xFFFFFFFF)));
+
+ // 64-bit types: bit 9 set is extended upward; bit 9 clear zeroes the bits
+ // above it.
+ EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFE6ALL),
+ (SignExtend<int64_t>(9, 0x000000000000026ALL)));
+ EXPECT_EQ(int64_t(0x000000000000016ALL),
+ (SignExtend<int64_t>(9, 0xFFFFFFFFFFFFFD6ALL)));
+ EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFE6AULL),
+ (SignExtend<uint64_t>(9, 0x000000000000026AULL)));
+ EXPECT_EQ(uint64_t(0x000000000000016AULL),
+ (SignExtend<uint64_t>(9, 0xFFFFFFFFFFFFFD6AULL)));
+}
+
+// Checks the SignExtend<pos, T>(v) overload, which takes the bit position as a
+// template argument, using the same inputs and expected values as the
+// SignExtend test.
+TEST(AlgorithmTest, SignExtendTemplated) {
+ // 0x6A = 0b0110'1010.
+ // Walk |pos| over all 8 bit positions, alternating unsigned / signed types.
+ EXPECT_EQ(uint8_t(0x00), (SignExtend<0, uint8_t>(0x6A)));
+ EXPECT_EQ(signed8(0xFE), (SignExtend<1, int8_t>(signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0x02), (SignExtend<2, uint8_t>(0x6A)));
+ EXPECT_EQ(signed8(0xFA), (SignExtend<3, int8_t>(signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0x0A), (SignExtend<4, uint8_t>(0x6A)));
+ EXPECT_EQ(signed8(0xEA), (SignExtend<5, int8_t>(signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0xEA), (SignExtend<6, uint8_t>(0x6A)));
+ EXPECT_EQ(signed8(0x6A), (SignExtend<7, int8_t>(signed8(0x6A))));
+
+ // 16-bit types.
+ EXPECT_EQ(signed16(0xFFFA), (SignExtend<3, int16_t>(0x6A)));
+ EXPECT_EQ(uint16_t(0x000A), (SignExtend<4, uint16_t>(0x6A)));
+
+ // 32-bit types.
+ EXPECT_EQ(int32_t(0xFFFF8000), (SignExtend<15, int32_t>(0x00008000)));
+ EXPECT_EQ(uint32_t(0x00008000U), (SignExtend<16, uint32_t>(0x00008000)));
+ EXPECT_EQ(int32_t(0xFFFFFC00), (SignExtend<10, int32_t>(0x00000400)));
+ EXPECT_EQ(uint32_t(0xFFFFFFFFU), (SignExtend<31, uint32_t>(0xFFFFFFFF)));
+
+ // 64-bit types: bit 9 set is extended upward; bit 9 clear zeroes the bits
+ // above it.
+ EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFE6ALL),
+ (SignExtend<9, int64_t>(0x000000000000026ALL)));
+ EXPECT_EQ(int64_t(0x000000000000016ALL),
+ (SignExtend<9, int64_t>(0xFFFFFFFFFFFFFD6ALL)));
+ EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFE6AULL),
+ (SignExtend<9, uint64_t>(0x000000000000026AULL)));
+ EXPECT_EQ(uint64_t(0x000000000000016AULL),
+ (SignExtend<9, uint64_t>(0xFFFFFFFFFFFFFD6AULL)));
+}
+
+} // namespace zucchini
diff --git a/binary_data_histogram.cc b/binary_data_histogram.cc
new file mode 100644
index 0000000..785e8ea
--- /dev/null
+++ b/binary_data_histogram.cc
@@ -0,0 +1,91 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/binary_data_histogram.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "base/format_macros.h"
+#include "base/logging.h"
+#include "base/strings/stringprintf.h"
+
+namespace zucchini {
+
+/******** OutlierDetector ********/
+
+// Defaulted constructor: the class declaration gives every data member an
+// in-class initializer of 0.
+OutlierDetector::OutlierDetector() = default;
+
+// Defaulted destructor.
+OutlierDetector::~OutlierDetector() = default;
+
+// Records |sample| by updating the running count, sum, and sum of squares.
+// For BinaryDataHistogram, |sample| is typically in interval [0, 1].
+void OutlierDetector::Add(double sample) {
+  sum_of_squares_ += sample * sample;
+  sum_ += sample;
+  n_ += 1;
+}
+
+// Computes |mean_| and |standard_deviation_| from the accumulated sums. The
+// standard deviation uses |n_ - 1| as divisor, or 1 when there is only a
+// single sample. Does nothing if no samples were added.
+void OutlierDetector::Prepare() {
+  if (n_ == 0)
+    return;
+  mean_ = sum_ / n_;
+  // Sum of squared deviations from the mean. Floating-point rounding can push
+  // this slightly below zero for (nearly) identical samples; clamp it so that
+  // sqrt() never yields NaN.
+  const double sum_sq_dev = std::max(0.0, sum_of_squares_ - sum_ * mean_);
+  const size_t divisor = std::max(static_cast<size_t>(1), n_ - 1);
+  standard_deviation_ = std::sqrt(sum_sq_dev / divisor);
+}
+
+// Formats |mean_| and |standard_deviation_| (5 decimal places each) together
+// with the sample count |n_| into a single line of text.
+std::string OutlierDetector::RenderStats() {
+ return base::StringPrintf("Mean = %.5f, StdDev = %.5f over %" PRIuS
+ " samples",
+ mean_, standard_deviation_, n_);
+}
+
+// Classifies |sample| by its signed distance from |mean_|, measured in units of
+// max(|standard_deviation_|, kMinTolerance): returns 1 above +kSigmaBound, -1
+// below -kSigmaBound, and 0 otherwise. Constants are chosen for
+// BinaryDataHistogram, where |sample| is typically in [0, 1].
+int OutlierDetector::DecideOutlier(double sample) {
+  // Number of standard deviations away from mean for value to become outlier.
+  constexpr double kSigmaBound = 1.9;
+  // Lower bound to avoid divide-by-zero and penalizing tight clusters.
+  constexpr double kMinTolerance = 0.1;
+  // With fewer than two samples nothing is reported as an outlier.
+  if (n_ <= 1)
+    return 0;
+  const double scale = std::max(kMinTolerance, standard_deviation_);
+  const double deviation_in_sigmas = (sample - mean_) / scale;
+  if (deviation_in_sigmas > kSigmaBound)
+    return 1;
+  if (deviation_in_sigmas < -kSigmaBound)
+    return -1;
+  return 0;
+}
+
+/******** BinaryDataHistogram ********/
+
+// Defaulted constructor: |histogram_| starts out null, so a new instance is
+// not valid until Compute() succeeds.
+BinaryDataHistogram::BinaryDataHistogram() = default;
+
+// Defaulted destructor.
+BinaryDataHistogram::~BinaryDataHistogram() = default;
+
+// Builds the histogram of all overlapping 2-byte sequences in |region|. Must
+// not be called on an instance that already holds a histogram. Returns false,
+// without allocating a histogram, if |region| is shorter than 2 bytes.
+bool BinaryDataHistogram::Compute(ConstBufferView region) {
+  DCHECK(!histogram_);
+  const size_t region_size = region.size();
+  // Binary data with size < 2 are invalid.
+  if (region_size < sizeof(uint16_t))
+    return false;
+  DCHECK_LE(region_size,
+            static_cast<size_t>(std::numeric_limits<int32_t>::max()));
+
+  size_ = region_size;
+  histogram_ = std::make_unique<int32_t[]>(kNumBins);
+  // Visit every offset at which a full 2-byte value can be read.
+  const size_t last_offset = region_size - sizeof(uint16_t);
+  for (size_t offset = 0; offset <= last_offset; ++offset)
+    ++histogram_[region.read<uint16_t>(offset)];
+  return true;
+}
+
+// Returns the Manhattan (L1) distance between this histogram and |other|,
+// divided by the combined size of the two inputs. Both histograms must be
+// valid.
+double BinaryDataHistogram::Distance(const BinaryDataHistogram& other) const {
+  DCHECK(IsValid() && other.IsValid());
+  const int32_t* const lhs = histogram_.get();
+  const int32_t* const rhs = other.histogram_.get();
+  double l1_distance = 0;
+  for (int bin = 0; bin < kNumBins; ++bin) {
+    const int32_t delta = lhs[bin] - rhs[bin];
+    l1_distance += std::abs(delta);
+  }
+  // Normalize by total size, so result lies in [0, 1].
+  return l1_distance / (size_ + other.size_);
+}
+
+} // namespace zucchini
diff --git a/binary_data_histogram.h b/binary_data_histogram.h
new file mode 100644
index 0000000..3950ab7
--- /dev/null
+++ b/binary_data_histogram.h
@@ -0,0 +1,91 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_
+#define COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "base/macros.h"
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// A class to detect outliers in a list of doubles using Chauvenet's criterion:
+// Compute mean and standard deviation of observations, then determine whether
+// a query value lies beyond a fixed number of standard deviations (sigmas) from
+// the mean. The purpose of this test is to reduce the chance of false-positive
+// ensemble matches.
+//
+// Usage: call Add() once per sample, then Prepare(), then DecideOutlier() for
+// each value to classify.
+class OutlierDetector {
+ public:
+ OutlierDetector();
+ ~OutlierDetector();
+
+ // Incorporates |sample| into mean and standard deviation.
+ void Add(double sample);
+
+ // Prepares basic statistics for DecideOutlier() calls. Should be called after
+ // all samples have been added.
+ void Prepare();
+
+ // Renders current statistics as strings for logging.
+ std::string RenderStats();
+
+ // Heuristically decides whether |sample| is an outlier. Returns 1 if |sample|
+ // is "too high", 0 if |sample| is "normal", and -1 if |sample| is "too low".
+ // Must be called after Prepare().
+ int DecideOutlier(double sample);
+
+ private:
+ // Number of samples passed to Add().
+ size_t n_ = 0;
+ // Running sum of all samples.
+ double sum_ = 0;
+ // Running sum of the squares of all samples.
+ double sum_of_squares_ = 0;
+ // Statistics derived from the running sums; assigned by Prepare().
+ double mean_ = 0;
+ double standard_deviation_ = 0;
+
+ DISALLOW_COPY_AND_ASSIGN(OutlierDetector);
+};
+
+// A class to compute similarity score between binary data. The heuristic here
+// preprocesses input data to a size-65536 histogram, counting the frequency of
+// consecutive 2-byte sequences. Therefore data with lengths < 2 are considered
+// invalid -- but this is okay for Zucchini's use case.
+class BinaryDataHistogram {
+ public:
+ BinaryDataHistogram();
+ ~BinaryDataHistogram();
+
+ // Attempts to compute the histogram, returns true iff successful.
+ bool Compute(ConstBufferView region);
+
+ // Returns true iff a histogram has been allocated, i.e., |histogram_| is
+ // non-null.
+ bool IsValid() const { return static_cast<bool>(histogram_); }
+
+ // Returns distance to another histogram (heuristics). If two binaries are
+ // identical then their histogram distance is 0. However, the converse is not
+ // true in general. For example, "aba" and "bab" are different, but their
+ // histogram distance is 0 (both histograms are {"ab": 1, "ba": 1}).
+ double Distance(const BinaryDataHistogram& other) const;
+
+ private:
+ // One bin per possible uint16_t value.
+ enum { kNumBins = 1 << (sizeof(uint16_t) * 8) };
+ static_assert(kNumBins == 65536, "Incorrect constant computation.");
+
+ // Size, in bytes, of the data over which the histogram was computed.
+ size_t size_ = 0;
+
+ // 2^16 buckets holding counts of all 2-byte sequences in the data. The counts
+ // are stored as signed values to simplify computing the distance between two
+ // histograms.
+ std::unique_ptr<int32_t[]> histogram_;
+
+ DISALLOW_COPY_AND_ASSIGN(BinaryDataHistogram);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_
diff --git a/binary_data_histogram_unittest.cc b/binary_data_histogram_unittest.cc
new file mode 100644
index 0000000..ca71010
--- /dev/null
+++ b/binary_data_histogram_unittest.cc
@@ -0,0 +1,132 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/binary_data_histogram.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// Feeds several sample sets to OutlierDetector and checks the classification
+// (-1 / 0 / 1) that DecideOutlier() returns for selected query values.
+TEST(OutlierDetectorTest, Basic) {
+ // Builds a detector from |values| and calls Prepare() on it.
+ auto make_detector = [](const std::vector<double>& values) {
+ auto detector = std::make_unique<OutlierDetector>();
+ for (double v : values)
+ detector->Add(v);
+ detector->Prepare();
+ return detector;
+ };
+
+ std::unique_ptr<OutlierDetector> detector;
+ // No data: Should at least not cause error.
+ detector = make_detector({});
+ EXPECT_EQ(0, detector->DecideOutlier(0.0));
+ // Single point: Trivially inert.
+ detector = make_detector({0.5});
+ EXPECT_EQ(0, detector->DecideOutlier(0.1));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.9));
+ // Two identical points: StdDev is 0, so falls back to built-in tolerance.
+ detector = make_detector({0.5, 0.5});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.499));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.501));
+ EXPECT_EQ(1, detector->DecideOutlier(0.7));
+ // Two separate points: Outlier test is pretty lax.
+ detector = make_detector({0.4, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.2));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.7));
+ EXPECT_EQ(1, detector->DecideOutlier(0.8));
+ // Sharpen distribution by clustering toward norm: Now test is stricter.
+ detector = make_detector({0.4, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.4));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.6));
+ EXPECT_EQ(1, detector->DecideOutlier(0.7));
+ // Shift numbers around: Mean is 0.3, and data order scrambled.
+ detector = make_detector({0.28, 0.2, 0.31, 0.4, 0.29, 0.32, 0.27, 0.30});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.0));
+ EXPECT_EQ(-1, detector->DecideOutlier(0.1));
+ EXPECT_EQ(0, detector->DecideOutlier(0.2));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.4));
+ EXPECT_EQ(1, detector->DecideOutlier(0.5));
+ EXPECT_EQ(1, detector->DecideOutlier(1.0));
+ // Typical usage: Potential outlier would be part of original input data!
+ detector = make_detector({0.3, 0.29, 0.31, 0.0, 0.3, 0.32, 0.3, 0.29, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.0));
+ EXPECT_EQ(0, detector->DecideOutlier(0.28));
+ EXPECT_EQ(0, detector->DecideOutlier(0.29));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.31));
+ EXPECT_EQ(0, detector->DecideOutlier(0.32));
+ EXPECT_EQ(1, detector->DecideOutlier(0.6));
+}
+
+// Computes histograms for every prefix and suffix of a small buffer, then
+// checks validity (inputs shorter than 2 bytes are rejected) and basic
+// properties of Distance(): zero for identical data, positivity, symmetry,
+// and monotonicity as a prefix / suffix approaches the full data.
+TEST(BinaryDataHistogramTest, Basic) {
+  // Sentinel meaning "no previous score recorded yet"; real scores are >= 0.
+  constexpr double kUninitScore = -1;
+
+  constexpr uint8_t kTestData[] = {2, 137, 42, 0, 0, 0, 7, 11, 1, 11, 255};
+  const size_t n = sizeof(kTestData);
+  ConstBufferView region(kTestData, n);
+
+  std::vector<BinaryDataHistogram> prefix_histograms(n + 1);  // Short to long.
+  std::vector<BinaryDataHistogram> suffix_histograms(n + 1);  // Long to short.
+
+  for (size_t i = 0; i <= n; ++i) {
+    ConstBufferView prefix(region.begin(), i);
+    ConstBufferView suffix(region.begin() + i, n - i);
+    // If regions are smaller than 2 bytes then it is invalid. Else valid.
+    EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].Compute(prefix));
+    EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].Compute(suffix));
+    // IsValid() returns the same results.
+    EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].IsValid());
+    EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].IsValid());
+  }
+
+  // Full-prefix = full-suffix = full data.
+  EXPECT_EQ(0.0, prefix_histograms[n].Distance(suffix_histograms[0]));
+  EXPECT_EQ(0.0, suffix_histograms[0].Distance(prefix_histograms[n]));
+
+  // Testing heuristics without overreliance on implementation details.
+
+  // Strict prefixes, in increasing size. Compare against full data.
+  double prev_prefix_score = kUninitScore;
+  for (size_t i = 2; i < n; ++i) {
+    double score = prefix_histograms[i].Distance(prefix_histograms[n]);
+    // Positivity.
+    EXPECT_GT(score, 0.0);
+    // Symmetry.
+    EXPECT_EQ(score, prefix_histograms[n].Distance(prefix_histograms[i]));
+    // Distance should decrease as prefix gets nearer to full data.
+    if (prev_prefix_score != kUninitScore)
+      EXPECT_LT(score, prev_prefix_score);
+    prev_prefix_score = score;
+  }
+
+  // Strict suffixes, in decreasing size. Compare against full data.
+  double prev_suffix_score = kUninitScore;
+  for (size_t i = 1; i <= n - 2; ++i) {
+    double score = suffix_histograms[i].Distance(suffix_histograms[0]);
+    // Positivity.
+    EXPECT_GT(score, 0.0);
+    // Symmetry.
+    EXPECT_EQ(score, suffix_histograms[0].Distance(suffix_histograms[i]));
+    // Distance should increase as suffix gets farther from full data.
+    if (prev_suffix_score != kUninitScore)
+      EXPECT_GT(score, prev_suffix_score);
+    prev_suffix_score = score;
+  }
+}
+
+} // namespace zucchini
diff --git a/buffer_sink.cc b/buffer_sink.cc
new file mode 100644
index 0000000..5b89e3a
--- /dev/null
+++ b/buffer_sink.cc
@@ -0,0 +1,11 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_sink.h"
+
+namespace zucchini {
+
+// Creates a sink over |buffer| by copy-initializing the MutableBufferView base
+// from it.
+BufferSink::BufferSink(MutableBufferView buffer) : MutableBufferView(buffer) {}
+
+} // namespace zucchini
diff --git a/buffer_sink.h b/buffer_sink.h
new file mode 100644
index 0000000..c17f040
--- /dev/null
+++ b/buffer_sink.h
@@ -0,0 +1,68 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_BUFFER_SINK_H_
+#define COMPONENTS_ZUCCHINI_BUFFER_SINK_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <iterator>
+
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// BufferSink acts like an output stream with convenience methods to serialize
+// data into a contiguous sequence of raw data. The underlying MutableBufferView
+// emulates a cursor to track current write position, and guards against buffer
+// overrun. Where applicable, BufferSink should be passed by pointer to maintain
+// cursor progress across writes.
+class BufferSink : public MutableBufferView {
+ public:
+  using iterator = MutableBufferView::iterator;
+
+  using MutableBufferView::MutableBufferView;
+  BufferSink() = default;
+  explicit BufferSink(MutableBufferView buffer);
+  BufferSink(const BufferSink&) = default;
+  BufferSink& operator=(BufferSink&&) = default;
+
+  // If sufficient space is available, writes the binary representation of
+  // |value| starting at the cursor, while advancing the cursor beyond the
+  // written region, and returns true. Otherwise returns false and leaves the
+  // cursor unchanged.
+  template <class T>
+  bool PutValue(const T& value) {
+    DCHECK_NE(begin(), nullptr);
+    if (Remaining() < sizeof(T))
+      return false;
+    // Copy bytewise instead of storing through a T*: the cursor can sit at
+    // any byte offset, so it need not be suitably aligned for T.
+    memcpy(begin(), &value, sizeof(T));
+    remove_prefix(sizeof(T));
+    return true;
+  }
+
+  // If sufficient space is available, writes the raw bytes [|first|, |last|)
+  // starting at the cursor, while advancing the cursor beyond the written
+  // region, and returns true. Otherwise returns false and leaves the cursor
+  // unchanged.
+  template <class It>
+  bool PutRange(It first, It last) {
+    static_assert(sizeof(typename std::iterator_traits<It>::value_type) ==
+                      sizeof(uint8_t),
+                  "value_type should fit in uint8_t");
+    DCHECK_NE(begin(), nullptr);
+    DCHECK(last >= first);
+    if (Remaining() < size_type(last - first))
+      return false;
+    std::copy(first, last, begin());
+    remove_prefix(last - first);
+    return true;
+  }
+
+  // Number of bytes that can still be written, i.e., the size of the
+  // underlying view.
+  size_type Remaining() const { return size(); }
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_BUFFER_SINK_H_
diff --git a/buffer_sink_unittest.cc b/buffer_sink_unittest.cc
new file mode 100644
index 0000000..33b788e
--- /dev/null
+++ b/buffer_sink_unittest.cc
@@ -0,0 +1,71 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_sink.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// Fill value for the test buffer; bytes still equal to it were never written.
+constexpr uint8_t kUninit = 0xFF;
+
+// Fixture providing a 10-byte buffer filled with |kUninit| and a BufferSink
+// positioned at its start.
+class BufferSinkTest : public testing::Test {
+ protected:
+ BufferSinkTest()
+ : buffer_(10, kUninit), sink_(buffer_.data(), buffer_.size()) {}
+
+ // Backing storage that |sink_| writes into. Declared before |sink_| so that
+ // it is constructed first.
+ std::vector<uint8_t> buffer_;
+ BufferSink sink_;
+};
+
+// Writes integers until the 10-byte buffer is full, checking the remaining
+// space after each call and that a value that does not fit is rejected without
+// moving the cursor.
+TEST_F(BufferSinkTest, PutValue) {
+ EXPECT_EQ(size_t(10), sink_.Remaining());
+
+ EXPECT_TRUE(sink_.PutValue(uint32_t(0x76543210)));
+ EXPECT_EQ(size_t(6), sink_.Remaining());
+
+ EXPECT_TRUE(sink_.PutValue(uint32_t(0xFEDCBA98)));
+ EXPECT_EQ(size_t(2), sink_.Remaining());
+
+ // Only 2 bytes are left, so a 4-byte value must be refused.
+ EXPECT_FALSE(sink_.PutValue(uint32_t(0x00)));
+ EXPECT_EQ(size_t(2), sink_.Remaining());
+
+ EXPECT_TRUE(sink_.PutValue(uint16_t(0x0010)));
+ EXPECT_EQ(size_t(0), sink_.Remaining());
+
+ // Assuming little-endian architecture.
+ EXPECT_EQ(std::vector<uint8_t>(
+ {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE, 0x10, 0x00}),
+ buffer_);
+}
+
+// Checks that PutRange() copies a range that fits, and that a range larger
+// than the remaining space is rejected without changing the buffer or the
+// remaining size.
+TEST_F(BufferSinkTest, PutRange) {
+ // 11 bytes: one more than the buffer can hold.
+ std::vector<uint8_t> range = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA,
+ 0xDC, 0xFE, 0x10, 0x00, 0x42};
+
+ EXPECT_EQ(size_t(10), sink_.Remaining());
+ EXPECT_FALSE(sink_.PutRange(range.begin(), range.end()));
+ EXPECT_EQ(size_t(10), sink_.Remaining());
+
+ EXPECT_TRUE(sink_.PutRange(range.begin(), range.begin() + 8));
+ EXPECT_EQ(size_t(2), sink_.Remaining());
+ EXPECT_EQ(std::vector<uint8_t>({0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC,
+ 0xFE, kUninit, kUninit}),
+ buffer_);
+
+ // 4 bytes do not fit into the 2 bytes that remain.
+ EXPECT_FALSE(sink_.PutRange(range.begin(), range.begin() + 4));
+ EXPECT_EQ(size_t(2), sink_.Remaining());
+
+ // range is not written
+ EXPECT_EQ(std::vector<uint8_t>({0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC,
+ 0xFE, kUninit, kUninit}),
+ buffer_);
+}
+
+} // namespace zucchini
diff --git a/buffer_source.cc b/buffer_source.cc
new file mode 100644
index 0000000..721588a
--- /dev/null
+++ b/buffer_source.cc
@@ -0,0 +1,105 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_source.h"
+
+#include <algorithm>
+
+#include "components/zucchini/algorithm.h"
+
+namespace zucchini {
+
+// Creates a source over |buffer| by copy-initializing the ConstBufferView base
+// from it.
+BufferSource::BufferSource(ConstBufferView buffer) : ConstBufferView(buffer) {}
+
+// Advances the cursor by |n| bytes, or by all remaining bytes if fewer than
+// |n| are left. Returns *this.
+BufferSource& BufferSource::Skip(size_type n) {
+  const size_type available = Remaining();
+  remove_prefix(n < available ? n : available);
+  return *this;
+}
+
+// Returns true iff at least |bytes.size()| bytes remain and the data at the
+// cursor starts with |bytes|. Does not move the cursor.
+bool BufferSource::CheckNextBytes(std::initializer_list<uint8_t> bytes) const {
+  return Remaining() >= bytes.size() &&
+         std::equal(bytes.begin(), bytes.end(), begin());
+}
+
+// Like CheckNextBytes(), but on a match also advances the cursor past the
+// matched bytes. On a mismatch the cursor is left where it was.
+bool BufferSource::ConsumeBytes(std::initializer_list<uint8_t> bytes) {
+  const bool matched = CheckNextBytes(bytes);
+  if (matched)
+    remove_prefix(bytes.size());
+  return matched;
+}
+
+// If at least |count| bytes remain, stores a view of the next |count| bytes in
+// |*buffer|, advances the cursor past them, and returns true. Otherwise
+// returns false, leaving |*buffer| and the cursor untouched.
+bool BufferSource::GetRegion(size_type count, ConstBufferView* buffer) {
+  DCHECK_NE(begin(), nullptr);
+  const bool enough = Remaining() >= count;
+  if (enough) {
+    *buffer = ConstBufferView(begin(), count);
+    remove_prefix(count);
+  }
+  return enough;
+}
+
+// Decodes an unsigned LEB128 value of at most |kMaxLeb128Size| bytes at the
+// cursor. On success stores the value in |*ret|, moves the cursor past the
+// encoding, and returns true. Returns false, without moving the cursor, if no
+// terminating byte (high bit clear) is found within the limit or the data.
+// Encoding, with the first byte holding the lowest 7 bits:
+// [0aaaaaaa] => 00000000'00000000'00000000'0aaaaaaa
+// [1aaaaaaa 0bbbbbbb] => 00000000'00000000'00bbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 0ccccccc] => 00000000'000ccccc'ccbbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 1ccccccc 0ddddddd] => 0000dddd'dddccccc'ccbbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 1ccccccc 1ddddddd 0???eeee]
+// => eeeedddd'dddccccc'ccbbbbbb'baaaaaaa
+// Note that "???" is discarded. Meanwhile, 1???eeee is invalid.
+bool BufferSource::GetUleb128(uint32_t* ret) {
+  const size_type max_bytes = std::min<size_type>(kMaxLeb128Size, size());
+  uint32_t result = 0U;
+  const_iterator it = cbegin();
+  for (size_type i = 0; i < max_bytes; ++i, ++it) {
+    const uint32_t byte = *it;
+    // For the 5th byte (shift of 28) the "???" bits fall off the top.
+    result |= (byte & 0x7F) << (7 * i);
+    if ((byte & 0x80) == 0) {
+      *ret = result;
+      seek(it + 1);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Decodes a signed LEB128 value of at most |kMaxLeb128Size| bytes at the
+// cursor. On success stores the sign-extended value in |*ret|, moves the
+// cursor past the encoding, and returns true. Returns false, without moving
+// the cursor, if no terminating byte (high bit clear) is found within the
+// limit or the data. Encoding, with "S" denoting the sign bit:
+// [0Saaaaaa] => SSSSSSSS'SSSSSSSS'SSSSSSSS'SSaaaaaa
+// [1aaaaaaa 0Sbbbbbb] => SSSSSSSS'SSSSSSSS'SSSbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 0Scccccc] => SSSSSSSS'SSSScccc'ccbbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 1ccccccc 0Sdddddd] => SSSSSddd'dddccccc'ccbbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 1ccccccc 1ddddddd 0???Seee]
+// => Seeedddd'dddccccc'ccbbbbbb'baaaaaaa
+// Note that "???" is discarded. Meanwhile, 1???Seee is invalid.
+bool BufferSource::GetSleb128(int32_t* ret) {
+  int shift_lim =
+      static_cast<int>(std::min<size_type>(kMaxLeb128Size, size())) * 7;
+  const_iterator cur = cbegin();
+  // Accumulate in an unsigned integer: at |shift == 28| payload bits are
+  // shifted into and past bit 31, which is undefined behavior for int32_t
+  // before C++20.
+  uint32_t value = 0U;
+  for (int shift = 0; shift < shift_lim; shift += 7, ++cur) {
+    uint32_t b = *cur;
+    // When |shift == 28|, |(b & 0x7F) << shift| discards the "???" bits.
+    value |= static_cast<uint32_t>(b & 0x7F) << shift;
+    if (!(b & 0x80)) {
+      const int32_t signed_value = static_cast<int32_t>(value);
+      // After 5 bytes all 32 bits are populated, so bit 31 already holds the
+      // sign. Otherwise the sign is bit 6 of the last byte, i.e., bit
+      // |shift + 6| of the result, and must be extended upward.
+      *ret = (shift == 28) ? signed_value
+                           : SignExtend(shift + 6, signed_value);
+      seek(cur + 1);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Moves the cursor past one LEB128-encoded value without decoding it. Scans at
+// most |kMaxLeb128Size| bytes for a byte whose high bit is clear; returns
+// false, without moving the cursor, if none is found.
+bool BufferSource::SkipLeb128() {
+  const_iterator cur = cbegin();
+  const const_iterator scan_end =
+      cur + std::min<size_type>(kMaxLeb128Size, size());
+  for (; cur != scan_end; ++cur) {
+    if ((*cur & 0x80) == 0) {
+      seek(cur + 1);
+      return true;
+    }
+  }
+  return false;
+}
+
+} // namespace zucchini
diff --git a/buffer_source.h b/buffer_source.h
new file mode 100644
index 0000000..d2a05b0
--- /dev/null
+++ b/buffer_source.h
@@ -0,0 +1,141 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_
+#define COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <initializer_list>
+#include <type_traits>
+
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// BufferSource is an input-stream-like reader with convenience methods to parse
+// data out of a contiguous sequence of raw bytes. It is a ConstBufferView whose
+// start doubles as the read cursor: successful reads move the start forward,
+// and each read is checked against Remaining() to guard against buffer
+// overrun. Where applicable, pass BufferSource by pointer so that cursor
+// progress persists across reads.
+class BufferSource : public ConstBufferView {
+ public:
+  // LEB128 info: http://dwarfstd.org/doc/dwarf-2.0.0.pdf , Section 7.6.
+  // Upper bound on the number of bytes examined per LEB128 value.
+  enum : size_t { kMaxLeb128Size = 5 };
+
+  // Creates a BufferSource that reads from [first, last).
+  static BufferSource FromRange(const_iterator first, const_iterator last) {
+    ConstBufferView range = ConstBufferView::FromRange(first, last);
+    return BufferSource(range);
+  }
+
+  using ConstBufferView::ConstBufferView;
+  BufferSource() = default;
+  explicit BufferSource(ConstBufferView buffer);
+  BufferSource(const BufferSource&) = default;
+  BufferSource& operator=(BufferSource&&) = default;
+
+  // Advances the cursor by |n| bytes, stopping at the end if fewer than |n|
+  // bytes remain. Returns *this so that calls can be chained, e.g.:
+  //   if (!buffer_source.Skip(1024).GetValue<uint32_t>(&value)) {
+  //     ... // Handle error.
+  //   }
+  // Note that Skip() itself reports no error; the subsequent GetValue() does.
+  BufferSource& Skip(size_type n);
+
+  // Returns true if the data at the cursor, reinterpreted as the integral type
+  // |T|, equals |value|. Returns false if fewer than sizeof(T) bytes remain.
+  // The cursor does not move.
+  template <class T>
+  bool CheckNextValue(const T& value) const {
+    static_assert(std::is_integral<T>::value,
+                  "Value type must be an integral type");
+    DCHECK_NE(begin(), nullptr);
+    return Remaining() >= sizeof(T) &&
+           *reinterpret_cast<const T*>(begin()) == value;
+  }
+
+  // Returns true if the next bytes.size() bytes at the cursor match those in
+  // |bytes|.
+  bool CheckNextBytes(std::initializer_list<uint8_t> bytes) const;
+
+  // Same as CheckNextBytes(), but also moves the cursor forward by
+  // bytes.size() if the match is successful.
+  bool ConsumeBytes(std::initializer_list<uint8_t> bytes);
+
+  // Reinterprets the data at the cursor as type |T| and copies it into
+  // |*value|, then moves the cursor forward by sizeof(T). Returns true on
+  // success. If fewer than sizeof(T) bytes remain, returns false and changes
+  // neither |*value| nor the cursor.
+  template <class T>
+  bool GetValue(T* value) {
+    static_assert(std::is_standard_layout<T>::value,
+                  "Value type must be a standard layout type");
+
+    DCHECK_NE(begin(), nullptr);
+    const bool has_enough_data = Remaining() >= sizeof(T);
+    if (has_enough_data) {
+      *value = *reinterpret_cast<const T*>(begin());
+      remove_prefix(sizeof(T));
+    }
+    return has_enough_data;
+  }
+
+  // Returns a pointer of type |T| into the underlying data at the cursor, then
+  // moves the cursor forward by sizeof(T). Returns nullptr, without moving the
+  // cursor, if fewer than sizeof(T) bytes remain.
+  template <class T>
+  const T* GetPointer() {
+    static_assert(std::is_standard_layout<T>::value,
+                  "Value type must be a standard layout type");
+
+    DCHECK_NE(begin(), nullptr);
+    const T* ptr = nullptr;
+    if (Remaining() >= sizeof(T)) {
+      ptr = reinterpret_cast<const T*>(begin());
+      remove_prefix(sizeof(T));
+    }
+    return ptr;
+  }
+
+  // Returns a pointer of type |T| to an array of |count| elements located at
+  // the cursor in the underlying data, then moves the cursor past the array.
+  // Returns nullptr, without moving the cursor, if the remaining data cannot
+  // hold |count| elements.
+  template <class T>
+  const T* GetArray(size_t count) {
+    static_assert(std::is_standard_layout<T>::value,
+                  "Value type must be a standard layout type");
+
+    // Dividing (rather than multiplying |count|) keeps the check free of
+    // overflow.
+    const size_t max_count = Remaining() / sizeof(T);
+    if (count > max_count)
+      return nullptr;
+    const T* array = reinterpret_cast<const T*>(begin());
+    remove_prefix(count * sizeof(T));
+    return array;
+  }
+
+  // If at least |size| bytes remain, makes |buffer| refer to the |size| bytes
+  // at the cursor, moves the cursor past them, and returns true. Otherwise
+  // returns false.
+  bool GetRegion(size_type size, ConstBufferView* buffer);
+
+  // Reads an Unsigned Little Endian Base 128 (uleb128) int at the cursor. If
+  // successful, writes the result to |value|, advances the cursor, and returns
+  // true. Otherwise returns false.
+  bool GetUleb128(uint32_t* value);
+
+  // Reads a Signed Little Endian Base 128 (sleb128) int at the cursor. If
+  // successful, writes the result to |value|, advances the cursor, and returns
+  // true. Otherwise returns false.
+  bool GetSleb128(int32_t* value);
+
+  // Reads a uleb128 / sleb128 at the cursor but discards the result. If
+  // successful, advances the cursor and returns true. Otherwise returns false.
+  bool SkipLeb128();
+
+  // Returns the number of bytes between the cursor and the end of the data.
+  size_type Remaining() const { return size(); }
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_
diff --git a/buffer_source_unittest.cc b/buffer_source_unittest.cc
new file mode 100644
index 0000000..e8f00c5
--- /dev/null
+++ b/buffer_source_unittest.cc
@@ -0,0 +1,347 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_source.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "components/zucchini/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+using vec = std::vector<uint8_t>;
+
+class BufferSourceTest : public testing::Test {
+ protected:
+ std::vector<uint8_t> bytes_ = ParseHexString("10 32 54 76 98 BA DC FE 10 00");
+
+ BufferSource source_ = {bytes_.data(), bytes_.size()};
+};
+
+TEST_F(BufferSourceTest, Skip) {
+ EXPECT_EQ(bytes_.size(), source_.Remaining());
+ source_.Skip(2);
+ EXPECT_EQ(bytes_.size() - 2, source_.Remaining());
+ source_.Skip(10); // Skipping past end just moves cursor to end.
+ EXPECT_EQ(size_t(0), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, CheckNextBytes) {
+ EXPECT_TRUE(source_.CheckNextBytes({0x10, 0x32, 0x54, 0x76}));
+ source_.Skip(4);
+ EXPECT_TRUE(source_.CheckNextBytes({0x98, 0xBA, 0xDC, 0xFE}));
+
+ // Cursor has not advanced, so check fails.
+ EXPECT_FALSE(source_.CheckNextBytes({0x10, 0x00}));
+
+ source_.Skip(4);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ // Goes beyond end by 2 bytes.
+ EXPECT_FALSE(source_.CheckNextBytes({0x10, 0x00, 0x00, 0x00}));
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, ConsumeBytes) {
+ EXPECT_FALSE(source_.ConsumeBytes({0x10, 0x00}));
+ EXPECT_EQ(bytes_.size(), source_.Remaining());
+ EXPECT_TRUE(source_.ConsumeBytes({0x10, 0x32, 0x54, 0x76}));
+ EXPECT_EQ(size_t(6), source_.Remaining());
+ EXPECT_TRUE(source_.ConsumeBytes({0x98, 0xBA, 0xDC, 0xFE}));
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ // Goes beyond end by 2 bytes.
+ EXPECT_FALSE(source_.ConsumeBytes({0x10, 0x00, 0x00, 0x00}));
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, CheckNextValue) {
+ EXPECT_TRUE(source_.CheckNextValue(uint32_t(0x76543210)));
+ EXPECT_FALSE(source_.CheckNextValue(uint32_t(0x0)));
+ EXPECT_TRUE(source_.CheckNextValue(uint64_t(0xFEDCBA9876543210)));
+ EXPECT_FALSE(source_.CheckNextValue(uint64_t(0x0)));
+
+ source_.Skip(8);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ // Goes beyond end by 2 bytes.
+ EXPECT_FALSE(source_.CheckNextValue(uint32_t(0x1000)));
+}
+
+// Supported by MSVC, g++, and clang++.
+// Ensures no gaps in packing.
+#pragma pack(push, 1)
+struct ValueType {
+ uint32_t a;
+ uint16_t b;
+};
+#pragma pack(pop)
+
+TEST_F(BufferSourceTest, GetValueIntegral) {
+ uint32_t value = 0;
+ EXPECT_TRUE(source_.GetValue(&value));
+ EXPECT_EQ(uint32_t(0x76543210), value);
+ EXPECT_EQ(size_t(6), source_.Remaining());
+
+ EXPECT_TRUE(source_.GetValue(&value));
+ EXPECT_EQ(uint32_t(0xFEDCBA98), value);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ EXPECT_FALSE(source_.GetValue(&value));
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetValueAggregate) {
+ ValueType value = {};
+ EXPECT_TRUE(source_.GetValue(&value));
+ EXPECT_EQ(uint32_t(0x76543210), value.a);
+ EXPECT_EQ(uint32_t(0xBA98), value.b);
+ EXPECT_EQ(size_t(4), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetRegion) {
+ ConstBufferView region;
+ EXPECT_TRUE(source_.GetRegion(0, &region));
+ EXPECT_EQ(bytes_.size(), source_.Remaining());
+ EXPECT_TRUE(region.empty());
+
+ EXPECT_TRUE(source_.GetRegion(2, &region));
+ EXPECT_EQ(size_t(2), region.size());
+ EXPECT_EQ(vec({0x10, 0x32}), vec(region.begin(), region.end()));
+ EXPECT_EQ(size_t(8), source_.Remaining());
+
+ EXPECT_FALSE(source_.GetRegion(bytes_.size(), &region));
+ EXPECT_EQ(size_t(8), source_.Remaining());
+ // |region| is left untouched.
+ EXPECT_EQ(vec({0x10, 0x32}), vec(region.begin(), region.end()));
+ EXPECT_EQ(size_t(2), region.size());
+}
+
+TEST_F(BufferSourceTest, GetPointerIntegral) {
+ const uint32_t* ptr = source_.GetPointer<uint32_t>();
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0x76543210), *ptr);
+ EXPECT_EQ(size_t(6), source_.Remaining());
+
+ ptr = source_.GetPointer<uint32_t>();
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0xFEDCBA98), *ptr);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ EXPECT_EQ(nullptr, source_.GetPointer<uint32_t>());
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetPointerAggregate) {
+ const ValueType* ptr = source_.GetPointer<ValueType>();
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0x76543210), ptr->a);
+ EXPECT_EQ(uint32_t(0xBA98), ptr->b);
+ EXPECT_EQ(size_t(4), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetArrayIntegral) {
+ EXPECT_EQ(nullptr, source_.GetArray<uint32_t>(3));
+
+ const uint32_t* ptr = source_.GetArray<uint32_t>(2);
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0x76543210), ptr[0]);
+ EXPECT_EQ(uint32_t(0xFEDCBA98), ptr[1]);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetArrayAggregate) {
+ const ValueType* ptr = source_.GetArray<ValueType>(2);
+ EXPECT_EQ(nullptr, ptr);
+
+ ptr = source_.GetArray<ValueType>(1);
+
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0x76543210), ptr[0].a);
+ EXPECT_EQ(uint32_t(0xBA98), ptr[0].b);
+ EXPECT_EQ(size_t(4), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetUleb128) {
+ using size_type = BufferSource::size_type;
+ // Result = {success, value, bytes_consumed}.
+ using Result = std::tuple<bool, uint32_t, size_type>;
+
+ constexpr uint32_t kUnInit = 0xCCCCCCCC; // Arbitrary value.
+ constexpr Result kBad{false, kUnInit, 0U};
+
+ auto run = [kUnInit](const std::string hex_string) -> Result {
+ std::vector<uint8_t> bytes = ParseHexString(hex_string);
+ BufferSource source(ConstBufferView{bytes.data(), bytes.size()});
+ BufferSource::iterator base = source.begin();
+ // Initialize |value| to |kUnInit| to ensure no write on failure.
+ uint32_t value = kUnInit;
+ bool success = source.GetUleb128(&value);
+ return {success, value, source.begin() - base};
+ };
+
+ auto good = [](uint32_t value, size_type bytes_consumed) -> Result {
+ return Result{true, value, bytes_consumed};
+ };
+
+ EXPECT_EQ(good(0x0U, 1U), run("00"));
+ EXPECT_EQ(good(0x20U, 1U), run("20"));
+ EXPECT_EQ(good(0x42U, 1U), run("42"));
+ EXPECT_EQ(good(0x7FU, 1U), run("7F"));
+ EXPECT_EQ(kBad, run("80")); // Out of data.
+ EXPECT_EQ(good(0x0U, 2U), run("80 00")); // Redundant code.
+ EXPECT_EQ(good(0x80U, 2U), run("80 01"));
+ EXPECT_EQ(good(0x7FU, 2U), run("FF 00")); // Redundant (unsigned).
+ EXPECT_EQ(good(0x3FFFU, 2U), run("FF 7F"));
+ EXPECT_EQ(good(0x0U, 1U), run("00 80")); // Only reads byte 0.
+ EXPECT_EQ(kBad, run("80 80")); // Out of data.
+ EXPECT_EQ(kBad, run("F1 88")); // Out of data.
+ EXPECT_EQ(good(0x0U, 3U), run("80 80 00")); // Redundant code.
+ EXPECT_EQ(good(0x4000U, 3U), run("80 80 01"));
+ EXPECT_EQ(good(0x00100000U, 3U), run("80 80 40"));
+ EXPECT_EQ(good(0x001FFFFFU, 3U), run("FF FF 7F"));
+ EXPECT_EQ(good(0x0U, 1U), run("00 00 80")); // Only reads byte 0.
+ EXPECT_EQ(kBad, run("80 80 80")); // Out of data.
+ EXPECT_EQ(kBad, run("AB CD EF")); // Out of data.
+ EXPECT_EQ(good(0x0U, 4U), run("80 80 80 00")); // Redundant code.
+ EXPECT_EQ(good(0x00100000U, 4U), run("80 80 C0 00"));
+ EXPECT_EQ(good(0x00200000U, 4U), run("80 80 80 01"));
+ EXPECT_EQ(good(0x08000000U, 4U), run("80 80 80 40"));
+ EXPECT_EQ(good(0x001FC07FU, 4U), run("FF 80 FF 00"));
+ EXPECT_EQ(good(0x0U, 5U), run("80 80 80 80 00")); // Redundant code.
+ EXPECT_EQ(good(0x10000000U, 5U), run("80 80 80 80 01"));
+ EXPECT_EQ(good(0x10204081U, 5U), run("81 81 81 81 01"));
+ EXPECT_EQ(good(0x7FFFFFFFU, 5U), run("FF FF FF FF 07"));
+ EXPECT_EQ(good(0x80000000U, 5U), run("80 80 80 80 08"));
+ EXPECT_EQ(good(0xFFFFFFFFU, 5U), run("FF FF FF FF 0F"));
+ EXPECT_EQ(kBad, run("FF FF FF FF 80")); // Too long / out of data.
+ EXPECT_EQ(good(0x0FFFFFFFU, 5U), run("FF FF FF FF 10")); // "1" discarded.
+ EXPECT_EQ(good(0x00000000U, 5U), run("80 80 80 80 20")); // "2" discarded.
+ EXPECT_EQ(good(0xA54A952AU, 5U), run("AA AA AA AA 7A")); // "7" discarded.
+ EXPECT_EQ(kBad, run("FF FF FF FF FF 00")); // Too long.
+}
+
+// Verifies GetSleb128() decoding: sign extension for every encoding length,
+// failure on truncated / overlong input (with no write to the output and no
+// cursor movement), and discarding of the unused high bits of the 5th byte.
+TEST_F(BufferSourceTest, GetSleb128) {
+  using size_type = BufferSource::size_type;
+  // Result = {success, value, bytes_consumed}.
+  using Result = std::tuple<bool, int32_t, size_type>;
+
+  constexpr int32_t kUnInit = 0xCCCCCCCC;  // Arbitrary value.
+  constexpr Result kBad{false, kUnInit, 0U};
+
+  auto run = [kUnInit](const std::string& hex_string) -> Result {
+    std::vector<uint8_t> bytes = ParseHexString(hex_string);
+    BufferSource source(ConstBufferView{bytes.data(), bytes.size()});
+    BufferSource::iterator base = source.begin();
+    // Initialize |value| to |kUnInit| to ensure no write on failure.
+    int32_t value = kUnInit;
+    bool success = source.GetSleb128(&value);
+    return {success, value, source.begin() - base};
+  };
+
+  auto good = [](int32_t value, size_type bytes_consumed) -> Result {
+    return Result{true, value, bytes_consumed};
+  };
+
+  EXPECT_EQ(good(0x0, 1U), run("00"));
+  EXPECT_EQ(good(0x20, 1U), run("20"));
+  EXPECT_EQ(good(-0x3E, 1U), run("42"));
+  EXPECT_EQ(good(-0x1, 1U), run("7F"));
+  EXPECT_EQ(kBad, run("80"));              // Out of data.
+  EXPECT_EQ(good(0x0, 2U), run("80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x80, 2U), run("80 01"));
+  EXPECT_EQ(good(0x7F, 2U), run("FF 00"));  // Not redundant.
+  EXPECT_EQ(good(-0x1, 2U), run("FF 7F"));  // Redundant code.
+  EXPECT_EQ(good(0x0, 1U), run("00 80"));   // Only reads byte 0.
+  EXPECT_EQ(kBad, run("80 80"));            // Out of data.
+  EXPECT_EQ(kBad, run("F1 88"));            // Out of data.
+  EXPECT_EQ(good(0x0, 3U), run("80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x4000, 3U), run("80 80 01"));
+  EXPECT_EQ(good(-0x100000, 3U), run("80 80 40"));
+  EXPECT_EQ(good(-0x1, 3U), run("FF FF 7F"));  // Redundant code.
+  EXPECT_EQ(good(0x0, 1U), run("00 00 80"));   // Only reads byte 0.
+  EXPECT_EQ(kBad, run("80 80 80"));            // Out of data.
+  EXPECT_EQ(kBad, run("AB CD EF"));            // Out of data.
+  EXPECT_EQ(good(0x0, 4U), run("80 80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x00100000, 4U), run("80 80 C0 00"));
+  EXPECT_EQ(good(0x00200000, 4U), run("80 80 80 01"));
+  EXPECT_EQ(good(-static_cast<int32_t>(0x08000000), 4U), run("80 80 80 40"));
+  EXPECT_EQ(good(0x001FC07F, 4U), run("FF 80 FF 00"));
+  EXPECT_EQ(good(0x0, 5U), run("80 80 80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x10000000, 5U), run("80 80 80 80 01"));
+  EXPECT_EQ(good(0x10204081, 5U), run("81 81 81 81 01"));
+  EXPECT_EQ(good(0x7FFFFFFF, 5U), run("FF FF FF FF 07"));
+  // INT32_MIN is spelled directly: negating static_cast<int32_t>(0x80000000)
+  // would negate the most negative int32_t, which is signed overflow.
+  EXPECT_EQ(good(INT32_MIN, 5U), run("80 80 80 80 08"));
+  EXPECT_EQ(good(-0x1, 5U), run("FF FF FF FF 0F"));  // Redundant code.
+  EXPECT_EQ(kBad, run("FF FF FF FF 80"));  // Too long / out of data.
+  EXPECT_EQ(good(0x0FFFFFFF, 5U), run("FF FF FF FF 10"));  // "1" discarded.
+  EXPECT_EQ(good(0x00000000, 5U), run("80 80 80 80 20"));  // "2" discarded.
+  EXPECT_EQ(good(-0x5AB56AD6, 5U), run("AA AA AA AA 7A"));  // "7" discarded.
+  EXPECT_EQ(kBad, run("FF FF FF FF FF 00"));  // Too long.
+}
+
+TEST_F(BufferSourceTest, SkipLeb128) {
+ using size_type = BufferSource::size_type;
+ // Result = {success, bytes_consumed}.
+ using Result = std::tuple<bool, size_type>;
+
+ constexpr Result kBad{false, 0U};
+
+ auto run = [](const std::string hex_string) -> Result {
+ std::vector<uint8_t> bytes = ParseHexString(hex_string);
+ BufferSource source(ConstBufferView{bytes.data(), bytes.size()});
+ BufferSource::iterator base = source.begin();
+ bool success = source.SkipLeb128();
+ return {success, source.begin() - base};
+ };
+
+ auto good = [](size_type bytes_consumed) -> Result {
+ return Result{true, bytes_consumed};
+ };
+
+ EXPECT_EQ(good(1U), run("00"));
+ EXPECT_EQ(good(1U), run("20"));
+ EXPECT_EQ(good(1U), run("42"));
+ EXPECT_EQ(good(1U), run("7F"));
+ EXPECT_EQ(kBad, run("80")); // Out of data.
+ EXPECT_EQ(good(2U), run("80 00")); // Redundant code.
+ EXPECT_EQ(good(2U), run("80 01"));
+ EXPECT_EQ(good(2U), run("FF 00")); // Redundant (unsigned).
+ EXPECT_EQ(good(2U), run("FF 7F"));
+ EXPECT_EQ(good(1U), run("00 80")); // Only reads byte 0.
+ EXPECT_EQ(kBad, run("80 80")); // Out of data.
+ EXPECT_EQ(kBad, run("F1 88")); // Out of data.
+ EXPECT_EQ(good(3U), run("80 80 00")); // Redundant code.
+ EXPECT_EQ(good(3U), run("80 80 01"));
+ EXPECT_EQ(good(3U), run("80 80 40"));
+ EXPECT_EQ(good(3U), run("FF FF 7F"));
+ EXPECT_EQ(good(1U), run("00 00 80")); // Only reads byte 0.
+ EXPECT_EQ(kBad, run("80 80 80")); // Out of data.
+ EXPECT_EQ(kBad, run("AB CD EF")); // Out of data.
+ EXPECT_EQ(good(4U), run("80 80 80 00")); // Redundant code.
+ EXPECT_EQ(good(4U), run("80 80 C0 00"));
+ EXPECT_EQ(good(4U), run("80 80 80 01"));
+ EXPECT_EQ(good(4U), run("80 80 80 40"));
+ EXPECT_EQ(good(4U), run("FF 80 FF 00"));
+ EXPECT_EQ(good(5U), run("80 80 80 80 00")); // Redundant code.
+ EXPECT_EQ(good(5U), run("80 80 80 80 01"));
+ EXPECT_EQ(good(5U), run("81 81 81 81 01"));
+ EXPECT_EQ(good(5U), run("FF FF FF FF 07"));
+ EXPECT_EQ(good(5U), run("80 80 80 80 08"));
+ EXPECT_EQ(good(5U), run("FF FF FF FF 0F"));
+ EXPECT_EQ(kBad, run("FF FF FF FF 80")); // Too long / out of data.
+ EXPECT_EQ(good(5U), run("FF FF FF FF 10")); // "1" discarded.
+ EXPECT_EQ(good(5U), run("80 80 80 80 20")); // "2" discarded.
+ EXPECT_EQ(good(5U), run("AA AA AA AA 7A")); // "7" discarded.
+ EXPECT_EQ(kBad, run("FF FF FF FF FF 00")); // Too long.
+}
+
+} // namespace zucchini
diff --git a/buffer_view.h b/buffer_view.h
new file mode 100644
index 0000000..a7dfd17
--- /dev/null
+++ b/buffer_view.h
@@ -0,0 +1,201 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_
+#define COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <type_traits>
+
+#include "base/logging.h"
+#include "components/zucchini/algorithm.h"
+
+namespace zucchini {
+
+// A region within a buffer, stored as a starting |offset| and a |size|.
+struct BufferRegion {
+  // Although stored as |offset| and |size|, a region is often more convenient
+  // to treat as the interval [lo(), hi()) = [offset, offset + size).
+  size_t lo() const { return offset; }
+  size_t hi() const { return offset + size; }
+
+  // Returns whether the region lies inside |[0, container_size)|. Special
+  // case: a size-0 region starting at |container_size| does not fit.
+  bool FitsIn(size_t container_size) const {
+    if (offset >= container_size)
+      return false;
+    // |offset < container_size|, so the subtraction cannot underflow.
+    return size <= container_size - offset;
+  }
+
+  // Returns |v| clipped to the inclusive range |[lo(), hi()]|.
+  size_t InclusiveClamp(size_t v) const {
+    const size_t low = lo();
+    const size_t high = hi();
+    return zucchini::InclusiveClamp(v, low, high);
+  }
+
+  // Regions compare equal iff both |offset| and |size| match.
+  friend bool operator==(const BufferRegion& a, const BufferRegion& b) {
+    return a.offset == b.offset && a.size == b.size;
+  }
+  friend bool operator!=(const BufferRegion& a, const BufferRegion& b) {
+    return a.offset != b.offset || a.size != b.size;
+  }
+
+  // size_t is used to match BufferViewBase::size_type, which makes a region
+  // convenient for indexing into a buffer view.
+  size_t offset;
+  size_t size;
+};
+
+namespace internal {
+
+// TODO(huangs): Rename to BasicBufferView.
+// BufferViewBase should not be used directly; it is an implementation used for
+// both BufferView and MutableBufferView. It is a non-owning view over the
+// contiguous range [first_, last_) of elements of type |T|.
+template <class T>
+class BufferViewBase {
+ public:
+  using value_type = T;
+  using reference = T&;
+  using pointer = T*;
+  using iterator = T*;
+  using const_iterator = typename std::add_const<T>::type*;
+  using size_type = std::size_t;
+  using difference_type = std::ptrdiff_t;
+
+  // Creates a view over [first, last). Requires |first <= last|.
+  static BufferViewBase FromRange(iterator first, iterator last) {
+    DCHECK_GE(last, first);
+    BufferViewBase ret;
+    ret.first_ = first;
+    ret.last_ = last;
+    return ret;
+  }
+
+  // Creates an empty view, with both ends set to nullptr.
+  BufferViewBase() = default;
+
+  // Creates a view over |size| elements starting at |first|.
+  BufferViewBase(iterator first, size_type size)
+      : first_(first), last_(first_ + size) {
+    DCHECK_GE(last_, first_);
+  }
+
+  // Converting constructors: view the same range as |that|. These compile only
+  // if BufferViewBase<U>::iterator converts to |iterator|.
+  template <class U>
+  BufferViewBase(const BufferViewBase<U>& that)
+      : first_(that.begin()), last_(that.end()) {}
+
+  template <class U>
+  BufferViewBase(BufferViewBase<U>&& that)
+      : first_(that.begin()), last_(that.end()) {}
+
+  BufferViewBase(const BufferViewBase&) = default;
+  BufferViewBase& operator=(const BufferViewBase&) = default;
+
+  // Iterators
+
+  iterator begin() const { return first_; }
+  iterator end() const { return last_; }
+  const_iterator cbegin() const { return begin(); }
+  const_iterator cend() const { return end(); }
+
+  // Capacity
+
+  bool empty() const { return first_ == last_; }
+  size_type size() const { return last_ - first_; }
+
+  // Returns whether the buffer is large enough to cover |region|.
+  bool covers(const BufferRegion& region) const {
+    return region.FitsIn(size());
+  }
+
+  // Element access
+
+  // Returns the raw value at specified location |pos|.
+  // If |pos| is not within the range of the buffer, the process is terminated.
+  reference operator[](size_type pos) const {
+    CHECK_LT(pos, size());
+    return first_[pos];
+  }
+
+  // Returns a sub-buffer described by |region|.
+  BufferViewBase operator[](BufferRegion region) const {
+    DCHECK_LE(region.offset, size());
+    DCHECK_LE(region.size, size() - region.offset);
+    return {begin() + region.offset, region.size};
+  }
+
+  // Returns a reference to the data at |pos|, reinterpreted as type |U|. If
+  // [pos, pos + sizeof(U)) is not within the buffer, the process is
+  // terminated.
+  template <class U>
+  const U& read(size_type pos) const {
+    // Avoid computing |pos + sizeof(U)|: for huge |pos| the sum wraps around
+    // and an out-of-bounds access would pass the check.
+    CHECK_LE(sizeof(U), size());
+    CHECK_LE(pos, size() - sizeof(U));
+    return *reinterpret_cast<const U*>(begin() + pos);
+  }
+
+  // Stores |value| at |pos|, treating the destination as type |U|. If
+  // [pos, pos + sizeof(U)) is not within the buffer, the process is
+  // terminated.
+  template <class U>
+  void write(size_type pos, const U& value) {
+    // See read() regarding the overflow-safe form of the bounds check.
+    CHECK_LE(sizeof(U), size());
+    CHECK_LE(pos, size() - sizeof(U));
+    *reinterpret_cast<U*>(begin() + pos) = value;
+  }
+
+  // Returns whether a value of type |U| located at |pos| lies entirely within
+  // the buffer.
+  template <class U>
+  bool can_access(size_type pos) const {
+    return pos < size() && size() - pos >= sizeof(U);
+  }
+
+  // Returns a BufferRegion describing the full view, with offset = 0. If the
+  // BufferViewBase is derived from another, this does *not* return the
+  // original region used for its definition (hence "local").
+  BufferRegion local_region() const { return BufferRegion{0, size()}; }
+
+  // Returns whether |other| has the same size and the same contents.
+  bool equals(BufferViewBase other) const {
+    return size() == other.size() && std::equal(begin(), end(), other.begin());
+  }
+
+  // Modifiers
+
+  // Reduces the view to its first |new_size| elements. |new_size| must not
+  // exceed size().
+  void shrink(size_type new_size) {
+    // Compare sizes rather than |first_ + new_size| against |last_|: the
+    // pointer sum may land outside the buffer (or wrap) for large |new_size|.
+    DCHECK_LE(new_size, size());
+    last_ = first_ + new_size;
+  }
+
+  // Moves the start of the view forward by n bytes.
+  void remove_prefix(size_type n) {
+    DCHECK_LE(n, size());
+    first_ += n;
+  }
+
+  // Moves the start of the view to |it|, which is in range [begin(), end()].
+  // Seeking to end() leaves the view empty.
+  void seek(iterator it) {
+    DCHECK_GE(it, begin());
+    DCHECK_LE(it, end());
+    first_ = it;
+  }
+
+  // Given |origin| that contains |*this|, minimally increase |first_| (possibly
+  // by 0) so that |first_ <= last_|, and |first_ - origin.first_| is a multiple
+  // of |alignment|. On success, updates |first_| and returns true. Otherwise
+  // returns false.
+  bool AlignOn(BufferViewBase origin, size_type alignment) {
+    DCHECK_GT(alignment, 0U);
+    DCHECK_LE(origin.first_, first_);
+    DCHECK_GE(origin.last_, last_);
+    size_type aligned_size =
+        ceil(static_cast<size_type>(first_ - origin.first_), alignment);
+    if (aligned_size > static_cast<size_type>(last_ - origin.first_))
+      return false;
+    first_ = origin.first_ + aligned_size;
+    return true;
+  }
+
+ private:
+  iterator first_ = nullptr;
+  iterator last_ = nullptr;
+};
+
+} // namespace internal
+
+// Classes to encapsulate a contiguous sequence of raw data, without owning the
+// encapsulated memory regions. These are intended to be used as value types.
+
+using ConstBufferView = internal::BufferViewBase<const uint8_t>;
+using MutableBufferView = internal::BufferViewBase<uint8_t>;
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_
diff --git a/buffer_view_unittest.cc b/buffer_view_unittest.cc
new file mode 100644
index 0000000..cfb3d9b
--- /dev/null
+++ b/buffer_view_unittest.cc
@@ -0,0 +1,242 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_view.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iterator>
+#include <type_traits>
+#include <vector>
+
+#include "base/test/gtest_util.h"
+#include "components/zucchini/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+class BufferViewTest : public testing::Test {
+ protected:
+ // Some tests might modify this.
+ std::vector<uint8_t> bytes_ = ParseHexString("10 32 54 76 98 BA DC FE 10 00");
+};
+
+TEST_F(BufferViewTest, Size) {
+ for (size_t len = 0; len <= bytes_.size(); ++len) {
+ EXPECT_EQ(len, ConstBufferView(bytes_.data(), len).size());
+ EXPECT_EQ(len, MutableBufferView(bytes_.data(), len).size());
+ }
+}
+
+TEST_F(BufferViewTest, Empty) {
+ // Empty view.
+ EXPECT_TRUE(ConstBufferView(bytes_.data(), 0).empty());
+ EXPECT_TRUE(MutableBufferView(bytes_.data(), 0).empty());
+
+ for (size_t len = 1; len <= bytes_.size(); ++len) {
+ EXPECT_FALSE(ConstBufferView(bytes_.data(), len).empty());
+ EXPECT_FALSE(MutableBufferView(bytes_.data(), len).empty());
+ }
+}
+
+TEST_F(BufferViewTest, FromRange) {
+ constexpr size_t kSize = 10;
+ uint8_t raw_data[kSize] = {0x10, 0x32, 0x54, 0x76, 0x98,
+ 0xBA, 0xDC, 0xFE, 0x10, 0x00};
+ ConstBufferView buffer =
+ ConstBufferView::FromRange(std::begin(raw_data), std::end(raw_data));
+ EXPECT_EQ(bytes_.size(), buffer.size());
+ EXPECT_EQ(std::begin(raw_data), buffer.begin());
+
+ MutableBufferView mutable_buffer =
+ MutableBufferView::FromRange(std::begin(raw_data), std::end(raw_data));
+ EXPECT_EQ(bytes_.size(), mutable_buffer.size());
+ EXPECT_EQ(std::begin(raw_data), mutable_buffer.begin());
+
+ EXPECT_DCHECK_DEATH(
+ ConstBufferView::FromRange(std::end(raw_data), std::begin(raw_data)));
+
+ EXPECT_DCHECK_DEATH(MutableBufferView::FromRange(std::begin(raw_data) + 1,
+ std::begin(raw_data)));
+}
+
+TEST_F(BufferViewTest, Subscript) {
+ ConstBufferView view(bytes_.data(), bytes_.size());
+
+ EXPECT_EQ(0x10, view[0]);
+ static_assert(!std::is_assignable<decltype(view[0]), uint8_t>::value,
+ "BufferView values should not be mutable.");
+
+ MutableBufferView mutable_view(bytes_.data(), bytes_.size());
+
+ EXPECT_EQ(bytes_.data(), &mutable_view[0]);
+ mutable_view[0] = 42;
+ EXPECT_EQ(42, mutable_view[0]);
+}
+
+TEST_F(BufferViewTest, SubRegion) {
+ ConstBufferView view(bytes_.data(), bytes_.size());
+
+ ConstBufferView sub_view = view[{2, 4}];
+ EXPECT_EQ(view.begin() + 2, sub_view.begin());
+ EXPECT_EQ(size_t(4), sub_view.size());
+}
+
+TEST_F(BufferViewTest, Shrink) {
+ ConstBufferView buffer(bytes_.data(), bytes_.size());
+
+ buffer.shrink(bytes_.size());
+ EXPECT_EQ(bytes_.size(), buffer.size());
+ buffer.shrink(2);
+ EXPECT_EQ(size_t(2), buffer.size());
+ EXPECT_DCHECK_DEATH(buffer.shrink(bytes_.size()));
+}
+
+TEST_F(BufferViewTest, Read) {
+ ConstBufferView buffer(bytes_.data(), bytes_.size());
+
+ EXPECT_EQ(0x10U, buffer.read<uint8_t>(0));
+ EXPECT_EQ(0x54U, buffer.read<uint8_t>(2));
+
+ EXPECT_EQ(0x3210U, buffer.read<uint16_t>(0));
+ EXPECT_EQ(0x7654U, buffer.read<uint16_t>(2));
+
+ EXPECT_EQ(0x76543210U, buffer.read<uint32_t>(0));
+ EXPECT_EQ(0xBA987654U, buffer.read<uint32_t>(2));
+
+ EXPECT_EQ(0xFEDCBA9876543210ULL, buffer.read<uint64_t>(0));
+
+ EXPECT_EQ(0x00, buffer.read<uint8_t>(9));
+ EXPECT_DEATH(buffer.read<uint8_t>(10), "");
+
+ EXPECT_EQ(0x0010FEDCU, buffer.read<uint32_t>(6));
+ EXPECT_DEATH(buffer.read<uint32_t>(7), "");
+}
+
+TEST_F(BufferViewTest, Write) {
+ MutableBufferView buffer(bytes_.data(), bytes_.size());
+
+ buffer.write<uint32_t>(0, 0x01234567);
+ buffer.write<uint32_t>(4, 0x89ABCDEF);
+ EXPECT_EQ(ParseHexString("67 45 23 01 EF CD AB 89 10 00"),
+ std::vector<uint8_t>(buffer.begin(), buffer.end()));
+
+ buffer.write<uint8_t>(9, 0xFF);
+ EXPECT_DEATH(buffer.write<uint8_t>(10, 0xFF), "");
+
+ buffer.write<uint32_t>(6, 0xFFFFFFFF);
+ EXPECT_DEATH(buffer.write<uint32_t>(7, 0xFFFFFFFF), "");
+}
+
+TEST_F(BufferViewTest, CanAccess) {
+ MutableBufferView buffer(bytes_.data(), bytes_.size());
+ EXPECT_TRUE(buffer.can_access<uint32_t>(0));
+ EXPECT_TRUE(buffer.can_access<uint32_t>(6));
+ EXPECT_FALSE(buffer.can_access<uint32_t>(7));
+ EXPECT_FALSE(buffer.can_access<uint32_t>(10));
+ EXPECT_FALSE(buffer.can_access<uint32_t>(0xFFFFFFFFU));
+
+ EXPECT_TRUE(buffer.can_access<uint8_t>(0));
+ EXPECT_TRUE(buffer.can_access<uint8_t>(7));
+ EXPECT_TRUE(buffer.can_access<uint8_t>(9));
+ EXPECT_FALSE(buffer.can_access<uint8_t>(10));
+ EXPECT_FALSE(buffer.can_access<uint8_t>(0xFFFFFFFF));
+}
+
+TEST_F(BufferViewTest, LocalRegion) {
+ ConstBufferView view(bytes_.data(), bytes_.size());
+
+ BufferRegion region = view.local_region();
+ EXPECT_EQ(0U, region.offset);
+ EXPECT_EQ(bytes_.size(), region.size);
+}
+
+TEST_F(BufferViewTest, Covers) {
+ EXPECT_FALSE(ConstBufferView().covers({0, 0}));
+ EXPECT_FALSE(ConstBufferView().covers({0, 1}));
+
+ ConstBufferView view(bytes_.data(), bytes_.size());
+
+ EXPECT_TRUE(view.covers({0, 0}));
+ EXPECT_TRUE(view.covers({0, 1}));
+ EXPECT_TRUE(view.covers({0, bytes_.size()}));
+ EXPECT_FALSE(view.covers({0, bytes_.size() + 1}));
+ EXPECT_FALSE(view.covers({1, bytes_.size()}));
+
+ EXPECT_TRUE(view.covers({bytes_.size() - 1, 0}));
+ EXPECT_TRUE(view.covers({bytes_.size() - 1, 1}));
+ EXPECT_FALSE(view.covers({bytes_.size() - 1, 2}));
+ EXPECT_FALSE(view.covers({bytes_.size(), 0}));
+ EXPECT_FALSE(view.covers({bytes_.size(), 1}));
+
+ EXPECT_FALSE(view.covers({1, size_t(-1)}));
+ EXPECT_FALSE(view.covers({size_t(-1), 1}));
+ EXPECT_FALSE(view.covers({size_t(-1), size_t(-1)}));
+}
+
+TEST_F(BufferViewTest, Equals) {
+ // Almost identical to |bytes_|, except at 2 places: v v
+ std::vector<uint8_t> bytes2 = ParseHexString("10 32 54 76 98 AB CD FE 10 00");
+ ConstBufferView view1(bytes_.data(), bytes_.size());
+ ConstBufferView view2(&bytes2[0], bytes2.size());
+
+ EXPECT_TRUE(view1.equals(view1));
+ EXPECT_TRUE(view2.equals(view2));
+ EXPECT_FALSE(view1.equals(view2));
+ EXPECT_FALSE(view2.equals(view1));
+
+ EXPECT_TRUE((view1[{0, 0}]).equals(view2[{0, 0}]));
+ EXPECT_TRUE((view1[{0, 0}]).equals(view2[{5, 0}]));
+ EXPECT_TRUE((view1[{0, 5}]).equals(view2[{0, 5}]));
+ EXPECT_FALSE((view1[{0, 6}]).equals(view2[{0, 6}]));
+ EXPECT_FALSE((view1[{0, 7}]).equals(view1[{0, 6}]));
+ EXPECT_TRUE((view1[{5, 3}]).equals(view1[{5, 3}]));
+ EXPECT_FALSE((view1[{5, 1}]).equals(view1[{5, 3}]));
+ EXPECT_TRUE((view2[{0, 1}]).equals(view2[{8, 1}]));
+ EXPECT_FALSE((view2[{1, 1}]).equals(view2[{8, 1}]));
+}
+
+TEST_F(BufferViewTest, AlignOn) {
+ using size_type = ConstBufferView::size_type;
+ ConstBufferView image(bytes_.data(), bytes_.size());
+ ConstBufferView view = image;
+ ASSERT_EQ(10U, view.size());
+
+ auto get_pos = [&image, &view]() -> size_type {
+ EXPECT_TRUE(view.begin() >= image.begin()); // Iterator compare.
+ return static_cast<size_type>(view.begin() - image.begin());
+ };
+
+ EXPECT_EQ(0U, get_pos());
+ view.remove_prefix(1U);
+ EXPECT_EQ(1U, get_pos());
+ view.remove_prefix(4U);
+ EXPECT_EQ(5U, get_pos());
+
+ // Align.
+ EXPECT_TRUE(view.AlignOn(image, 1U)); // Trivial case.
+ EXPECT_EQ(5U, get_pos());
+
+ EXPECT_TRUE(view.AlignOn(image, 2U));
+ EXPECT_EQ(6U, get_pos());
+ EXPECT_TRUE(view.AlignOn(image, 2U));
+ EXPECT_EQ(6U, get_pos());
+
+ EXPECT_TRUE(view.AlignOn(image, 4U));
+ EXPECT_EQ(8U, get_pos());
+ EXPECT_TRUE(view.AlignOn(image, 2U));
+ EXPECT_EQ(8U, get_pos());
+
+ view.remove_prefix(1U);
+ EXPECT_EQ(9U, get_pos());
+
+ // Pos is at 9, align to 4 would yield 12, but size is 10, so this fails.
+ EXPECT_FALSE(view.AlignOn(image, 4U));
+ EXPECT_EQ(9U, get_pos());
+ EXPECT_TRUE(view.AlignOn(image, 2U));
+ EXPECT_EQ(10U, get_pos());
+}
+
+} // namespace zucchini
diff --git a/crc32.cc b/crc32.cc
new file mode 100644
index 0000000..8a40296
--- /dev/null
+++ b/crc32.cc
@@ -0,0 +1,43 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/crc32.h"
+
+#include <array>
+
+#include "base/logging.h"
+
+namespace zucchini {
+
+namespace {
+
+std::array<uint32_t, 256> MakeCrc32Table() {  // Builds the byte-indexed table.
+ constexpr uint32_t kCrc32Poly = 0xEDB88320;  // NOTE(review): presumably the bit-reversed CRC-32 polynomial 0x04C11DB7 -- confirm.
+
+ std::array<uint32_t, 256> crc32Table;
+ for (uint32_t i = 0; i < 256; ++i) {
+ uint32_t r = i;
+ for (int j = 0; j < 8; ++j)  // One shift-and-conditional-XOR step per bit.
+ r = (r >> 1) ^ (kCrc32Poly & ~((r & 1) - 1));  // Mask is all 1s iff LSB set.
+ crc32Table[i] = r;
+ }
+ return crc32Table;
+}
+
+} // namespace
+
+// Minimalistic CRC-32 implementation for Zucchini usage. Adapted from LZMA SDK
+// (found at third_party/lzma_sdk/7zCrc.c), which is public domain.
+uint32_t CalculateCrc32(const uint8_t* first, const uint8_t* last) {
+ DCHECK_GE(last, first);  // [|first|, |last|) must not be reversed.
+
+ static const std::array<uint32_t, 256> kCrc32Table = MakeCrc32Table();  // Built on first call.
+
+ uint32_t ret = 0xFFFFFFFF;  // Initial register value.
+ for (; first != last; ++first)
+ ret = kCrc32Table[(ret ^ *first) & 0xFF] ^ (ret >> 8);  // Consume one byte.
+ return ret ^ 0xFFFFFFFF;  // Final inversion; an empty range thus yields 0.
+}
+
+} // namespace zucchini
diff --git a/crc32.h b/crc32.h
new file mode 100644
index 0000000..c729f5b
--- /dev/null
+++ b/crc32.h
@@ -0,0 +1,17 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_CRC32_H_
+#define COMPONENTS_ZUCCHINI_CRC32_H_
+
+#include <stdint.h>
+
+namespace zucchini {
+
+// Calculates CRC-32 of the given range [|first|, |last|). Requires |first| <= |last|; an empty range yields 0.
+uint32_t CalculateCrc32(const uint8_t* first, const uint8_t* last);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_CRC32_H_
diff --git a/crc32_unittest.cc b/crc32_unittest.cc
new file mode 100644
index 0000000..5ec85a8
--- /dev/null
+++ b/crc32_unittest.cc
@@ -0,0 +1,47 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/crc32.h"
+
+#include <stdint.h>
+
+#include <iterator>
+
+#include "base/test/gtest_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+constexpr uint8_t bytes[] = {0x10, 0x32, 0x54, 0x76, 0x98,
+ 0xBA, 0xDC, 0xFE, 0x10, 0x00};  // bytes[0] == bytes[8]; bytes[9] == 0.
+
+TEST(Crc32Test, All) {
+ // Results can be verified with any CRC-32 calculator found online.
+
+ // Empty region.
+ EXPECT_EQ(0x00000000U, CalculateCrc32(std::begin(bytes), std::begin(bytes)));
+
+ // Single byte.
+ EXPECT_EQ(0xCFB5FFE9U,
+ CalculateCrc32(std::begin(bytes), std::begin(bytes) + 1));
+
+ // Same byte (0x10) appearing at different location: result is unchanged.
+ EXPECT_EQ(0xCFB5FFE9U,
+ CalculateCrc32(std::begin(bytes) + 8, std::begin(bytes) + 9));
+
+ // Single byte of 0.
+ EXPECT_EQ(0xD202EF8DU,
+ CalculateCrc32(std::begin(bytes) + 9, std::end(bytes)));
+
+ // Whole region.
+ EXPECT_EQ(0xA86FD7D6U, CalculateCrc32(std::begin(bytes), std::end(bytes)));
+
+ // Whole region excluding 0 at end.
+ EXPECT_EQ(0x0762F38BU,
+ CalculateCrc32(std::begin(bytes), std::begin(bytes) + 9));
+
+ EXPECT_DCHECK_DEATH(CalculateCrc32(std::begin(bytes) + 1, std::begin(bytes)));  // |last| < |first|.
+}
+
+} // namespace zucchini
diff --git a/disassembler.cc b/disassembler.cc
new file mode 100644
index 0000000..18527a7
--- /dev/null
+++ b/disassembler.cc
@@ -0,0 +1,36 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler.h"
+
+#include "base/logging.h"
+
+namespace zucchini {
+
+std::unique_ptr<ReferenceReader> ReferenceGroup::GetReader(
+ offset_t lower,
+ offset_t upper,
+ Disassembler* disasm) const {
+ DCHECK_LE(lower, upper);
+ DCHECK_LE(upper, disasm->size());  // Range must lie within the parsed image.
+ return (disasm->*reader_factory_)(lower, upper);  // Call via member pointer.
+}
+
+std::unique_ptr<ReferenceReader> ReferenceGroup::GetReader(
+ Disassembler* disasm) const {  // Reader over the whole image: [0, size()).
+ return (disasm->*reader_factory_)(0, static_cast<offset_t>(disasm->size()));
+}
+
+std::unique_ptr<ReferenceWriter> ReferenceGroup::GetWriter(
+ MutableBufferView image,
+ Disassembler* disasm) const {
+ DCHECK_EQ(image.begin(), disasm->GetImage().begin());  // Same start address.
+ DCHECK_EQ(image.size(), disasm->size());  // Same length.
+ return (disasm->*writer_factory_)(image);  // Call via member pointer.
+}
+
+Disassembler::Disassembler() = default;
+Disassembler::~Disassembler() = default;
+
+} // namespace zucchini
diff --git a/disassembler.h b/disassembler.h
new file mode 100644
index 0000000..8d41eaa
--- /dev/null
+++ b/disassembler.h
@@ -0,0 +1,133 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
+
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+class Disassembler;
+
+// A ReferenceGroup is associated with a specific |type| and has convenience
+// methods to obtain readers and writers for that type. A ReferenceGroup does
+// not store references; it is a lightweight class that communicates with the
+// disassembler to operate on them.
+class ReferenceGroup {
+ public:
+ // Member function pointer used to obtain a ReferenceReader.
+ using ReaderFactory = std::unique_ptr<ReferenceReader> (
+ Disassembler::*)(offset_t lower, offset_t upper);
+
+ // Member function pointer used to obtain a ReferenceWriter.
+ using WriterFactory = std::unique_ptr<ReferenceWriter> (Disassembler::*)(
+ MutableBufferView image);
+
+ ReferenceGroup() = default;  // Leaves both factories null (see members below).
+
+ // RefinedReaderFactory and RefinedWriterFactory don't have to be identical
+ // to ReaderFactory and WriterFactory, but they must be convertible to them
+ // via static_cast. As a result, they can be pointers to member functions of
+ // a derived Disassembler.
+ template <class RefinedReaderFactory, class RefinedWriterFactory>
+ ReferenceGroup(ReferenceTypeTraits traits,
+ RefinedReaderFactory reader_factory,
+ RefinedWriterFactory writer_factory)
+ : traits_(traits),
+ reader_factory_(static_cast<ReaderFactory>(reader_factory)),
+ writer_factory_(static_cast<WriterFactory>(writer_factory)) {}
+
+ // Returns a reader for all references in the binary.
+ // Invalidates any other writer or reader previously obtained for |disasm|.
+ std::unique_ptr<ReferenceReader> GetReader(Disassembler* disasm) const;
+
+ // Returns a reader for references whose bytes are entirely contained in
+ // |[lower, upper)|.
+ // Invalidates any other writer or reader previously obtained for |disasm|.
+ std::unique_ptr<ReferenceReader> GetReader(offset_t lower,
+ offset_t upper,
+ Disassembler* disasm) const;
+
+ // Returns a writer for references in |image|, assuming that |image| was the
+ // same one initially parsed by |disasm|.
+ // Invalidates any other writer or reader previously obtained for |disasm|.
+ std::unique_ptr<ReferenceWriter> GetWriter(MutableBufferView image,
+ Disassembler* disasm) const;
+
+ // Returns traits describing the reference type.
+ const ReferenceTypeTraits& traits() const { return traits_; }
+
+ // Shorthand for traits().width.
+ offset_t width() const { return traits().width; }
+
+ // Shorthand for traits().type_tag.
+ TypeTag type_tag() const { return traits().type_tag; }
+
+ // Shorthand for traits().pool_tag.
+ PoolTag pool_tag() const { return traits().pool_tag; }
+
+ private:
+ ReferenceTypeTraits traits_;
+ ReaderFactory reader_factory_ = nullptr;  // Invoked on |disasm| by GetReader().
+ WriterFactory writer_factory_ = nullptr;  // Invoked on |disasm| by GetWriter().
+};
+
+// A Disassembler is used to encapsulate architecture specific operations, to:
+// - Describe types of references found in the architecture using traits.
+// - Extract references contained in an image file.
+// - Correct target for some references.
+class Disassembler {
+ public:
+ // Attempts to parse |image| and create an architecture-specific Disassembler,
+ // as determined by DIS, which is inherited from Disassembler. Returns an
+ // instance of DIS if successful, and null otherwise.
+ template <class DIS>
+ static std::unique_ptr<DIS> Make(ConstBufferView image) {
+ auto disasm = std::make_unique<DIS>();
+ if (!disasm->Parse(image))  // Parse() is non-public in DIS, so DIS must befriend Disassembler.
+ return nullptr;
+ return disasm;
+ }
+
+ virtual ~Disassembler();
+
+ // Returns the type of executable handled by the Disassembler.
+ virtual ExecutableType GetExeType() const = 0;
+
+ // Returns a more detailed description of the executable type.
+ virtual std::string GetExeTypeString() const = 0;
+
+ // Creates and returns a vector that contains all groups of references.
+ // Groups must be aggregated by pool.
+ virtual std::vector<ReferenceGroup> MakeReferenceGroups() const = 0;
+
+ ConstBufferView GetImage() const { return image_; }
+ size_t size() const { return image_.size(); }  // Size of |image_|, not of the input passed to Parse().
+
+ protected:
+ Disassembler();
+
+ // Parses |image| and initializes internal states. Returns true on success.
+ // This must be called once and before any other operation.
+ virtual bool Parse(ConstBufferView image) = 0;
+
+ // Raw image data. After Parse(), a Disassembler should shrink this to contain
+ // only the portion containing the executable file it recognizes.
+ ConstBufferView image_;
+
+ DISALLOW_COPY_AND_ASSIGN(Disassembler);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
diff --git a/disassembler_no_op.cc b/disassembler_no_op.cc
new file mode 100644
index 0000000..69d0eb8
--- /dev/null
+++ b/disassembler_no_op.cc
@@ -0,0 +1,28 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_no_op.h"
+
+namespace zucchini {
+
+DisassemblerNoOp::DisassemblerNoOp() = default;
+DisassemblerNoOp::~DisassemblerNoOp() = default;
+
+ExecutableType DisassemblerNoOp::GetExeType() const {
+ return kExeTypeNoOp;
+}
+
+std::string DisassemblerNoOp::GetExeTypeString() const {
+ return "(Unknown)";
+}
+
+std::vector<ReferenceGroup> DisassemblerNoOp::MakeReferenceGroups() const {
+ return std::vector<ReferenceGroup>();
+}
+
+// Records |image| as the image handled by this disassembler. No format checks
+// are applied, so parsing always succeeds. Storing |image| is required so that
+// GetImage() and size() describe the parsed data instead of an empty view.
+bool DisassemblerNoOp::Parse(ConstBufferView image) {
+  image_ = image;
+  return true;
+}
+
+} // namespace zucchini
diff --git a/disassembler_no_op.h b/disassembler_no_op.h
new file mode 100644
index 0000000..280e8df
--- /dev/null
+++ b/disassembler_no_op.h
@@ -0,0 +1,40 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// This disassembler works on any file and does not look for references.
+class DisassemblerNoOp : public Disassembler {
+ public:
+ DisassemblerNoOp();
+ ~DisassemblerNoOp() override;
+
+ // Disassembler:
+ ExecutableType GetExeType() const override;
+ std::string GetExeTypeString() const override;
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ private:
+ friend Disassembler;  // Lets Disassembler::Make() call the private Parse().
+
+ bool Parse(ConstBufferView image) override;
+
+ DISALLOW_COPY_AND_ASSIGN(DisassemblerNoOp);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_
diff --git a/disassembler_win32.cc b/disassembler_win32.cc
new file mode 100644
index 0000000..5bdc503
--- /dev/null
+++ b/disassembler_win32.cc
@@ -0,0 +1,392 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_win32.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/abs32_utils.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/buffer_source.h"
+#include "components/zucchini/rel32_finder.h"
+#include "components/zucchini/rel32_utils.h"
+#include "components/zucchini/reloc_utils.h"
+
+namespace zucchini {
+
+namespace {
+
+// Decides whether |image| points to a Win32 PE file. If this is a possibility,
+// assigns |source| to enable further parsing, and returns true. Otherwise
+// leaves |source| at an undefined state and returns false. |Traits| is unused.
+template <class Traits>
+bool ReadWin32Header(ConstBufferView image, BufferSource* source) {
+ *source = BufferSource(image);
+
+ // Check "MZ" magic of DOS header.
+ if (!source->CheckNextBytes({'M', 'Z'}))
+ return false;
+
+ const auto* dos_header = source->GetPointer<pe::ImageDOSHeader>();
+ if (!dos_header || (dos_header->e_lfanew & 7) != 0)  // Null, or |e_lfanew| not a multiple of 8.
+ return false;
+
+ // Offset to PE header is in DOS header; restart from |image| and skip to it.
+ *source = std::move(BufferSource(image).Skip(dos_header->e_lfanew));
+ // Check 'PE\0\0' magic from PE header.
+ if (!source->ConsumeBytes({'P', 'E', 0, 0}))
+ return false;
+
+ return true;
+}
+
+template <class Traits>
+const pe::ImageDataDirectory* ReadDataDirectory(
+ const typename Traits::ImageOptionalHeader* optional_header,
+ size_t index) {
+ if (index >= optional_header->number_of_rva_and_sizes)  // Entry not present.
+ return nullptr;
+ return &optional_header->data_directory[index];  // Points into the header.
+}
+
+// Decides whether |section| (assumed value) is a section that contains code.
+template <class Traits>
+bool IsWin32CodeSection(const pe::ImageSectionHeader& section) {
+ return (section.characteristics & kCodeCharacteristics) ==
+ kCodeCharacteristics;  // Every bit of kCodeCharacteristics must be set.
+}
+
+} // namespace
+
+/******** Win32X86Traits ********/
+
+// static
+constexpr Bitness Win32X86Traits::kBitness;
+constexpr ExecutableType Win32X86Traits::kExeType;
+const char Win32X86Traits::kExeTypeString[] = "Windows PE x86";
+
+/******** Win32X64Traits ********/
+
+// static
+constexpr Bitness Win32X64Traits::kBitness;
+constexpr ExecutableType Win32X64Traits::kExeType;
+const char Win32X64Traits::kExeTypeString[] = "Windows PE x64";
+
+/******** DisassemblerWin32 ********/
+
+// static.
+template <class Traits>
+bool DisassemblerWin32<Traits>::QuickDetect(ConstBufferView image) {
+ BufferSource source;  // Scratch only; its final position is discarded.
+ return ReadWin32Header<Traits>(image, &source);
+}
+
+template <class Traits>
+DisassemblerWin32<Traits>::DisassemblerWin32() = default;
+
+template <class Traits>
+DisassemblerWin32<Traits>::~DisassemblerWin32() = default;
+
+template <class Traits>
+ExecutableType DisassemblerWin32<Traits>::GetExeType() const {
+ return Traits::kExeType;
+}
+
+template <class Traits>
+std::string DisassemblerWin32<Traits>::GetExeTypeString() const {
+ return Traits::kExeTypeString;
+}
+
+template <class Traits>
+std::vector<ReferenceGroup> DisassemblerWin32<Traits>::MakeReferenceGroups()
+ const {
+ return {  // Each entry: {traits, reader factory, writer factory}.
+ {ReferenceTypeTraits{2, TypeTag(kReloc), PoolTag(kReloc)},
+ &DisassemblerWin32::MakeReadRelocs, &DisassemblerWin32::MakeWriteRelocs},
+ {ReferenceTypeTraits{Traits::kVAWidth, TypeTag(kAbs32), PoolTag(kAbs32)},
+ &DisassemblerWin32::MakeReadAbs32, &DisassemblerWin32::MakeWriteAbs32},
+ {ReferenceTypeTraits{4, TypeTag(kRel32), PoolTag(kRel32)},
+ &DisassemblerWin32::MakeReadRel32, &DisassemblerWin32::MakeWriteRel32},
+ };
+}
+
+template <class Traits>
+std::unique_ptr<ReferenceReader> DisassemblerWin32<Traits>::MakeReadRelocs(
+ offset_t lo,
+ offset_t hi) {
+ ParseAndStoreRelocBlocks();  // NOTE(review): the returned status is ignored here.
+
+ RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_,
+ reloc_block_offsets_, lo, hi);
+ CHECK_GE(image_.size(), Traits::kVAWidth);  // Guards the subtraction below.
+ offset_t offset_bound =
+ base::checked_cast<offset_t>(image_.size() - Traits::kVAWidth + 1);
+ return std::make_unique<RelocReaderWin32>(std::move(reloc_rva_reader),
+ Traits::kRelocType, offset_bound,
+ translator_);
+}
+
+template <class Traits>
+std::unique_ptr<ReferenceReader> DisassemblerWin32<Traits>::MakeReadAbs32(
+ offset_t lo,
+ offset_t hi) {
+ ParseAndStoreAbs32();  // Lazily fills |abs32_locations_|.
+ Abs32RvaExtractorWin32 abs_rva_extractor(
+ image_, {Traits::kBitness, image_base_}, abs32_locations_, lo, hi);
+ return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor),
+ translator_);
+}
+
+template <class Traits>
+std::unique_ptr<ReferenceReader> DisassemblerWin32<Traits>::MakeReadRel32(
+ offset_t lo,
+ offset_t hi) {
+ ParseAndStoreRel32();  // Lazily fills |rel32_locations_|.
+ return std::make_unique<Rel32ReaderX86>(image_, lo, hi, &rel32_locations_,
+ translator_);
+}
+
+template <class Traits>
+std::unique_ptr<ReferenceWriter> DisassemblerWin32<Traits>::MakeWriteRelocs(
+ MutableBufferView image) {
+ ParseAndStoreRelocBlocks();  // NOTE(review): the returned status is ignored here.
+ return std::make_unique<RelocWriterWin32>(Traits::kRelocType, image,
+ reloc_region_, reloc_block_offsets_,
+ translator_);
+}
+
+template <class Traits>
+std::unique_ptr<ReferenceWriter> DisassemblerWin32<Traits>::MakeWriteAbs32(
+ MutableBufferView image) {  // Unlike the readers, triggers no lazy parsing.
+ return std::make_unique<Abs32WriterWin32>(
+ image, AbsoluteAddress(Traits::kBitness, image_base_), translator_);
+}
+
+template <class Traits>
+std::unique_ptr<ReferenceWriter> DisassemblerWin32<Traits>::MakeWriteRel32(
+ MutableBufferView image) {  // Unlike the readers, triggers no lazy parsing.
+ return std::make_unique<Rel32WriterX86>(image, translator_);
+}
+
+template <class Traits>
+bool DisassemblerWin32<Traits>::Parse(ConstBufferView image) {
+ image_ = image;  // ParseHeader() shrinks this on success.
+ return ParseHeader();
+}
+
+template <class Traits>
+bool DisassemblerWin32<Traits>::ParseHeader() {
+ BufferSource source;
+
+ if (!ReadWin32Header<Traits>(image_, &source))
+ return false;
+
+ auto* coff_header = source.GetPointer<pe::ImageFileHeader>();
+ if (!coff_header ||
+ coff_header->size_of_optional_header <
+ offsetof(typename Traits::ImageOptionalHeader, data_directory)) {
+ return false;
+ }
+
+ auto* optional_header =
+ source.GetPointer<typename Traits::ImageOptionalHeader>();
+ if (!optional_header || optional_header->magic != Traits::kMagic)  // Magic selects x86 vs. x64.
+ return false;
+
+ const size_t kDataDirBase =
+ offsetof(typename Traits::ImageOptionalHeader, data_directory);
+ size_t size_of_optional_header = coff_header->size_of_optional_header;
+ if (size_of_optional_header < kDataDirBase)  // Redundant with the check on |coff_header| above.
+ return false;
+
+ const size_t data_dir_bound =
+ (size_of_optional_header - kDataDirBase) / sizeof(pe::ImageDataDirectory);
+ if (optional_header->number_of_rva_and_sizes > data_dir_bound)  // More entries claimed than fit.
+ return false;
+
+ base_relocation_table_ = ReadDataDirectory<Traits>(
+ optional_header, pe::kIndexOfBaseRelocationTable);
+ if (!base_relocation_table_)  // A reloc directory entry is mandatory here.
+ return false;
+
+ image_base_ = optional_header->image_base;
+
+ // |optional_header->size_of_image| is the size of the image when loaded into
+ // memory, and not the actual size on disk.
+ rva_t rva_bound = optional_header->size_of_image;
+ if (rva_bound >= kRvaBound)
+ return false;
+
+ // An exclusive upper bound of all offsets used in the image. This gets
+ // updated as sections get visited.
+ offset_t offset_bound =
+ base::checked_cast<offset_t>(source.begin() - image_.begin());
+
+ // Extract |sections_|.
+ size_t sections_count = coff_header->number_of_sections;
+ auto* sections_array =
+ source.GetArray<pe::ImageSectionHeader>(sections_count);
+ if (!sections_array)
+ return false;
+ sections_.assign(sections_array, sections_array + sections_count);
+
+ // Prepare |units| for offset-RVA translation.
+ std::vector<AddressTranslator::Unit> units;
+ units.reserve(sections_count);
+
+ // Visit each section, validate, and add address translation data to |units|.
+ bool has_text_section = false;
+ decltype(pe::ImageSectionHeader::virtual_address) prev_virtual_address = 0;
+ for (size_t i = 0; i < sections_count; ++i) {
+ const pe::ImageSectionHeader& section = sections_[i];
+ // Apply strict checks on section bounds.
+ if (!image_.covers(
+ {section.file_offset_of_raw_data, section.size_of_raw_data})) {
+ return false;
+ }
+ if (!RangeIsBounded(section.virtual_address, section.virtual_size,
+ rva_bound)) {
+ return false;
+ }
+
+ // PE sections should be sorted by RVAs. For robustness, we don't rely on
+ // this, so even if unsorted we don't care. Output warning though.
+ if (prev_virtual_address > section.virtual_address)
+ LOG(WARNING) << "RVA anomaly found for Section " << i;
+ prev_virtual_address = section.virtual_address;
+
+ // Add |section| data for offset-RVA translation.
+ units.push_back({section.file_offset_of_raw_data, section.size_of_raw_data,
+ section.virtual_address, section.virtual_size});
+
+ offset_t end_offset =
+ section.file_offset_of_raw_data + section.size_of_raw_data;
+ offset_bound = std::max(end_offset, offset_bound);  // Track the furthest section end.
+ if (IsWin32CodeSection<Traits>(section))
+ has_text_section = true;
+ }
+
+ if (offset_bound > image_.size())
+ return false;
+ if (!has_text_section)  // At least one code section is required.
+ return false;
+
+ // Initialize |translator_| for offset-RVA translations. Any inconsistency
+ // (e.g., 2 offsets correspond to the same RVA) would invalidate the PE file.
+ if (translator_.Initialize(std::move(units)) != AddressTranslator::kSuccess)
+ return false;
+
+ // Resize |image_| to include only contents claimed by sections. Note that
+ // this may miss digital signatures at end of PE files, but for patching this
+ // is of minor concern.
+ image_.shrink(offset_bound);
+
+ return true;
+}
+
+template <class Traits>
+bool DisassemblerWin32<Traits>::ParseAndStoreRelocBlocks() {
+ if (has_parsed_relocs_)  // NOTE(review): returns true even if the first parse failed.
+ return true;
+ has_parsed_relocs_ = true;  // Set up-front, so a failed parse is never retried.
+ DCHECK(reloc_block_offsets_.empty());
+
+ offset_t relocs_offset =
+ translator_.RvaToOffset(base_relocation_table_->virtual_address);
+ size_t relocs_size = base_relocation_table_->size;
+ reloc_region_ = {relocs_offset, relocs_size};  // Stays assigned even if rejected below.
+ // Reject bogus relocs. Note that empty relocs are allowed!
+ if (!image_.covers(reloc_region_))
+ return false;
+
+ // Precompute offsets of all reloc blocks.
+ return RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_,
+ &reloc_block_offsets_);
+}
+
+// TODO(huangs): Print warning if too few abs32 references are found.
+// Empirically, file size / # relocs is < 100, so take 200 as the
+// threshold for warning.
+template <class Traits>
+bool DisassemblerWin32<Traits>::ParseAndStoreAbs32() {
+ if (has_parsed_abs32_)
+ return true;
+ has_parsed_abs32_ = true;
+
+ ParseAndStoreRelocBlocks();  // MakeReadRelocs() below calls this again (no-op then).
+
+ std::unique_ptr<ReferenceReader> relocs = MakeReadRelocs(0, offset_t(size()));
+ for (auto ref = relocs->GetNext(); ref.has_value(); ref = relocs->GetNext())
+ abs32_locations_.push_back(ref->target);  // Each reloc target is an abs32 location.
+
+ abs32_locations_.shrink_to_fit();
+ std::sort(abs32_locations_.begin(), abs32_locations_.end());
+
+ // Abs32 reference bodies must not overlap. If found, simply remove them.
+ size_t num_removed =
+ RemoveOverlappingAbs32Locations(Traits::kBitness, &abs32_locations_);
+ LOG_IF(WARNING, num_removed) << "Found and removed " << num_removed
+ << " abs32 locations with overlapping bodies.";
+ return true;  // No failure path in this function: always reports success.
+}
+
+template <class Traits>
+bool DisassemblerWin32<Traits>::ParseAndStoreRel32() {
+ if (has_parsed_rel32_)
+ return true;
+ has_parsed_rel32_ = true;
+
+ ParseAndStoreAbs32();  // |abs32_locations_| is needed to compute gaps below.
+
+ AddressTranslator::OffsetToRvaCache location_offset_to_rva(translator_);
+ AddressTranslator::RvaToOffsetCache target_rva_checker(translator_);
+
+ for (const pe::ImageSectionHeader& section : sections_) {
+ if (!IsWin32CodeSection<Traits>(section))  // Only code sections are scanned.
+ continue;
+
+ rva_t start_rva = section.virtual_address;
+ rva_t end_rva = start_rva + section.virtual_size;
+
+ ConstBufferView region =
+ image_[{section.file_offset_of_raw_data, section.size_of_raw_data}];
+ Abs32GapFinder gap_finder(image_, region, abs32_locations_,
+ Traits::kVAWidth);
+ typename Traits::RelFinder finder(image_);
+ // Iterate over gaps between abs32 references, to avoid collision.
+ for (auto gap = gap_finder.GetNext(); gap.has_value();
+ gap = gap_finder.GetNext()) {
+ finder.Reset(gap.value());
+ // Iterate over heuristically detected rel32 references, validate, and add
+ // to |rel32_locations_|.
+ for (auto rel32 = finder.GetNext(); rel32.has_value();
+ rel32 = finder.GetNext()) {
+ offset_t rel32_offset = offset_t(rel32->location - image_.begin());
+ rva_t rel32_rva = location_offset_to_rva.Convert(rel32_offset);
+ rva_t target_rva = rel32_rva + 4 + image_.read<uint32_t>(rel32_offset);  // NOTE(review): +4 presumably skips the 4-byte rel32 body -- confirm.
+ if (target_rva_checker.IsValid(target_rva) &&
+ (rel32->can_point_outside_section ||
+ (start_rva <= target_rva && target_rva < end_rva))) {
+ finder.Accept();
+ rel32_locations_.push_back(rel32_offset);
+ }
+ }
+ }
+ }
+ rel32_locations_.shrink_to_fit();
+ // |sections_| entries are usually sorted by offset, but there's no guarantee.
+ // So sort explicitly, to be sure.
+ std::sort(rel32_locations_.begin(), rel32_locations_.end());
+ return true;  // No failure path in this function: always reports success.
+}
+
+// Explicit instantiation for supported classes.
+template class DisassemblerWin32<Win32X86Traits>;
+template class DisassemblerWin32<Win32X64Traits>;
+
+} // namespace zucchini
diff --git a/disassembler_win32.h b/disassembler_win32.h
new file mode 100644
index 0000000..8e410ee
--- /dev/null
+++ b/disassembler_win32.h
@@ -0,0 +1,129 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/macros.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/type_win_pe.h"
+
+namespace zucchini {
+
+class Rel32FinderX86;
+class Rel32FinderX64;
+
+struct Win32X86Traits {
+ static constexpr Bitness kBitness = kBit32;
+ static constexpr ExecutableType kExeType = kExeTypeWin32X86;
+ enum : uint16_t { kMagic = 0x10B };  // Expected optional header |magic|.
+ enum : uint16_t { kRelocType = 3 };  // Passed to the reloc reader and writer.
+ enum : offset_t { kVAWidth = 4 };  // Width used for abs32 references.
+ static const char kExeTypeString[];
+
+ using ImageOptionalHeader = pe::ImageOptionalHeader;
+ using RelFinder = Rel32FinderX86;
+ using Address = uint32_t;  // Type of DisassemblerWin32::image_base_.
+};
+
+struct Win32X64Traits {
+ static constexpr Bitness kBitness = kBit64;
+ static constexpr ExecutableType kExeType = kExeTypeWin32X64;
+ enum : uint16_t { kMagic = 0x20B };  // Expected optional header |magic|.
+ enum : uint16_t { kRelocType = 10 };  // Passed to the reloc reader and writer.
+ enum : offset_t { kVAWidth = 8 };  // Width used for abs32 references.
+ static const char kExeTypeString[];
+
+ using ImageOptionalHeader = pe::ImageOptionalHeader64;
+ using RelFinder = Rel32FinderX64;
+ using Address = uint64_t;  // Type of DisassemblerWin32::image_base_.
+};
+
+template <class Traits>
+class DisassemblerWin32 : public Disassembler {
+ public:
+ enum ReferenceType : uint8_t { kReloc, kAbs32, kRel32, kTypeCount };
+
+ // Applies quick checks to determine whether |image| *may* point to the start
+ // of an executable. Returns true iff the check passes.
+ static bool QuickDetect(ConstBufferView image);
+
+ DisassemblerWin32();
+ ~DisassemblerWin32() override;
+
+ // Disassembler:
+ ExecutableType GetExeType() const override;
+ std::string GetExeTypeString() const override;
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ // Functions that return reader / writer for references.
+ std::unique_ptr<ReferenceReader> MakeReadRelocs(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceReader> MakeReadAbs32(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceReader> MakeReadRel32(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceWriter> MakeWriteRelocs(MutableBufferView image);
+ std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32(MutableBufferView image);
+
+ private:
+ friend Disassembler;  // Lets Disassembler::Make() call the private Parse().
+
+ // Disassembler:
+ bool Parse(ConstBufferView image) override;
+
+ // Parses the file header. Returns true iff successful.
+ bool ParseHeader();
+
+ // Parsers to extract references. These are lazily called, and return whether
+ // parsing was successful (failures are non-fatal).
+ bool ParseAndStoreRelocBlocks();
+ bool ParseAndStoreAbs32();
+ bool ParseAndStoreRel32();
+
+ // In-memory copy of sections.
+ std::vector<pe::ImageSectionHeader> sections_;
+
+ // Image base address to translate between RVA and VA.
+ typename Traits::Address image_base_ = 0;
+
+ // Pointer to data Directory entry of the relocation table.
+ const pe::ImageDataDirectory* base_relocation_table_ = nullptr;
+
+ // Translator between offsets and RVAs.
+ AddressTranslator translator_;
+
+ // Reference storage.
+ BufferRegion reloc_region_;
+ std::vector<offset_t> reloc_block_offsets_;
+ offset_t reloc_end_ = 0;  // NOTE(review): not referenced in disassembler_win32.cc as shown; possibly vestigial.
+ std::vector<offset_t> abs32_locations_;  // Sorted by ParseAndStoreAbs32().
+ std::vector<offset_t> rel32_locations_;  // Sorted by ParseAndStoreRel32().
+
+ // Initialization states of reference storage, used for lazy initialization.
+ // TODO(huangs): Investigate whether lazy initialization is useful for memory
+ // reduction. This is a carryover from Courgette. To be sure we should run
+ // experiment after Zucchini is able to do ensemble patching.
+ bool has_parsed_relocs_ = false;
+ bool has_parsed_abs32_ = false;
+ bool has_parsed_rel32_ = false;
+
+ DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32);
+};
+
+using DisassemblerWin32X86 = DisassemblerWin32<Win32X86Traits>;
+using DisassemblerWin32X64 = DisassemblerWin32<Win32X64Traits>;
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_
diff --git a/element_detection.cc b/element_detection.cc
new file mode 100644
index 0000000..d6bba5f
--- /dev/null
+++ b/element_detection.cc
@@ -0,0 +1,84 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/element_detection.h"
+
+#include <utility>
+
+#include "base/logging.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/disassembler_no_op.h"
+#include "components/zucchini/disassembler_win32.h"
+
+namespace zucchini {
+
+namespace {
+
+// Impose a minimal program size to eliminate pathological cases.
+constexpr size_t kMinProgramSize = 16;
+
+} // namespace
+
+/******** Utility Functions ********/
+
+std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback(
+ ConstBufferView image) {  // Tries Win32 x86 first, then Win32 x64.
+ if (DisassemblerWin32X86::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerWin32X86>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)  // Reject tiny parses.
+ return disasm;
+ }
+
+ if (DisassemblerWin32X64::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerWin32X64>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)  // Reject tiny parses.
+ return disasm;
+ }
+
+ return nullptr;  // No supported executable type matched.
+}
+
+std::unique_ptr<Disassembler> MakeDisassemblerOfType(ConstBufferView image,
+ ExecutableType exe_type) {
+ switch (exe_type) {  // Note: no kMinProgramSize check is applied on this path.
+ case kExeTypeWin32X86:
+ return Disassembler::Make<DisassemblerWin32X86>(image);
+ case kExeTypeWin32X64:
+ return Disassembler::Make<DisassemblerWin32X64>(image);
+ case kExeTypeNoOp:
+ return Disassembler::Make<DisassemblerNoOp>(image);
+ default:  // Any other type is unsupported.
+ return nullptr;
+ }
+}
+
+base::Optional<Element> DetectElementFromDisassembler(ConstBufferView image) {
+ std::unique_ptr<Disassembler> disasm = MakeDisassemblerWithoutFallback(image);
+ if (disasm)
+ return Element({0, disasm->size()}, disasm->GetExeType());  // Starts at offset 0 of |image|.
+ return base::nullopt;
+}
+
+/******** ElementFinder ********/
+
+ElementFinder::ElementFinder(ConstBufferView image, ElementDetector&& detector)
+ : image_(image), detector_(std::move(detector)) {}
+
+ElementFinder::~ElementFinder() = default;
+
+base::Optional<Element> ElementFinder::GetNext() {
+ for (; pos_ < image_.size(); ++pos_) {  // Try each start offset in turn.
+ ConstBufferView test_image =
+ ConstBufferView::FromRange(image_.begin() + pos_, image_.end());
+ base::Optional<Element> element = detector_.Run(test_image);
+ if (element) {
+ element->offset += pos_;  // Rebase from |test_image| onto |image_|.
+ pos_ = element->EndOffset();  // Next call resumes after this element.
+ return element;
+ }
+ }
+ return base::nullopt;
+}
+
+} // namespace zucchini
diff --git a/element_detection.h b/element_detection.h
new file mode 100644
index 0000000..f90c033
--- /dev/null
+++ b/element_detection.h
@@ -0,0 +1,60 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_
+#define COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_
+
+#include <stddef.h>
+
+#include <memory>
+
+#include "base/callback.h"
+#include "base/macros.h"
+#include "base/optional.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+class Disassembler;
+
+// Attempts to detect an executable located at start of |image|. If found,
+// returns the corresponding disassembler. Otherwise returns null.
+std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback(
+ ConstBufferView image);
+
+// Attempts to create a disassembler corresponding to |exe_type| and initialize
+// it with |image|. On failure, returns null.
+std::unique_ptr<Disassembler> MakeDisassemblerOfType(ConstBufferView image,
+ ExecutableType exe_type);
+
+// Attempts to detect an element associated with |image| and returns it, or
+// returns nullopt if no element is detected.
+using ElementDetector =
+ base::RepeatingCallback<base::Optional<Element>(ConstBufferView image)>;
+
+// Implementation of ElementDetector using disassemblers.
+base::Optional<Element> DetectElementFromDisassembler(ConstBufferView image);
+
+// A class to scan through an image and iteratively detect elements.
+class ElementFinder {
+ public:
+ // |image| is the buffer to scan. |detector| is the callback invoked on
+ // successive tails of |image|; it is moved into this object.
+ ElementFinder(ConstBufferView image, ElementDetector&& detector);
+ ~ElementFinder();
+
+ // Scans for the next executable using |detector|. Returns the next element
+ // found, or nullopt if no more element can be found.
+ base::Optional<Element> GetNext();
+
+ private:
+ // The image being scanned.
+ ConstBufferView image_;
+ // Callback used to test whether an element starts at a given position.
+ ElementDetector detector_;
+ // Offset in |image_| at which the next call to GetNext() starts scanning.
+ offset_t pos_ = 0;
+
+ DISALLOW_COPY_AND_ASSIGN(ElementFinder);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_
diff --git a/element_detection_unittest.cc b/element_detection_unittest.cc
new file mode 100644
index 0000000..2200c0b
--- /dev/null
+++ b/element_detection_unittest.cc
@@ -0,0 +1,78 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/element_detection.h"
+
+#include <vector>
+
+#include "base/bind.h"
+#include "components/zucchini/buffer_view.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using ElementVector = std::vector<Element>;
+
+} // namespace
+
+TEST(ElementDetectionTest, ElementFinderEmpty) {
+ std::vector<uint8_t> buffer(10, 0);
+ ElementFinder finder(
+ ConstBufferView(buffer.data(), buffer.size()),
+ base::BindRepeating([](ConstBufferView image) -> base::Optional<Element> {
+ return base::nullopt;
+ }));
+ EXPECT_EQ(base::nullopt, finder.GetNext());
+}
+
+// Runs an ElementFinder over |buffer| with a toy detector and returns every
+// element found, in scan order. The detector reports a run of equal non-zero
+// bytes at the start of the scanned region as one element whose type is the
+// byte value; a region starting with 0 yields no element.
+ElementVector TestElementFinder(std::vector<uint8_t> buffer) {
+  ConstBufferView image(buffer.data(), buffer.size());
+
+  ElementFinder finder(
+      image,
+      base::BindRepeating(
+          [](ConstBufferView image,
+             ConstBufferView region) -> base::Optional<Element> {
+            // The scanned region must always lie inside the full image.
+            EXPECT_GE(region.begin(), image.begin());
+            EXPECT_LE(region.end(), image.end());
+            // The region must be non-empty since region[0] is read below.
+            // (Comparing an unsigned size with ">= 0" can never fail.)
+            EXPECT_GT(region.size(), 0U);
+
+            if (region[0] != 0) {
+              offset_t length = 1;
+              while (length < region.size() && region[length] == region[0])
+                ++length;
+              return Element{{0, length},
+                             static_cast<ExecutableType>(region[0])};
+            }
+            return base::nullopt;
+          },
+          image));
+  std::vector<Element> elements;
+  for (auto element = finder.GetNext(); element; element = finder.GetNext()) {
+    elements.push_back(*element);
+  }
+  return elements;
+}
+
+TEST(ElementDetectionTest, ElementFinder) {
+ EXPECT_EQ(ElementVector(), TestElementFinder({}));
+ EXPECT_EQ(ElementVector(), TestElementFinder({0, 0}));
+ EXPECT_EQ(ElementVector({{{0, 2}, kExeTypeWin32X86}}),
+ TestElementFinder({1, 1}));
+ EXPECT_EQ(
+ ElementVector({{{0, 2}, kExeTypeWin32X86}, {{2, 2}, kExeTypeWin32X64}}),
+ TestElementFinder({1, 1, 2, 2}));
+ EXPECT_EQ(ElementVector({{{1, 2}, kExeTypeWin32X86}}),
+ TestElementFinder({0, 1, 1, 0}));
+ EXPECT_EQ(
+ ElementVector({{{1, 2}, kExeTypeWin32X86}, {{3, 3}, kExeTypeWin32X64}}),
+ TestElementFinder({0, 1, 1, 2, 2, 2}));
+ EXPECT_EQ(
+ ElementVector({{{1, 2}, kExeTypeWin32X86}, {{4, 3}, kExeTypeWin32X64}}),
+ TestElementFinder({0, 1, 1, 0, 2, 2, 2}));
+}
+
+} // namespace zucchini
diff --git a/encoded_view.cc b/encoded_view.cc
new file mode 100644
index 0000000..5b55b51
--- /dev/null
+++ b/encoded_view.cc
@@ -0,0 +1,77 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/encoded_view.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "base/logging.h"
+
+namespace zucchini {
+
+EncodedView::EncodedView(const ImageIndex& image_index)
+ : image_index_(image_index), pool_infos_(image_index.PoolCount()) {}
+EncodedView::~EncodedView() = default;
+
+// Maps |location| to its projected value:
+// - raw content projects to its own raw value;
+// - the first byte of a reference projects to a value derived from the label
+//   of its target and the reference type, offset by kBaseReferenceProjection;
+// - every other byte of a reference projects to kReferencePaddingProjection.
+EncodedView::value_type EncodedView::Projection(offset_t location) const {
+  DCHECK_LT(location, image_index_.size());
+
+  const TypeTag type = image_index_.LookupType(location);
+  if (type == kNoTypeTag) {
+    // Raw data: the projection is the identity.
+    return image_index_.GetRawValue(location);
+  }
+
+  // Otherwise |location| falls inside a reference of |type|.
+  const ReferenceSet& refs = image_index_.refs(type);
+  const IndirectReference ref = refs.at(location);
+  DCHECK_GE(location, ref.location);
+  DCHECK_LT(location, ref.location + refs.width());
+
+  // Bytes after the head of a reference all share one padding value.
+  if (location != ref.location)
+    return kReferencePaddingProjection;
+
+  // Head of the reference: look up the label assigned to its target.
+  const PoolTag pool_tag = refs.pool_tag();
+  const PoolInfo& pool_info = pool_infos_[pool_tag.value()];
+  DCHECK_EQ(image_index_.pool(pool_tag).size(), pool_info.labels.size());
+  const uint32_t label = pool_info.labels[ref.target_key];
+
+  // Combine (label, type) into one scalar and shift it past the values used
+  // for raw bytes and padding, so that the ranges cannot collide.
+  const value_type label_and_type =
+      static_cast<value_type>(label) * image_index_.TypeCount() + type.value();
+  return label_and_type + kBaseReferenceProjection;
+}
+
+// Returns an upper bound on Projection() values, computed from the largest
+// label bound over all pools, the number of types, and the base offset that
+// is reserved for raw and padding values.
+size_t EncodedView::Cardinality() const {
+  size_t largest_bound = 0;
+  for (const PoolInfo& info : pool_infos_) {
+    if (largest_bound < info.bound)
+      largest_bound = info.bound;
+  }
+  return largest_bound * image_index_.TypeCount() + kBaseReferenceProjection;
+}
+
+// Replaces the labels of |pool| with |labels| (moved in) and records |bound|.
+// |labels| must contain one entry per target in |pool|, and every entry must
+// be strictly less than |bound|; both conditions are DCHECKed.
+void EncodedView::SetLabels(PoolTag pool,
+                            std::vector<uint32_t>&& labels,
+                            size_t bound) {
+  DCHECK_EQ(labels.size(), image_index_.pool(pool).size());
+  // Explicitly qualified: an unqualified max_element() is only found through
+  // argument-dependent lookup, which fails when the vector's iterators are
+  // plain pointers.
+  DCHECK(labels.empty() ||
+         *std::max_element(labels.begin(), labels.end()) < bound);
+  pool_infos_[pool.value()].labels = std::move(labels);
+  pool_infos_[pool.value()].bound = bound;
+}
+
+EncodedView::PoolInfo::PoolInfo() = default;
+EncodedView::PoolInfo::PoolInfo(PoolInfo&&) = default;
+EncodedView::PoolInfo::~PoolInfo() = default;
+
+} // namespace zucchini
diff --git a/encoded_view.h b/encoded_view.h
new file mode 100644
index 0000000..7ecf59e
--- /dev/null
+++ b/encoded_view.h
@@ -0,0 +1,182 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_
+#define COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iterator>
+#include <vector>
+
+#include "base/macros.h"
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// Zucchini-gen performs semantics-aware matching:
+// - Same-typed reference target in "old" and "new" can be associated.
+// Associated targets are assigned an identifier called "label" (and for
+// unassociated targets, label = 0).
+// - EncodedView maps each offset in "old" and "new" images to a "projected
+// value", which can be:
+// - Raw byte value (0-255) for non-references.
+// - Reference "projected value" (> 256) that depends on target {type, label}
+// at each reference's location (byte 0).
+// - Reference padding value (256) at the body of each reference (bytes 1+).
+// - The projected values for "old" and "new" are used to build the equivalence
+// map.
+
+constexpr size_t kReferencePaddingProjection = 256;
+constexpr size_t kBaseReferenceProjection = 257;
+
+// A Range (providing begin and end iterators) that adapts ImageIndex to make
+// image data appear as an Encoded Image, that is encoded data under a higher
+// level of abstraction than raw bytes. In particular:
+// - First byte of each reference becomes a projection of its type and label.
+// - Subsequent bytes of each reference become |kReferencePaddingProjection|.
+// - Non-reference raw bytes remain as raw bytes.
+class EncodedView {
+ public:
+ // RandomAccessIterator whose values are the results of Projection().
+  class Iterator {
+   public:
+    using iterator_category = std::random_access_iterator_tag;
+    using value_type = size_t;
+    using difference_type = ptrdiff_t;
+    // Dereferencing yields a computed value, not an lvalue, so |reference| is
+    // a plain value type.
+    using reference = size_t;
+    using pointer = size_t*;
+
+    // |encoded_view| must outlive the iterator. |pos| is the offset the
+    // iterator points at.
+    Iterator(const EncodedView& encoded_view, difference_type pos)
+        : encoded_view_(&encoded_view), pos_(pos) {}
+
+    // Returns the projection at the current position.
+    value_type operator*() const {
+      return encoded_view_->Projection(static_cast<offset_t>(pos_));
+    }
+
+    // Returns the projection |n| positions away from the current one.
+    value_type operator[](difference_type n) const {
+      return encoded_view_->Projection(static_cast<offset_t>(pos_ + n));
+    }
+
+    Iterator& operator++() {
+      ++pos_;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      Iterator tmp = *this;
+      ++pos_;
+      return tmp;
+    }
+
+    Iterator& operator--() {
+      --pos_;
+      return *this;
+    }
+
+    Iterator operator--(int) {
+      Iterator tmp = *this;
+      --pos_;
+      return tmp;
+    }
+
+    Iterator& operator+=(difference_type n) {
+      pos_ += n;
+      return *this;
+    }
+
+    Iterator& operator-=(difference_type n) {
+      pos_ -= n;
+      return *this;
+    }
+
+    // Comparisons look at positions only; both operands are expected to refer
+    // to the same EncodedView.
+    friend bool operator==(Iterator a, Iterator b) { return a.pos_ == b.pos_; }
+
+    friend bool operator!=(Iterator a, Iterator b) { return !(a == b); }
+
+    friend bool operator<(Iterator a, Iterator b) { return a.pos_ < b.pos_; }
+
+    friend bool operator>(Iterator a, Iterator b) { return b < a; }
+
+    friend bool operator<=(Iterator a, Iterator b) { return !(b < a); }
+
+    friend bool operator>=(Iterator a, Iterator b) { return !(a < b); }
+
+    friend difference_type operator-(Iterator a, Iterator b) {
+      return a.pos_ - b.pos_;
+    }
+
+    friend Iterator operator+(Iterator it, difference_type n) {
+      it += n;
+      return it;
+    }
+
+    friend Iterator operator-(Iterator it, difference_type n) {
+      it -= n;
+      return it;
+    }
+
+   private:
+    // Held as a pointer rather than a reference: a reference member would
+    // delete the implicit copy assignment operator, and standard iterators
+    // are required to be copy-assignable.
+    const EncodedView* encoded_view_;
+    difference_type pos_;
+  };
+
+ using value_type = size_t;
+ using size_type = offset_t;
+ using difference_type = ptrdiff_t;
+ using const_iterator = Iterator;
+
+ // |image_index| is the annotated image being adapted, and is required to
+ // remain valid for the lifetime of the object.
+ explicit EncodedView(const ImageIndex& image_index);
+ ~EncodedView();
+
+ // Projects |location| to a scalar value that describes the content at a
+ // higher level of abstraction.
+ value_type Projection(offset_t location) const;
+
+ bool IsToken(offset_t location) const {
+ return image_index_.IsToken(location);
+ }
+
+ // Returns the cardinality of the projection, i.e., the upper bound on
+ // values returned by Projection().
+ value_type Cardinality() const;
+
+ // Associates |labels| to targets for a given |pool|, replacing previous
+ // association. Values in |labels| must be smaller than |bound|.
+ void SetLabels(PoolTag pool, std::vector<uint32_t>&& labels, size_t bound);
+ const ImageIndex& image_index() const { return image_index_; }
+
+ // Range functions.
+ size_type size() const { return size_type(image_index_.size()); }
+ const_iterator begin() const {
+ return const_iterator{*this, difference_type(0)};
+ }
+ const_iterator end() const {
+ return const_iterator{*this, difference_type(size())};
+ }
+
+ private:
+ struct PoolInfo {
+ PoolInfo();
+ PoolInfo(PoolInfo&&);
+ ~PoolInfo();
+
+ // |labels| translates IndirectReference target_key to label.
+ std::vector<uint32_t> labels;
+ size_t bound = 0;
+ };
+
+ const ImageIndex& image_index_;
+ std::vector<PoolInfo> pool_infos_;
+
+ DISALLOW_COPY_AND_ASSIGN(EncodedView);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_
diff --git a/encoded_view_unittest.cc b/encoded_view_unittest.cc
new file mode 100644
index 0000000..96d9dc4
--- /dev/null
+++ b/encoded_view_unittest.cc
@@ -0,0 +1,202 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/encoded_view.h"
+
+#include <iterator>
+#include <numeric>
+#include <vector>
+
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/test_disassembler.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+constexpr size_t PADDING = kReferencePaddingProjection;
+
+template <class It1, class It2>
+void TestInputIterator(It1 first_expected,
+ It1 last_expected,
+ It2 first_input,
+ It2 last_input) {
+ while (first_expected != last_expected && first_input != last_input) {
+ EXPECT_EQ(*first_expected, *first_input);
+ ++first_expected;
+ ++first_input;
+ }
+ EXPECT_EQ(last_input, first_input);
+ EXPECT_EQ(last_expected, first_expected);
+}
+
+template <class It1, class It2>
+void TestForwardIterator(It1 first_expected,
+ It1 last_expected,
+ It2 first_input,
+ It2 last_input) {
+ TestInputIterator(first_expected, last_expected, first_input, last_input);
+
+ while (first_expected != last_expected && first_input != last_input) {
+ EXPECT_EQ(*(first_expected++), *(first_input++));
+ }
+ EXPECT_EQ(last_input, first_input);
+ EXPECT_EQ(last_expected, first_expected);
+}
+
+template <class It1, class It2>
+void TestBidirectionalIterator(It1 first_expected,
+ It1 last_expected,
+ It2 first_input,
+ It2 last_input) {
+ TestForwardIterator(first_expected, last_expected, first_input, last_input);
+
+ while (first_expected != last_expected && first_input != last_input) {
+ EXPECT_EQ(*(--last_expected), *(--last_input));
+ }
+ EXPECT_EQ(last_input, first_input);
+ EXPECT_EQ(last_expected, first_expected);
+}
+
+template <class It1, class It2>
+void TestRandomAccessIterator(It1 first_expected,
+ It1 last_expected,
+ It2 first_input,
+ It2 last_input) {
+ TestBidirectionalIterator(first_expected, last_expected, first_input,
+ last_input);
+
+ using difference_type = typename std::iterator_traits<It1>::difference_type;
+
+ difference_type expected_size = last_expected - first_expected;
+ difference_type input_size = last_input - first_input;
+ EXPECT_EQ(expected_size, input_size);
+
+ for (difference_type i = 0; i < expected_size; ++i) {
+ EXPECT_EQ(*(first_expected + i), *(first_input + i));
+ EXPECT_EQ(first_expected[i], first_input[i]);
+
+ EXPECT_EQ(0 < i, first_input < first_input + i);
+ EXPECT_EQ(0 > i, first_input > first_input + i);
+ EXPECT_EQ(0 <= i, first_input <= first_input + i);
+ EXPECT_EQ(0 >= i, first_input >= first_input + i);
+
+ EXPECT_EQ(expected_size < i, last_input < first_input + i);
+ EXPECT_EQ(expected_size > i, last_input > first_input + i);
+ EXPECT_EQ(expected_size <= i, last_input <= first_input + i);
+ EXPECT_EQ(expected_size >= i, last_input >= first_input + i);
+
+ It2 input = first_input;
+ input += i;
+ EXPECT_EQ(*input, first_expected[i]);
+ input -= i;
+ EXPECT_EQ(first_input, input);
+ input += i;
+
+ EXPECT_EQ(0 < i, first_input < input);
+ EXPECT_EQ(0 > i, first_input > input);
+ EXPECT_EQ(0 <= i, first_input <= input);
+ EXPECT_EQ(0 >= i, first_input >= input);
+
+ EXPECT_EQ(expected_size < i, last_input < input);
+ EXPECT_EQ(expected_size > i, last_input > input);
+ EXPECT_EQ(expected_size <= i, last_input <= input);
+ EXPECT_EQ(expected_size >= i, last_input >= input);
+ }
+}
+
+} // namespace
+
+class EncodedViewTest : public testing::Test {
+ protected:
+ EncodedViewTest()
+ : buffer_(20),
+ image_index_(ConstBufferView(buffer_.data(), buffer_.size())) {
+ std::iota(buffer_.begin(), buffer_.end(), 0);
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)},
+ {{1, 0}, {8, 1}, {10, 2}},
+ {4, TypeTag(1), PoolTag(0)}, {{3, 3}},
+ {3, TypeTag(2), PoolTag(1)}, {{12, 4}, {17, 5}});
+ image_index_.Initialize(&disasm);
+ }
+
+ void CheckView(std::vector<size_t> expected,
+ const EncodedView& encoded_view) const {
+ for (offset_t i = 0; i < encoded_view.size(); ++i) {
+ EXPECT_EQ(expected[i], encoded_view.Projection(i)) << i;
+ }
+ TestRandomAccessIterator(expected.begin(), expected.end(),
+ encoded_view.begin(), encoded_view.end());
+ }
+
+ std::vector<uint8_t> buffer_;
+ ImageIndex image_index_;
+};
+
+TEST_F(EncodedViewTest, Unlabeled) {
+ EncodedView encoded_view(image_index_);
+
+ encoded_view.SetLabels(PoolTag(0), {0, 0, 0, 0}, 1);
+ encoded_view.SetLabels(PoolTag(1), {0, 0}, 1);
+
+ std::vector<size_t> expected = {
+ 0, // raw
+ kBaseReferenceProjection + 0 + 0 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 1 + 0 * 3, // ref 1
+ PADDING,
+ PADDING,
+ PADDING,
+ 7, // raw
+ kBaseReferenceProjection + 0 + 0 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 0 + 0 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 2 + 0 * 3, // ref 2
+ PADDING,
+ PADDING,
+ 15, // raw
+ 16,
+ kBaseReferenceProjection + 2 + 0 * 3, // ref 2
+ PADDING,
+ PADDING,
+ };
+ EXPECT_EQ(kBaseReferenceProjection + 3 * 1, encoded_view.Cardinality());
+ CheckView(expected, encoded_view);
+}
+
+TEST_F(EncodedViewTest, Labeled) {
+ EncodedView encoded_view(image_index_);
+
+ encoded_view.SetLabels(PoolTag(0), {0, 2, 1, 2}, 3);
+ encoded_view.SetLabels(PoolTag(1), {0, 0}, 1);
+
+ std::vector<size_t> expected = {
+ 0, // raw
+ kBaseReferenceProjection + 0 + 0 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 1 + 2 * 3, // ref 1
+ PADDING,
+ PADDING,
+ PADDING,
+ 7, // raw
+ kBaseReferenceProjection + 0 + 2 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 0 + 1 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 2 + 0 * 3, // ref 2
+ PADDING,
+ PADDING,
+ 15, // raw
+ 16,
+ kBaseReferenceProjection + 2 + 0 * 3, // ref 2
+ PADDING,
+ PADDING,
+ };
+ EXPECT_EQ(kBaseReferenceProjection + 3 * 3, encoded_view.Cardinality());
+ CheckView(expected, encoded_view);
+}
+
+} // namespace zucchini
diff --git a/ensemble_matcher.cc b/ensemble_matcher.cc
new file mode 100644
index 0000000..eebbae9
--- /dev/null
+++ b/ensemble_matcher.cc
@@ -0,0 +1,24 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/ensemble_matcher.h"
+
+#include <limits>
+
+#include "base/logging.h"
+#include "base/strings/stringprintf.h"
+
+namespace zucchini {
+
+/******** EnsembleMatcher ********/
+
+EnsembleMatcher::EnsembleMatcher() = default;
+
+EnsembleMatcher::~EnsembleMatcher() = default;
+
+void EnsembleMatcher::Trim() {
+ // TODO(huangs): Add MultiDex handling logic when we add DEX support.
+}
+
+} // namespace zucchini
diff --git a/ensemble_matcher.h b/ensemble_matcher.h
new file mode 100644
index 0000000..bc89883
--- /dev/null
+++ b/ensemble_matcher.h
@@ -0,0 +1,62 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_
+#define COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "base/macros.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A base class for ensemble matching strategies, which identify Elements in
+// "new" and "old" archives, and match each "new" Element to an "old" Element.
+// Matched pairs can then be passed to Disassembler for architecture-specific
+// patching. Notes:
+// - A matched Element pair must have the same ExecutableType.
+// - Special case: Exact matches are ignored, since they can be patched directly
+// without architecture-specific patching.
+// - Multiple "new" Elements may match a common "old" Element.
+// - A "new" Element may have no match. This can happen when no viable match
+// exists, or when an exact match is skipped.
+class EnsembleMatcher {
+ public:
+ EnsembleMatcher();
+ virtual ~EnsembleMatcher();
+
+ // Interface to main matching feature. Returns whether match was successful.
+ // This should be called at most once per instance.
+ virtual bool RunMatch(ConstBufferView old_image,
+ ConstBufferView new_image) = 0;
+
+ // Accessors to RunMatch() results.
+ const std::vector<ElementMatch>& matches() const { return matches_; }
+
+ size_t num_identical() const { return num_identical_; }
+
+ protected:
+ // Post-processes |matches_| to remove potentially unfavorable entries.
+ void Trim();
+
+ // Storage of matched elements: A list of matched pairs, where the list of
+ // "new" elements have increasing offsets and don't overlap. May be empty.
+ std::vector<ElementMatch> matches_;
+
+ // Number of identical matches found in match candidates. These should be
+ // excluded from |matches_|.
+ size_t num_identical_ = 0;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(EnsembleMatcher);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_
diff --git a/equivalence_map.cc b/equivalence_map.cc
new file mode 100644
index 0000000..b3181ab
--- /dev/null
+++ b/equivalence_map.cc
@@ -0,0 +1,482 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/equivalence_map.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "base/logging.h"
+#include "components/zucchini/encoded_view.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/suffix_array.h"
+
+namespace zucchini {
+
+/******** Utility Functions ********/
+
+// Scores how well the token at |src| in the "old" image matches the token at
+// |dst| in the "new" image:
+// - kMismatchFatal if their types differ;
+// - for raw content, 1.0 on equal raw values and -1.5 otherwise;
+// - for references, based on the affinity between the two targets: half the
+//   reference width if the affinity is 0, the full width if it is positive,
+//   and -2.0 if it is negative.
+// Both offsets must be tokens.
+double GetTokenSimilarity(
+    const ImageIndex& old_image_index,
+    const ImageIndex& new_image_index,
+    const std::vector<TargetsAffinity>& targets_affinities,
+    offset_t src,
+    offset_t dst) {
+  DCHECK(old_image_index.IsToken(src));
+  DCHECK(new_image_index.IsToken(dst));
+
+  const TypeTag src_type = old_image_index.LookupType(src);
+  const TypeTag dst_type = new_image_index.LookupType(dst);
+  if (src_type != dst_type)
+    return kMismatchFatal;
+
+  // Neither side is a reference: compare raw values.
+  if (!(old_image_index.IsReference(src) ||
+        new_image_index.IsReference(dst))) {
+    const bool same_raw_value =
+        old_image_index.GetRawValue(src) == new_image_index.GetRawValue(dst);
+    return same_raw_value ? 1.0 : -1.5;
+  }
+
+  const ReferenceSet& src_refs = old_image_index.refs(src_type);
+  const ReferenceSet& dst_refs = new_image_index.refs(dst_type);
+  const IndirectReference src_ref = src_refs.at(src);
+  const IndirectReference dst_ref = dst_refs.at(dst);
+  const PoolTag pool_tag = src_refs.pool_tag();
+
+  const double affinity = targets_affinities[pool_tag.value()].AffinityBetween(
+      src_ref.target_key, dst_ref.target_key);
+
+  // Zero affinity: neither target is associated, which is a weak match.
+  if (affinity == 0.0)
+    return 0.5 * src_refs.width();
+
+  // Otherwise at least one target is associated: reward agreement with the
+  // full reference width, and penalize disagreement.
+  if (affinity > 0.0)
+    return src_refs.width();
+  return -2.0;
+}
+
+// Sums GetTokenSimilarity() over every token covered by |equivalence|, where
+// token positions are decided by the "new" image. Returns kMismatchFatal as
+// soon as the running sum equals kMismatchFatal.
+double GetEquivalenceSimilarity(
+    const ImageIndex& old_image_index,
+    const ImageIndex& new_image_index,
+    const std::vector<TargetsAffinity>& targets_affinities,
+    const Equivalence& equivalence) {
+  double total = 0.0;
+  for (offset_t k = 0; k < equivalence.length; ++k) {
+    // Non-tokens belong to the nearest previous token and are accounted for
+    // there, so only token positions contribute.
+    if (new_image_index.IsToken(equivalence.dst_offset + k)) {
+      total += GetTokenSimilarity(
+          old_image_index, new_image_index, targets_affinities,
+          equivalence.src_offset + k, equivalence.dst_offset + k);
+      if (total == kMismatchFatal)
+        return kMismatchFatal;
+    }
+  }
+  return total;
+}
+
+// Extends |candidate| towards higher offsets, one offset at a time, starting
+// right after its current end. Extension stops at the end of either image, on
+// a type mismatch between "old" and "new", when the running similarity drops
+// below 0, or when the running penalty reaches |min_similarity|. Returns the
+// candidate whose length is the extension at which the running similarity was
+// highest (ties favor the longer extension), along with that similarity.
+EquivalenceCandidate ExtendEquivalenceForward(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const EquivalenceCandidate& candidate,
+ double min_similarity) {
+ Equivalence equivalence = candidate.eq;
+ // |best_k| is the length of the best extension seen so far.
+ offset_t best_k = equivalence.length;
+ double current_similarity = candidate.similarity;
+ double best_similarity = current_similarity;
+ // The penalty starts at |min_similarity|, so the first token visited must
+ // have positive similarity for the extension to continue.
+ double current_penalty = min_similarity;
+ for (offset_t k = best_k;
+ equivalence.src_offset + k < old_image_index.size() &&
+ equivalence.dst_offset + k < new_image_index.size();
+ ++k) {
+ // Mismatch in type, |candidate| cannot be extended further.
+ if (old_image_index.LookupType(equivalence.src_offset + k) !=
+ new_image_index.LookupType(equivalence.dst_offset + k)) {
+ break;
+ }
+
+ if (!new_image_index.IsToken(equivalence.dst_offset + k)) {
+ // Non-tokens are joined with the nearest previous token: skip until we
+ // cover the unit, and extend |best_k| if applicable.
+ if (best_k == k)
+ best_k = k + 1;
+ continue;
+ }
+
+ double similarity = GetTokenSimilarity(
+ old_image_index, new_image_index, targets_affinities,
+ equivalence.src_offset + k, equivalence.dst_offset + k);
+ current_similarity += similarity;
+ // A negative penalty is reset to 0 before this token is applied, so earlier
+ // good matches cannot offset a later run of mismatches.
+ current_penalty = std::max(0.0, current_penalty) - similarity;
+
+ if (current_similarity < 0.0 || current_penalty >= min_similarity)
+ break;
+ // ">=" makes a tie extend the candidate rather than keep the shorter one.
+ if (current_similarity >= best_similarity) {
+ best_similarity = current_similarity;
+ best_k = k + 1;
+ }
+ }
+ equivalence.length = best_k;
+ return {equivalence, best_similarity};
+}
+
+// Extends |candidate| towards lower offsets, one offset at a time, starting
+// right before its current start. Extension stops when either offset would
+// go below 0, on a type mismatch between "old" and "new", when the running
+// similarity drops below 0, or when the running penalty reaches
+// |min_similarity|. Returns the candidate moved back and lengthened by the
+// extension at which the running similarity was highest (ties favor the
+// longer extension), along with that similarity.
+EquivalenceCandidate ExtendEquivalenceBackward(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const EquivalenceCandidate& candidate,
+ double min_similarity) {
+ Equivalence equivalence = candidate.eq;
+ // |best_k| is the number of offsets by which the best extension seen so far
+ // moves the start of the equivalence back.
+ offset_t best_k = 0;
+ double current_similarity = candidate.similarity;
+ double best_similarity = current_similarity;
+ double current_penalty = 0.0;
+ // |k| is bounded by both offsets so that the subtractions below cannot wrap.
+ for (offset_t k = 1;
+ k <= equivalence.dst_offset && k <= equivalence.src_offset; ++k) {
+ // Mismatch in type, |candidate| cannot be extended further.
+ if (old_image_index.LookupType(equivalence.src_offset - k) !=
+ new_image_index.LookupType(equivalence.dst_offset - k)) {
+ break;
+ }
+
+ // Non-tokens are joined with the nearest previous token: skip until we
+ // reach the next token.
+ if (!new_image_index.IsToken(equivalence.dst_offset - k))
+ continue;
+
+ DCHECK_EQ(old_image_index.LookupType(equivalence.src_offset - k),
+ new_image_index.LookupType(equivalence.dst_offset -
+ k)); // Sanity check.
+ double similarity = GetTokenSimilarity(
+ old_image_index, new_image_index, targets_affinities,
+ equivalence.src_offset - k, equivalence.dst_offset - k);
+
+ current_similarity += similarity;
+ // A negative penalty is reset to 0 before this token is applied, so earlier
+ // good matches cannot offset a later run of mismatches.
+ current_penalty = std::max(0.0, current_penalty) - similarity;
+
+ if (current_similarity < 0.0 || current_penalty >= min_similarity)
+ break;
+ // ">=" makes a tie extend the candidate rather than keep the shorter one.
+ if (current_similarity >= best_similarity) {
+ best_similarity = current_similarity;
+ best_k = k;
+ }
+ }
+
+ // Move the start back by the best extension and grow the length to match.
+ equivalence.dst_offset -= best_k;
+ equivalence.src_offset -= best_k;
+ equivalence.length += best_k;
+ return {equivalence, best_similarity};
+}
+
+// Grows an equivalence out of the seed pair (|src|, |dst|). The seed starts
+// empty and is first extended forward; only if the result reaches
+// |min_similarity| is it also extended backward. If |src| is not a token in
+// the "old" image, the empty seed is returned unchanged.
+EquivalenceCandidate VisitEquivalenceSeed(
+    const ImageIndex& old_image_index,
+    const ImageIndex& new_image_index,
+    const std::vector<TargetsAffinity>& targets_affinities,
+    offset_t src,
+    offset_t dst,
+    double min_similarity) {
+  const EquivalenceCandidate seed{{src, dst, 0}, 0.0};  // Empty.
+  if (!old_image_index.IsToken(src))
+    return seed;
+
+  const EquivalenceCandidate forward =
+      ExtendEquivalenceForward(old_image_index, new_image_index,
+                               targets_affinities, seed, min_similarity);
+  // A weak forward extension is not worth exploring any further.
+  if (forward.similarity < min_similarity)
+    return forward;
+
+  return ExtendEquivalenceBackward(old_image_index, new_image_index,
+                                   targets_affinities, forward,
+                                   min_similarity);
+}
+
+/******** OffsetMapper ********/
+
+// Takes ownership of |equivalences|, which must already be sorted by
+// |src_offset| (DCHECKed). Unlike the other constructors, this one neither
+// sorts nor prunes its input.
+OffsetMapper::OffsetMapper(std::vector<Equivalence>&& equivalences)
+ : equivalences_(std::move(equivalences)) {
+ DCHECK(std::is_sorted(equivalences_.begin(), equivalences_.end(),
+ [](const Equivalence& a, const Equivalence& b) {
+ return a.src_offset < b.src_offset;
+ }));
+}
+
+// Drains |equivalence_source| into |equivalences_|, then sorts the collected
+// equivalences by source offset and prunes overlaps.
+OffsetMapper::OffsetMapper(EquivalenceSource&& equivalence_source) {
+  while (true) {
+    auto next = equivalence_source.GetNext();
+    if (!next.has_value())
+      break;
+    equivalences_.push_back(*next);
+  }
+  PruneEquivalencesAndSortBySource(&equivalences_);
+}
+
+// Copies the equivalence of every candidate in |equivalence_map|, then sorts
+// the copies by source offset and prunes overlaps.
+OffsetMapper::OffsetMapper(const EquivalenceMap& equivalence_map)
+    : equivalences_(equivalence_map.size()) {
+  auto out = equivalences_.begin();
+  for (auto it = equivalence_map.begin(); it != equivalence_map.end(); ++it) {
+    *out = it->eq;
+    ++out;
+  }
+  PruneEquivalencesAndSortBySource(&equivalences_);
+}
+
+OffsetMapper::~OffsetMapper() = default;
+
+// Projects |offset| from "old" to "new" using the equivalence whose source
+// block is nearest to |offset|, and returns |offset| translated by that
+// equivalence's (dst_offset - src_offset) displacement. |offset| need not lie
+// inside any source block. Requires at least one equivalence.
+offset_t OffsetMapper::ForwardProject(offset_t offset) const {
+  // With no equivalences, |pos| below would be end() and dereferencing it
+  // would be undefined behavior.
+  DCHECK(!equivalences_.empty());
+
+  // |pos| is the first equivalence whose source block starts after |offset|.
+  auto pos = std::upper_bound(
+      equivalences_.begin(), equivalences_.end(), offset,
+      [](offset_t a, const Equivalence& b) { return a < b.src_offset; });
+  if (pos != equivalences_.begin()) {
+    // Prefer the previous equivalence if there is no next one, if |offset|
+    // lies inside the previous source block, or if the end of the previous
+    // block is strictly closer to |offset| than the start of the next one.
+    if (pos == equivalences_.end() || offset < pos[-1].src_end() ||
+        offset - pos[-1].src_end() < pos->src_offset - offset) {
+      --pos;
+    }
+  }
+  return offset - pos->src_offset + pos->dst_offset;
+}
+
+// Projects every entry of the sorted list |offsets| from "old" to "new", in
+// place. An offset that is covered by the source block of an equivalence is
+// translated by that equivalence; any other offset is removed from the list.
+void OffsetMapper::ForwardProjectAll(std::vector<offset_t>* offsets) const {
+  DCHECK(std::is_sorted(offsets->begin(), offsets->end()));
+  // Since |offsets| is sorted, a single forward pass over the equivalences
+  // serves all of them.
+  auto unit = equivalences_.begin();
+  for (offset_t& offset : *offsets) {
+    // Skip equivalences whose source block ends at or before |offset|.
+    for (; unit != end() && unit->src_end() <= offset; ++unit) {
+    }
+
+    if (unit == end() || unit->src_offset > offset) {
+      // Not covered: mark for removal below.
+      offset = kInvalidOffset;
+    } else {
+      offset = offset - unit->src_offset + unit->dst_offset;
+    }
+  }
+  offsets->erase(std::remove(offsets->begin(), offsets->end(), kInvalidOffset),
+                 offsets->end());
+  offsets->shrink_to_fit();
+}
+
+// Sorts |equivalences| by |src_offset|, then resolves overlaps between source
+// blocks in place: where two blocks overlap, the longer one is kept intact and
+// the shorter one is truncated (on equal lengths the earlier one is kept).
+// Equivalences truncated to length 0 are erased at the end.
+void OffsetMapper::PruneEquivalencesAndSortBySource(
+ std::vector<Equivalence>* equivalences) {
+ std::sort(equivalences->begin(), equivalences->end(),
+ [](const Equivalence& a, const Equivalence& b) {
+ return a.src_offset < b.src_offset;
+ });
+
+ for (auto current = equivalences->begin(); current != equivalences->end();
+ ++current) {
+ // A "reaper" is an equivalence after |current| that overlaps with it, but
+ // is longer, and so truncates |current|. For example:
+ // ****** <= |current|
+ // **
+ // ****
+ // ****
+ // ********** <= |next| as reaper.
+ // If a reaper is found (as |next|), every equivalence strictly between
+ // |current| and |next| would be truncated to 0 and discarded. Handling this
+ // case is important to avoid O(n^2) behavior.
+ bool next_is_reaper = false;
+
+ // Look ahead to resolve overlaps, until a better candidate is found.
+ auto next = current + 1;
+ for (; next != equivalences->end(); ++next) {
+ DCHECK_GE(next->src_offset, current->src_offset);
+ if (next->src_offset >= current->src_end())
+ break; // No more overlap.
+
+ if (current->length < next->length) {
+ // |next| is better: So it is a reaper that shrinks |current|.
+ offset_t delta = current->src_end() - next->src_offset;
+ current->length -= delta;
+ next_is_reaper = true;
+ break;
+ }
+ }
+
+ if (next_is_reaper) {
+ // Discard all equivalences strictly between |current| and |next|.
+ for (auto reduced = current + 1; reduced != next; ++reduced)
+ reduced->length = 0;
+ // The loop's ++current then resumes at the reaper itself.
+ current = next - 1;
+ } else {
+ // Shrink all equivalences that overlap with |current|. These are all
+ // worse than |current| since no reaper is found.
+ for (auto reduced = current + 1; reduced != next; ++reduced) {
+ offset_t delta = current->src_end() - reduced->src_offset;
+ // The length is clamped at 0; the block's start always moves to the end
+ // of |current|.
+ reduced->length -= std::min(reduced->length, delta);
+ reduced->src_offset += delta;
+ reduced->dst_offset += delta;
+ DCHECK_EQ(reduced->src_offset, current->src_end());
+ }
+ }
+ }
+
+ // Discard all equivalences with length == 0.
+ equivalences->erase(std::remove_if(equivalences->begin(), equivalences->end(),
+ [](const Equivalence& equivalence) {
+ return equivalence.length == 0;
+ }),
+ equivalences->end());
+}
+
+/******** EquivalenceMap ********/
+
+EquivalenceMap::EquivalenceMap() = default;
+
+EquivalenceMap::EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates)
+    : candidates_(std::move(candidates)) {
+  SortByDestination();  // Keep |candidates_| ordered by |dst_offset|.
+}
+
+EquivalenceMap::EquivalenceMap(EquivalenceMap&&) = default;
+
+EquivalenceMap::~EquivalenceMap() = default;
+
+void EquivalenceMap::Build(
+    const std::vector<offset_t>& old_sa,
+    const EncodedView& old_view,
+    const EncodedView& new_view,
+    const std::vector<TargetsAffinity>& targets_affinities,
+    double min_similarity) {
+  DCHECK_EQ(old_sa.size(), old_view.size());
+  // Generate candidates, order them by |dst_offset|, then resolve overlaps.
+  CreateCandidates(old_sa, old_view, new_view, targets_affinities,
+                   min_similarity);
+  SortByDestination();
+  Prune(old_view, new_view, targets_affinities, min_similarity);
+  // Tally covered length for logging; iterate by reference to avoid copies.
+  offset_t coverage = 0;
+  offset_t current_offset = 0;
+  for (const auto& candidate : candidates_) {
+    DCHECK_GE(candidate.eq.dst_offset, current_offset);
+    coverage += candidate.eq.length;
+    current_offset = candidate.eq.dst_end();
+  }
+  LOG(INFO) << "Equivalence Count: " << size();
+  LOG(INFO) << "Coverage / Extra / Total: " << coverage << " / "
+            << new_view.size() - coverage << " / " << new_view.size();
+}
+
+void EquivalenceMap::CreateCandidates(
+ const std::vector<offset_t>& old_sa,
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity) {
+ candidates_.clear();
+
+ // This is a heuristic to find 'good' equivalences on encoded views.
+ // Equivalences are found in ascending order of |new_image|.
+ offset_t dst_offset = 0;
+
+ while (dst_offset < new_view.size()) {
+ if (!new_view.IsToken(dst_offset)) {
+ ++dst_offset;
+ continue;
+ }
+ auto match =
+ SuffixLowerBound(old_sa, old_view.begin(),
+ new_view.begin() + dst_offset, new_view.end());
+ // Default: advance by one offset unless a seed beats |min_similarity|.
+ offset_t next_dst_offset = dst_offset + 1;
+ // TODO(huangs): Clean up.
+ double best_similarity = min_similarity;
+ EquivalenceCandidate best_candidate = {{0, 0, 0}, 0.0};
+ for (auto it = match; it != old_sa.end(); ++it) {
+ EquivalenceCandidate candidate = VisitEquivalenceSeed(
+ old_view.image_index(), new_view.image_index(), targets_affinities,
+ static_cast<offset_t>(*it), dst_offset, min_similarity);
+ if (candidate.similarity > best_similarity) {
+ best_candidate = candidate;
+ best_similarity = candidate.similarity;
+ next_dst_offset = candidate.eq.dst_end();
+ } else {
+ break;
+ }
+ }
+ for (auto it = match; it != old_sa.begin(); --it) {
+ EquivalenceCandidate candidate = VisitEquivalenceSeed(
+ old_view.image_index(), new_view.image_index(), targets_affinities,
+ static_cast<offset_t>(it[-1]), dst_offset, min_similarity);
+ if (candidate.similarity > best_similarity) {
+ best_candidate = candidate;
+ best_similarity = candidate.similarity;
+ next_dst_offset = candidate.eq.dst_end();
+ } else {
+ break;
+ }
+ }
+ if (best_candidate.similarity >= min_similarity) {
+ candidates_.push_back(best_candidate);
+ }
+
+ dst_offset = next_dst_offset;
+ }
+}
+
+void EquivalenceMap::SortByDestination() {
+ std::sort(candidates_.begin(), candidates_.end(),
+ [](const EquivalenceCandidate& a, const EquivalenceCandidate& b) {
+ return a.eq.dst_offset < b.eq.dst_offset;
+ });
+}
+
+void EquivalenceMap::Prune(
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& target_affinities,
+ double min_similarity) {
+ // TODO(etiennep): unify with
+ // OffsetMapper::PruneEquivalencesAndSortBySource().
+ for (auto current = candidates_.begin(); current != candidates_.end();
+ ++current) {
+ if (current->similarity < min_similarity)
+ continue; // This candidate will be discarded anyways.
+
+ bool next_is_reaper = false;
+
+ // Look ahead to resolve overlaps, until a better candidate is found.
+ auto next = current + 1;
+ for (; next != candidates_.end(); ++next) {
+ DCHECK_GE(next->eq.dst_offset, current->eq.dst_offset);
+ if (next->eq.dst_offset >= current->eq.dst_offset + current->eq.length)
+ break; // No more overlap.
+
+ if (current->similarity < next->similarity) {
+ // |next| is better: So it is a reaper that shrinks |current|.
+ offset_t delta = current->eq.dst_end() - next->eq.dst_offset;
+ current->eq.length -= delta;
+ current->similarity = GetEquivalenceSimilarity(
+ old_view.image_index(), new_view.image_index(), target_affinities,
+ current->eq);
+
+ next_is_reaper = true;
+ break;
+ }
+ }
+
+ if (next_is_reaper) {
+ // Discard all candidates strictly between |current| and |next|.
+ for (auto reduced = current + 1; reduced != next; ++reduced) {
+ reduced->eq.length = 0;
+ reduced->similarity = 0;
+ }
+ current = next - 1;
+ } else {
+ // Shrinks all overlapping candidates following and worse than |current|.
+ for (auto reduced = current + 1; reduced != next; ++reduced) {
+ offset_t delta = current->eq.dst_end() - reduced->eq.dst_offset;
+ reduced->eq.length -= std::min(reduced->eq.length, delta);
+ reduced->eq.src_offset += delta;
+ reduced->eq.dst_offset += delta;
+ reduced->similarity = GetEquivalenceSimilarity(
+ old_view.image_index(), new_view.image_index(), target_affinities,
+ reduced->eq);
+ DCHECK_EQ(reduced->eq.dst_offset, current->eq.dst_end());
+ }
+ }
+ }
+
+ // Discard all candidates with similarity smaller than |min_similarity|.
+ candidates_.erase(
+ std::remove_if(candidates_.begin(), candidates_.end(),
+ [min_similarity](const EquivalenceCandidate& candidate) {
+ return candidate.similarity < min_similarity;
+ }),
+ candidates_.end());
+}
+
+} // namespace zucchini
diff --git a/equivalence_map.h b/equivalence_map.h
new file mode 100644
index 0000000..91b215c
--- /dev/null
+++ b/equivalence_map.h
@@ -0,0 +1,183 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
+#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
+
+#include <stddef.h>
+
+#include <limits>
+#include <vector>
+
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/targets_affinity.h"
+
+namespace zucchini {
+
+constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();
+
+class EncodedView;
+class EquivalenceSource;
+
+// Returns similarity score between a token (raw byte or first byte of a
+// reference) in |old_image_index| at |src| and a token in |new_image_index|
+// at |dst|. |targets_affinities| describes affinities for each target pool and
+// is used to evaluate similarity between references, hence its size must be
+// equal to the number of pools in both |old_image_index| and |new_image_index|.
+// Both |src| and |dst| must refer to tokens in |old_image_index| and
+// |new_image_index|.
+double GetTokenSimilarity(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ offset_t src,
+ offset_t dst);
+
+// Returns a similarity score between content in |old_image_index| and
+// |new_image_index| at regions described by |equivalence|, using
+// |targets_affinities| to evaluate similarity between references.
+double GetEquivalenceSimilarity(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const Equivalence& equivalence);
+
+// Extends |equivalence| forward and returns the result. This is related to
+// VisitEquivalenceSeed().
+EquivalenceCandidate ExtendEquivalenceForward(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const EquivalenceCandidate& equivalence,
+ double min_similarity);
+
+// Extends |equivalence| backward and returns the result. This is related to
+// VisitEquivalenceSeed().
+EquivalenceCandidate ExtendEquivalenceBackward(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const EquivalenceCandidate& equivalence,
+ double min_similarity);
+
+// Creates an equivalence, starting with |src| and |dst| as offset hint, and
+// extends it both forward and backward, trying to maximise similarity between
+// |old_image_index| and |new_image_index|, and returns the result.
+// |targets_affinities| is used to evaluate similarity between references.
+// |min_similarity| describes the minimum acceptable similarity score and is
+// used as threshold to discard bad equivalences.
+EquivalenceCandidate VisitEquivalenceSeed(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ offset_t src,
+ offset_t dst,
+ double min_similarity);
+
+// Container of pruned equivalences used to map offsets from |old_image| to
+// offsets in |new_image|. Equivalences are pruned by cropping smaller
+// equivalences to avoid overlaps, to make the equivalence map (for covered
+// bytes in |old_image| and |new_image|) one-to-one.
+class OffsetMapper {
+ public:
+ using const_iterator = std::vector<Equivalence>::const_iterator;
+
+ // Constructors for various data sources.
+ // - From a list of |equivalences|, already sorted (by |src_offset|) and
+ // pruned, useful for tests.
+ explicit OffsetMapper(std::vector<Equivalence>&& equivalences);
+ // - From a generator, useful for Zucchini-apply.
+ explicit OffsetMapper(EquivalenceSource&& equivalence_source);
+ // - From an EquivalenceMap that needs to be processed, useful for
+ // Zucchini-gen.
+ explicit OffsetMapper(const EquivalenceMap& equivalence_map);
+ ~OffsetMapper();
+
+ size_t size() const { return equivalences_.size(); }
+ const_iterator begin() const { return equivalences_.begin(); }
+ const_iterator end() const { return equivalences_.end(); }
+
+ // Returns an offset in |new_image| corresponding to |offset| in |old_image|.
+ // If |offset| is not part of an equivalence, the equivalence nearest to
+ // |offset| is used as if it contained |offset|. This assumes |equivalences_|
+ // is not empty.
+ offset_t ForwardProject(offset_t offset) const;
+
+ // Given sorted |offsets|, applies a projection in-place of all offsets that
+ // are part of a pruned equivalence from |old_image| to |new_image|. Other
+ // offsets are removed from |offsets|.
+ void ForwardProjectAll(std::vector<offset_t>* offsets) const;
+
+ // Accessor for testing. Returns a copy of |equivalences_|.
+ const std::vector<Equivalence> equivalences() const { return equivalences_; }
+
+ // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
+ // source location that was covered by some Equivalence would become covered
+ // by exactly one Equivalence. Moreover, for the offset, the equivalence
+ // corresponds to the largest (pre-pruning) covering Equivalence, and in case
+ // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
+ // change in size since empty Equivalences are removed.
+ static void PruneEquivalencesAndSortBySource(
+ std::vector<Equivalence>* equivalences);
+
+ private:
+ std::vector<Equivalence> equivalences_;
+};
+
+// Container of equivalences between |old_image_index| and |new_image_index|,
+// sorted by |Equivalence::dst_offset|, only used during patch generation.
+class EquivalenceMap {
+ public:
+ using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;
+
+ EquivalenceMap();
+ // Initializes the object with |candidates|.
+ explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
+ EquivalenceMap(EquivalenceMap&&);
+ EquivalenceMap(const EquivalenceMap&) = delete;
+ ~EquivalenceMap();
+
+ // Finds relevant equivalences between |old_view| and |new_view|, using
+ // suffix array |old_sa| computed from |old_view| and using
+ // |targets_affinities| to evaluate similarity between references. This
+ // function is not symmetric. Equivalences might overlap in |old_view|, but
+ // not in |new_view|. It tries to maximize accumulated similarity within each
+ // equivalence, while maximizing |new_view| coverage. The minimum similarity
+ // of an equivalence is given by |min_similarity|.
+ void Build(const std::vector<offset_t>& old_sa,
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity);
+
+ size_t size() const { return candidates_.size(); }
+ const_iterator begin() const { return candidates_.begin(); }
+ const_iterator end() const { return candidates_.end(); }
+
+ private:
+ // Discovers equivalence candidates between |old_view| and |new_view| and
+ // stores them in the object. Note that resulting candidates are not sorted
+ // and might be overlapping in new image.
+ void CreateCandidates(const std::vector<offset_t>& old_sa,
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity);
+ // Sorts candidates by their offset in new image.
+ void SortByDestination();
+ // Visits |candidates_| (sorted by |dst_offset|) and removes all destination
+ // overlaps. Candidates with low similarity scores are more likely to be
+ // shrunken. Unfit candidates may be removed.
+ void Prune(const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity);
+
+ std::vector<EquivalenceCandidate> candidates_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
diff --git a/equivalence_map_unittest.cc b/equivalence_map_unittest.cc
new file mode 100644
index 0000000..ce8ffe1
--- /dev/null
+++ b/equivalence_map_unittest.cc
@@ -0,0 +1,446 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/equivalence_map.h"
+
+#include <cstring>
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/encoded_view.h"
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/suffix_array.h"
+#include "components/zucchini/targets_affinity.h"
+#include "components/zucchini/test_disassembler.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using OffsetVector = std::vector<offset_t>;
+
+// Make all references 2 bytes long.
+constexpr offset_t kReferenceSize = 2;
+
+// Creates and initializes an ImageIndex from |a| with 2 types of references.
+// The result is populated with |refs0| and |refs1|. |a| is expected to be a
+// string literal valid for the lifetime of the object.
+ImageIndex MakeImageIndexForTesting(const char* a,
+ std::vector<Reference>&& refs0,
+ std::vector<Reference>&& refs1) {
+ TestDisassembler disasm(
+ {kReferenceSize, TypeTag(0), PoolTag(0)}, std::move(refs0),
+ {kReferenceSize, TypeTag(1), PoolTag(0)}, std::move(refs1),
+ {kReferenceSize, TypeTag(2), PoolTag(1)}, {});
+
+ ImageIndex image_index(
+ ConstBufferView(reinterpret_cast<const uint8_t*>(a), std::strlen(a)));
+
+ EXPECT_TRUE(image_index.Initialize(&disasm));
+ return image_index;
+}
+
+std::vector<TargetsAffinity> MakeTargetsAffinitiesForTesting(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const EquivalenceMap& equivalence_map) {
+ std::vector<TargetsAffinity> target_affinities(old_image_index.PoolCount());
+ for (const auto& old_pool_tag_and_targets : old_image_index.target_pools()) {
+ PoolTag pool_tag = old_pool_tag_and_targets.first;
+ target_affinities[pool_tag.value()].InferFromSimilarities(
+ equivalence_map, old_pool_tag_and_targets.second.targets(),
+ new_image_index.pool(pool_tag).targets());
+ }
+ return target_affinities;
+}
+
+} // namespace
+
+TEST(EquivalenceMapTest, GetTokenSimilarity) {
+ ImageIndex old_index = MakeImageIndexForTesting(
+ "ab1122334455", {{2, 0}, {4, 1}, {6, 2}, {8, 2}}, {{10, 3}});
+ // Note: {4, 1} -> {6, 3} and {6, 2} -> {4, 1}, then result is sorted.
+ ImageIndex new_index = MakeImageIndexForTesting(
+ "a11b33224455", {{1, 0}, {4, 1}, {6, 3}, {8, 1}}, {{10, 2}});
+ std::vector<TargetsAffinity> affinities = MakeTargetsAffinitiesForTesting(
+ old_index, new_index,
+ EquivalenceMap({{{0, 0, 1}, 1.0}, {{1, 3, 1}, 1.0}}));
+
+ // Raw match.
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 0));
+ // Raw mismatch. NOTE(review): (0, 1) is re-checked as a type mismatch below.
+ EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 1));
+ EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 1, 0));
+
+ // Type mismatch.
+ EXPECT_EQ(kMismatchFatal,
+ GetTokenSimilarity(old_index, new_index, affinities, 0, 1));
+ EXPECT_EQ(kMismatchFatal,
+ GetTokenSimilarity(old_index, new_index, affinities, 2, 0));
+ EXPECT_EQ(kMismatchFatal,
+ GetTokenSimilarity(old_index, new_index, affinities, 2, 10));
+ EXPECT_EQ(kMismatchFatal,
+ GetTokenSimilarity(old_index, new_index, affinities, 10, 1));
+
+ // Reference strong match.
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 1));
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 4, 6));
+
+ // Reference weak match.
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 4));
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 8));
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 8, 4));
+
+ // Weak match is not greater than strong match.
+ EXPECT_LE(GetTokenSimilarity(old_index, new_index, affinities, 6, 4),
+ GetTokenSimilarity(old_index, new_index, affinities, 2, 1));
+
+ // Reference mismatch.
+ EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 4));
+ EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 6));
+}
+
+TEST(EquivalenceMapTest, GetEquivalenceSimilarity) {
+ ImageIndex image_index =
+ MakeImageIndexForTesting("abcdef1122", {{6, 0}}, {{8, 1}});
+ std::vector<TargetsAffinity> affinities =
+ MakeTargetsAffinitiesForTesting(image_index, image_index, {});
+
+ // Sanity check. These are no-op with length-0 equivalences.
+ EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {0, 0, 0}));
+ EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {0, 3, 0}));
+ EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {3, 0, 0}));
+
+ // Now examine larger equivalences.
+ EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {0, 0, 3}));
+ EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {0, 3, 3}));
+ EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {3, 0, 3}));
+
+ EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {6, 6, 4}));
+}
+
+TEST(EquivalenceMapTest, ExtendEquivalenceForward) {
+ auto test_extend_forward =
+ [](const ImageIndex old_index, const ImageIndex new_index,
+ const EquivalenceCandidate& equivalence, double base_similarity) {
+ return ExtendEquivalenceForward(
+ old_index, new_index,
+ MakeTargetsAffinitiesForTesting(old_index, new_index, {}),
+ equivalence, base_similarity)
+ .eq;
+ };
+
+ EXPECT_EQ(Equivalence({0, 0, 0}),
+ test_extend_forward(MakeImageIndexForTesting("", {}, {}),
+ MakeImageIndexForTesting("", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 0}),
+ test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("zzzz", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 6}),
+ test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("banana", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({2, 2, 4}),
+ test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("banana", {}, {}),
+ {{2, 2, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 6}),
+ test_extend_forward(MakeImageIndexForTesting("bananaxx", {}, {}),
+ MakeImageIndexForTesting("bananayy", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({0, 0, 8}),
+ test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+ MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({0, 0, 6}),
+ test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+ MakeImageIndexForTesting("banana22", {}, {{6, 0}}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({0, 0, 17}),
+ test_extend_forward(MakeImageIndexForTesting("bananaxxpineapple", {}, {}),
+ MakeImageIndexForTesting("bananayypineapple", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({3, 0, 19}),
+ test_extend_forward(
+ MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
+ MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
+ {{3, 0, 0}, 0.0}, 8.0));
+}
+
+TEST(EquivalenceMapTest, ExtendEquivalenceBackward) {
+ auto test_extend_backward =
+ [](const ImageIndex old_index, const ImageIndex new_index,
+ const EquivalenceCandidate& equivalence, double base_similarity) {
+ return ExtendEquivalenceBackward(
+ old_index, new_index,
+ MakeTargetsAffinitiesForTesting(old_index, new_index, {}),
+ equivalence, base_similarity)
+ .eq;
+ };
+
+ EXPECT_EQ(Equivalence({0, 0, 0}),
+ test_extend_backward(MakeImageIndexForTesting("", {}, {}),
+ MakeImageIndexForTesting("", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({6, 4, 0}),
+ test_extend_backward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("zzzz", {}, {}),
+ {{6, 4, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 6}),
+ test_extend_backward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("banana", {}, {}),
+ {{6, 6, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({2, 2, 6}),
+ test_extend_backward(MakeImageIndexForTesting("xxbanana", {}, {}),
+ MakeImageIndexForTesting("yybanana", {}, {}),
+ {{8, 8, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({0, 0, 8}),
+ test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
+ MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
+ {{8, 8, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({2, 2, 6}),
+ test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
+ MakeImageIndexForTesting("22banana", {}, {{0, 0}}),
+ {{8, 8, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 17}),
+ test_extend_backward(
+ MakeImageIndexForTesting("bananaxxpineapple", {}, {}),
+ MakeImageIndexForTesting("bananayypineapple", {}, {}),
+ {{8, 8, 9}, 9.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({3, 0, 19}),
+ test_extend_backward(
+ MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
+ MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
+ {{22, 19, 0}, 0.0}, 8.0));
+}
+
+TEST(EquivalenceMapTest, PruneEquivalencesAndSortBySource) {
+  auto PruneEquivalencesAndSortBySourceTest =
+      [](std::vector<Equivalence>&& equivalences) {
+        OffsetMapper::PruneEquivalencesAndSortBySource(&equivalences);
+        return equivalences;
+      };
+
+  EXPECT_EQ(std::vector<Equivalence>(),
+            PruneEquivalencesAndSortBySourceTest({}));
+  EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}}),
+            PruneEquivalencesAndSortBySourceTest({{0, 10, 1}}));
+  EXPECT_EQ(std::vector<Equivalence>(),
+            PruneEquivalencesAndSortBySourceTest({{0, 10, 0}}));
+  EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}, {1, 11, 1}}),
+            PruneEquivalencesAndSortBySourceTest({{0, 10, 1}, {1, 11, 1}}));
+  EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}, {2, 13, 1}}),
+            PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 2}}));
+  EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}}),
+            PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 1}}));
+  EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}, {2, 14, 1}}),
+            PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 13, 2}}));
+  EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}, {1, 12, 3}}),
+            PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 3}}));
+  EXPECT_EQ(std::vector<Equivalence>({{0, 10, 3}, {3, 16, 2}}),
+            PruneEquivalencesAndSortBySourceTest(
+                {{0, 10, 3}, {1, 13, 3}, {3, 16, 2}}));  // Pruning is greedy
+
+  // Consider following pattern that may cause O(n^2) behavior if not handled
+  // properly.
+  //  ***************
+  //            **********
+  //             ********
+  //              ******
+  //               ****
+  //                **
+  //                 ***************
+  // This test case makes sure the function does not stall on a large instance
+  // of this pattern.
+  EXPECT_EQ(std::vector<Equivalence>({{0, 10, +300000}, {300000, 30, +300000}}),
+            PruneEquivalencesAndSortBySourceTest([] {
+              std::vector<Equivalence> equivalences;
+              equivalences.push_back({0, 10, +300000});
+              for (offset_t i = 0; i < 100000; ++i)
+                equivalences.push_back({200000 + i, 20, +200000 - 2 * i});
+              equivalences.push_back({300000, 30, +300000});
+              return equivalences;
+            }()));
+}
+
+TEST(EquivalenceMapTest, ForwardProject) {
+ auto ForwardProjectAllTest = [](const OffsetMapper& offset_mapper,
+ std::initializer_list<offset_t> offsets) {
+ OffsetVector offsets_vec(offsets);
+ offset_mapper.ForwardProjectAll(&offsets_vec);
+ return offsets_vec;
+ };
+
+ OffsetMapper offset_mapper1({{0, 10, 2}, {2, 13, 1}, {4, 16, 2}});
+ EXPECT_EQ(OffsetVector({10}), ForwardProjectAllTest(offset_mapper1, {0}));
+ EXPECT_EQ(OffsetVector({13}), ForwardProjectAllTest(offset_mapper1, {2}));
+ EXPECT_EQ(OffsetVector({}), ForwardProjectAllTest(offset_mapper1, {3}));
+ EXPECT_EQ(OffsetVector({10, 13}),
+ ForwardProjectAllTest(offset_mapper1, {0, 2}));
+ EXPECT_EQ(OffsetVector({11, 13, 17}),
+ ForwardProjectAllTest(offset_mapper1, {1, 2, 5}));
+ EXPECT_EQ(OffsetVector({11, 17}),
+ ForwardProjectAllTest(offset_mapper1, {1, 3, 5}));
+ EXPECT_EQ(OffsetVector({10, 11, 13, 16, 17}),
+ ForwardProjectAllTest(offset_mapper1, {0, 1, 2, 3, 4, 5, 6}));
+
+ OffsetMapper offset_mapper2({{0, 10, 2}, {13, 2, 1}, {16, 4, 2}});
+ EXPECT_EQ(OffsetVector({2}), ForwardProjectAllTest(offset_mapper2, {13}));
+ EXPECT_EQ(OffsetVector({10, 2}),
+ ForwardProjectAllTest(offset_mapper2, {0, 13}));
+ EXPECT_EQ(OffsetVector({11, 2, 5}),
+ ForwardProjectAllTest(offset_mapper2, {1, 13, 17}));
+ EXPECT_EQ(OffsetVector({11, 5}),
+ ForwardProjectAllTest(offset_mapper2, {1, 14, 17}));
+ EXPECT_EQ(OffsetVector({10, 11, 2, 4, 5}),
+ ForwardProjectAllTest(offset_mapper2, {0, 1, 13, 14, 16, 17, 18}));
+}
+
+TEST(EquivalenceMapTest, ProjectOffset) {
+ OffsetMapper offset_mapper1({{0, 10, 2}, {2, 13, 1}, {4, 16, 2}});
+ EXPECT_EQ(10U, offset_mapper1.ForwardProject(0));
+ EXPECT_EQ(11U, offset_mapper1.ForwardProject(1));
+ EXPECT_EQ(13U, offset_mapper1.ForwardProject(2));
+ EXPECT_EQ(14U, offset_mapper1.ForwardProject(3)); // Previous equivalence.
+ EXPECT_EQ(16U, offset_mapper1.ForwardProject(4));
+ EXPECT_EQ(17U, offset_mapper1.ForwardProject(5));
+ EXPECT_EQ(18U, offset_mapper1.ForwardProject(6)); // Previous equivalence.
+
+ OffsetMapper offset_mapper2({{0, 10, 2}, {13, 2, 1}, {16, 4, 2}});
+ EXPECT_EQ(10U, offset_mapper2.ForwardProject(0));
+ EXPECT_EQ(11U, offset_mapper2.ForwardProject(1));
+ EXPECT_EQ(2U, offset_mapper2.ForwardProject(13));
+ EXPECT_EQ(3U, offset_mapper2.ForwardProject(14)); // Previous equivalence.
+ EXPECT_EQ(4U, offset_mapper2.ForwardProject(16));
+ EXPECT_EQ(5U, offset_mapper2.ForwardProject(17));
+ EXPECT_EQ(6U, offset_mapper2.ForwardProject(18)); // Previous equivalence.
+}
+
+TEST(EquivalenceMapTest, Build) {
+  auto test_build_equivalence = [](const ImageIndex old_index,
+                                   const ImageIndex new_index,
+                                   double minimum_similarity) {
+    auto affinities = MakeTargetsAffinitiesForTesting(old_index, new_index, {});
+
+    EncodedView old_view(old_index);
+    EncodedView new_view(new_index);
+
+    for (const auto& old_pool_tag_and_targets : old_index.target_pools()) {
+      PoolTag pool_tag = old_pool_tag_and_targets.first;
+      std::vector<uint32_t> old_labels;
+      std::vector<uint32_t> new_labels;
+      size_t label_bound = affinities[pool_tag.value()].AssignLabels(
+          1.0, &old_labels, &new_labels);
+      old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
+      new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
+    }
+
+    std::vector<offset_t> old_sa =
+        MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());
+
+    EquivalenceMap equivalence_map;
+    equivalence_map.Build(old_sa, old_view, new_view, affinities,
+                          minimum_similarity);
+    // Candidates must be sorted by |dst_offset| and must not overlap in new.
+    offset_t current_dst_offset = 0;
+    offset_t coverage = 0;
+    for (const auto& candidate : equivalence_map) {
+      EXPECT_GE(candidate.eq.dst_offset, current_dst_offset);
+      EXPECT_GT(candidate.eq.length, offset_t(0));
+      EXPECT_LE(candidate.eq.src_offset + candidate.eq.length,
+                old_index.size());
+      EXPECT_LE(candidate.eq.dst_offset + candidate.eq.length,
+                new_index.size());
+      EXPECT_GE(candidate.similarity, minimum_similarity);
+      current_dst_offset = candidate.eq.dst_end();
+      coverage += candidate.eq.length;
+    }
+    return coverage;
+  };
+
+  EXPECT_EQ(0U,
+            test_build_equivalence(MakeImageIndexForTesting("", {}, {}),
+                                   MakeImageIndexForTesting("", {}, {}), 4.0));
+
+  EXPECT_EQ(0U, test_build_equivalence(
+                    MakeImageIndexForTesting("", {}, {}),
+                    MakeImageIndexForTesting("banana", {}, {}), 4.0));
+
+  EXPECT_EQ(0U,
+            test_build_equivalence(MakeImageIndexForTesting("banana", {}, {}),
+                                   MakeImageIndexForTesting("", {}, {}), 4.0));
+
+  EXPECT_EQ(0U, test_build_equivalence(
+                    MakeImageIndexForTesting("banana", {}, {}),
+                    MakeImageIndexForTesting("zzzz", {}, {}), 4.0));
+
+  EXPECT_EQ(6U, test_build_equivalence(
+                    MakeImageIndexForTesting("banana", {}, {}),
+                    MakeImageIndexForTesting("banana", {}, {}), 4.0));
+
+  EXPECT_EQ(6U, test_build_equivalence(
+                    MakeImageIndexForTesting("bananaxx", {}, {}),
+                    MakeImageIndexForTesting("bananayy", {}, {}), 4.0));
+
+  EXPECT_EQ(8U, test_build_equivalence(
+                    MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+                    MakeImageIndexForTesting("banana11", {{6, 0}}, {}), 4.0));
+
+  EXPECT_EQ(6U, test_build_equivalence(
+                    MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+                    MakeImageIndexForTesting("banana22", {}, {{6, 0}}), 4.0));
+
+  EXPECT_EQ(
+      15U,
+      test_build_equivalence(
+          MakeImageIndexForTesting("banana11pineapple", {{6, 0}}, {}),
+          MakeImageIndexForTesting("banana22pineapple", {}, {{6, 0}}), 4.0));
+
+  EXPECT_EQ(
+      15U,
+      test_build_equivalence(
+          MakeImageIndexForTesting("bananaxxxxxxxxpineapple", {}, {}),
+          MakeImageIndexForTesting("bananayyyyyyyypineapple", {}, {}), 4.0));
+
+  EXPECT_EQ(
+      19U,
+      test_build_equivalence(
+          MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
+          MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
+          4.0));
+}
+
+} // namespace zucchini
diff --git a/heuristic_ensemble_matcher.cc b/heuristic_ensemble_matcher.cc
new file mode 100644
index 0000000..aead5dc
--- /dev/null
+++ b/heuristic_ensemble_matcher.cc
@@ -0,0 +1,369 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/heuristic_ensemble_matcher.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/bind.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/strings/stringprintf.h"
+#include "components/zucchini/binary_data_histogram.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+namespace {
+
+/******** Helper Functions ********/
+
+// Uses |detector| to find embedded executables inside |image|. |name| is a
+// label for |image| that only appears in log messages. Returns the detected
+// elements on success, or base::nullopt on failure, which occurs if too many
+// (> |kElementLimit|) elements are found.
+base::Optional<std::vector<Element>> FindEmbeddedElements(
+    ConstBufferView image,
+    const std::string& name,
+    ElementDetector&& detector) {
+  // Maximum number of Elements in a file. This is enforced because our matching
+  // algorithm is O(n^2), which suffices for regular archive files that should
+  // have up to 10's of executable files. An archive containing 100's of
+  // executables is likely pathological, and is rejected to prevent exploits.
+  static constexpr size_t kElementLimit = 256;
+  std::vector<Element> elements;
+  ElementFinder element_finder(image, std::move(detector));
+  // Stop as soon as |kElementLimit| + 1 elements are collected: one element
+  // past the limit is enough to prove that the limit was exceeded.
+  for (auto element = element_finder.GetNext();
+       element.has_value() && elements.size() <= kElementLimit;
+       element = element_finder.GetNext()) {
+    elements.push_back(*element);
+  }
+  // Exactly |kElementLimit| elements is still within the limit, so compare
+  // with ">" rather than ">=".
+  if (elements.size() > kElementLimit) {
+    LOG(WARNING) << name << ": Found too many elements.";
+    return base::nullopt;
+  }
+  LOG(INFO) << name << ": Found " << elements.size() << " elements.";
+  return elements;
+}
+
+// Early-rejection heuristic for a proposed comparison between two Elements.
+// Returns true if |old_element| and |new_element| differ too much in size,
+// both in absolute terms (at least |kMinWorrysomeDifference| bytes) and in
+// relative terms (the larger is at least |kMaxBloat| times the smaller). Such
+// pairs are not compared, to decrease the likelihood of creating
+// false-positive matches, which may be costly for patching.
+bool UnsafeDifference(const Element& old_element, const Element& new_element) {
+  static constexpr double kMaxBloat = 2.0;
+  static constexpr size_t kMinWorrysomeDifference = 2 << 20;  // 2MB
+  const auto sizes = std::minmax(old_element.size, new_element.size);
+  const size_t smaller = sizes.first;
+  const size_t larger = sizes.second;
+  const bool large_absolute_gap = larger - smaller >= kMinWorrysomeDifference;
+  const bool large_relative_gap = larger >= smaller * kMaxBloat;
+  return large_absolute_gap && large_relative_gap;
+}
+
+// Streams |elt| to |stream| in the form "(<exe_type>, <offset> +<size>)", where
+// offset and size are formatted through AsHex<8, size_t>.
+std::ostream& operator<<(std::ostream& stream, const Element& elt) {
+  return stream << "(" << elt.exe_type << ", " << AsHex<8, size_t>(elt.offset)
+                << " +" << AsHex<8, size_t>(elt.size) << ")";
+}
+
+/******** MatchingInfoOut ********/
+
+// Interface for reporting detailed information during ensemble matching.
+// Keeping formatting and printing logic in a separate class decouples it from
+// the matching logic. Every hook in this base class is a no-op stub, so
+// subclasses only override the hooks they care about.
+class MatchingInfoOut {
+ public:
+  virtual ~MatchingInfoOut() = default;
+
+  // Receives the number of "old" and "new" elements.
+  virtual void InitSizes(size_t old_size, size_t new_size) {}
+
+  // Status hooks for the pair made of "old" element |iold| and "new" element
+  // |inew|.
+  virtual void DeclareTypeMismatch(int iold, int inew) {}
+  virtual void DeclareUnsafeDistance(int iold, int inew) {}
+  virtual void DeclareCandidate(int iold, int inew) {}
+  virtual void DeclareMatch(int iold,
+                            int inew,
+                            double dist,
+                            bool is_identical) {}
+  virtual void DeclareOutlier(int iold, int inew) {}
+
+  // Hooks for printing a single comparison and its distance |dist|.
+  virtual void OutputCompare(const Element& old_element,
+                             const Element& new_element,
+                             double dist) {}
+
+  // Hook for printing the best "old" element found for |new_element|.
+  virtual void OutputMatch(const Element& best_old_element,
+                           const Element& new_element,
+                           bool is_identical,
+                           double best_dist) {}
+
+  // Hook for printing |stats|, a rendered summary of distances.
+  virtual void OutputScores(const std::string& stats) {}
+
+  // Hook for printing an overview of all pairwise results.
+  virtual void OutputTextGrid() {}
+
+ protected:
+  // Only subclasses can be instantiated.
+  MatchingInfoOut() = default;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MatchingInfoOut);
+};
+
+/******** MatchingInfoOutTerse ********/
+
+// A terse MatchingInfoOut that only logs the score summary, using LOG().
+class MatchingInfoOutTerse : public MatchingInfoOut {
+ public:
+ MatchingInfoOutTerse() = default;
+ ~MatchingInfoOutTerse() override = default;
+
+ void OutputScores(const std::string& stats) override {  // Sole override.
+ LOG(INFO) << "Best dists: " << stats;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(MatchingInfoOutTerse);
+};
+
+/******** MatchingInfoOutVerbose ********/
+
+// A verbose MatchingInfoOut that prints detailed information using |out_|,
+// including comparison pairs, scores, and a text grid representation of
+// pairwise matching results.
+class MatchingInfoOutVerbose : public MatchingInfoOut {
+ public:
+  // |out| must outlive this object, since only a reference to it is kept.
+  explicit MatchingInfoOutVerbose(std::ostream& out) : out_(out) {}
+  ~MatchingInfoOutVerbose() override = default;
+
+  // Outputs sizes, then resets |text_grid_| to |new_size| rows of |old_size|
+  // '-' cells, and |best_dist_| to "no match" for every "new" element.
+  void InitSizes(size_t old_size, size_t new_size) override {
+    out_ << "Comparing old (" << old_size << " elements) and new (" << new_size
+         << " elements)" << std::endl;
+    text_grid_.assign(new_size, std::string(old_size, '-'));
+    best_dist_.assign(new_size, -1.0);
+  }
+
+  // Functions to update match status in text grid representation. Each one
+  // overwrites the cell of the pair ("new" |inew|, "old" |iold|), so
+  // InitSizes() must have been called with sizes that cover both indices.
+
+  void DeclareTypeMismatch(int iold, int inew) override {
+    text_grid_[inew][iold] = 'T';
+  }
+  void DeclareUnsafeDistance(int iold, int inew) override {
+    text_grid_[inew][iold] = 'U';
+  }
+  void DeclareCandidate(int iold, int inew) override {
+    text_grid_[inew][iold] = 'C';  // Provisional.
+  }
+  // Also records |dist| as the best distance of "new" element |inew|.
+  void DeclareMatch(int iold,
+                    int inew,
+                    double dist,
+                    bool is_identical) override {
+    text_grid_[inew][iold] = is_identical ? 'I' : 'M';
+    best_dist_[inew] = dist;
+  }
+  void DeclareOutlier(int iold, int inew) override {
+    text_grid_[inew][iold] = 'O';
+  }
+
+  // Functions to print detailed information.
+
+  // Prints one compared pair along with its distance, using 5 decimals.
+  void OutputCompare(const Element& old_element,
+                     const Element& new_element,
+                     double dist) override {
+    out_ << "Compare old" << old_element << " to new" << new_element << " --> "
+         << base::StringPrintf("%.5f", dist) << std::endl;
+  }
+
+  // Prints the outcome for |new_element|: either skipped because it is
+  // identical to |best_old_element|, or matched with distance |best_dist|.
+  void OutputMatch(const Element& best_old_element,
+                   const Element& new_element,
+                   bool is_identical,
+                   double best_dist) override {
+    if (is_identical) {
+      out_ << "Skipped old" << best_old_element << " - identical to new"
+           << new_element;
+    } else {
+      out_ << "Matched old" << best_old_element << " to new" << new_element
+           << " --> " << base::StringPrintf("%.5f", best_dist);
+    }
+    out_ << std::endl;
+  }
+
+  void OutputScores(const std::string& stats) override {
+    out_ << "Best dists: " << stats << std::endl;
+  }
+
+  // Prints one line per "new" element, with one cell per "old" element.
+  // Matched cells ('I' or 'M') are wrapped in parentheses, and the line ends
+  // with the best distance if one was recorded. A legend follows unless the
+  // grid has no rows.
+  void OutputTextGrid() override {
+    int new_size = static_cast<int>(text_grid_.size());
+    for (int inew = 0; inew < new_size; ++inew) {
+      const std::string& line = text_grid_[inew];
+      out_ << "  ";
+      for (char ch : line) {
+        char prefix = (ch == 'I' || ch == 'M') ? '(' : ' ';
+        char suffix = (ch == 'I' || ch == 'M') ? ')' : ' ';
+        out_ << prefix << ch << suffix;
+      }
+      if (best_dist_[inew] >= 0)
+        out_ << "   " << base::StringPrintf("%.5f", best_dist_[inew]);
+      out_ << std::endl;
+    }
+    if (!text_grid_.empty()) {
+      out_ << "  Legend: I = identical, M = matched, T = type mismatch, "
+              "U = unsafe distance, C = candidate, O = outlier, - = skipped."
+           << std::endl;
+    }
+  }
+
+ private:
+  std::ostream& out_;
+
+  // Text grid representation of matches. Rows correspond to "new" elements and
+  // columns correspond to "old" elements, i.e., cells are accessed as
+  // |text_grid_[inew][iold]|.
+  std::vector<std::string> text_grid_;
+
+  // For each "new" element, distance of best match. -1 denotes no match.
+  std::vector<double> best_dist_;
+
+  DISALLOW_COPY_AND_ASSIGN(MatchingInfoOutVerbose);
+};
+
+} // namespace
+
+/******** HeuristicEnsembleMatcher ********/
+
+HeuristicEnsembleMatcher::HeuristicEnsembleMatcher(std::ostream* out)
+ : out_(out) {}  // A null |out| selects terse output in RunMatch().
+
+HeuristicEnsembleMatcher::~HeuristicEnsembleMatcher() = default;
+
+// Detects the elements embedded in |old_image| and |new_image|, then pairs each
+// "new" element with its nearest "old" element and stores the accepted pairs
+// in |matches_|. Returns false if either image contains too many elements, and
+// true otherwise (even if no pair is found).
+bool HeuristicEnsembleMatcher::RunMatch(ConstBufferView old_image,
+                                        ConstBufferView new_image) {
+  DCHECK(matches_.empty());
+  LOG(INFO) << "Start matching.";
+
+  // Find all elements in "old", then in "new". Matching is abandoned as soon as
+  // detection fails for one of them.
+  base::Optional<std::vector<Element>> old_elements =
+      FindEmbeddedElements(old_image, "Old file",
+                           base::BindRepeating(DetectElementFromDisassembler));
+  if (!old_elements.has_value())
+    return false;
+  base::Optional<std::vector<Element>> new_elements =
+      FindEmbeddedElements(new_image, "New file",
+                           base::BindRepeating(DetectElementFromDisassembler));
+  if (!new_elements.has_value())
+    return false;
+
+  // Report tersely unless a stream was provided for detailed output.
+  std::unique_ptr<MatchingInfoOut> reporter;
+  if (!out_)
+    reporter = std::make_unique<MatchingInfoOutTerse>();
+  else
+    reporter = std::make_unique<MatchingInfoOutVerbose>(*out_);
+
+  const int num_new_elements = base::checked_cast<int>(new_elements->size());
+  const int num_old_elements = base::checked_cast<int>(old_elements->size());
+  reporter->InitSizes(num_old_elements, num_new_elements);
+
+  // For each "new" element, match it with the "old" element that's nearest to
+  // it, with distance determined by BinaryDataHistogram. The resulting
+  // "old"-"new" pairs are stored into |matched_pairs|. Possibilities:
+  // - Type mismatch: No match.
+  // - UnsafeDifference() heuristics fail: No match.
+  // - Identical match: Skip "new" since this is a trivial case.
+  // - Non-identical match: Match "new" with "old" with min distance.
+  // - No match: Skip "new".
+  struct MatchedPair {
+    int iold;
+    int inew;
+    double dist;
+  };
+  std::vector<MatchedPair> matched_pairs;
+
+  // Histograms of "old" elements are needed once per "new" element, so compute
+  // all of them up front.
+  std::vector<BinaryDataHistogram> old_histograms(num_old_elements);
+  for (int iold = 0; iold < num_old_elements; ++iold) {
+    ConstBufferView old_sub_image(old_image[(*old_elements)[iold]]);
+    old_histograms[iold].Compute(old_sub_image);
+    // ProgramDetector should have imposed minimal size limit to
+    // |old_sub_image|. Therefore resulting histograms are expected to be valid.
+    CHECK(old_histograms[iold].IsValid());
+  }
+
+  // Out-of-range index that denotes "no old element selected yet".
+  const int kNoOldElement = num_old_elements;
+  for (int inew = 0; inew < num_new_elements; ++inew) {
+    const Element& new_element = (*new_elements)[inew];
+    ConstBufferView new_sub_image(new_image[new_element.region()]);
+    BinaryDataHistogram new_histogram;
+    new_histogram.Compute(new_sub_image);
+    CHECK(new_histogram.IsValid());
+
+    double best_dist = HUGE_VAL;
+    int best_iold = kNoOldElement;
+    bool is_identical = false;
+
+    for (int iold = 0; iold < num_old_elements; ++iold) {
+      const Element& old_element = (*old_elements)[iold];
+      if (old_element.exe_type != new_element.exe_type) {
+        reporter->DeclareTypeMismatch(iold, inew);
+        continue;
+      }
+      if (UnsafeDifference(old_element, new_element)) {
+        reporter->DeclareUnsafeDistance(iold, inew);
+        continue;
+      }
+      const double dist = old_histograms[iold].Distance(new_histogram);
+      reporter->DeclareCandidate(iold, inew);
+      reporter->OutputCompare(old_element, new_element, dist);
+      // Tie resolution: First-one, first-serve. Only a strictly smaller
+      // distance replaces the current best.
+      if (!(best_dist > dist))
+        continue;
+      best_iold = iold;
+      best_dist = dist;
+      // Byte-wise comparison is only attempted when the distance is zero.
+      if (best_dist != 0)
+        continue;
+      ConstBufferView old_sub_image(old_image[old_element.region()]);
+      if (old_sub_image.equals(new_sub_image)) {
+        is_identical = true;
+        break;
+      }
+    }
+
+    // No acceptable "old" element was found: skip this "new" element.
+    if (best_iold == kNoOldElement)
+      continue;
+
+    const Element& best_old_element = (*old_elements)[best_iold];
+    reporter->DeclareMatch(best_iold, inew, best_dist, is_identical);
+    if (is_identical)  // Skip "new" if identical match is found.
+      ++num_identical_;
+    else
+      matched_pairs.push_back({best_iold, inew, best_dist});
+    reporter->OutputMatch(best_old_element, new_element, is_identical,
+                          best_dist);
+  }
+
+  // Populate |matches_| from |matched_pairs|. To reduce the chance of
+  // false-positive matches, statistics on dists are computed. If a pair's
+  // |dist| is an outlier then it is rejected.
+  if (!matched_pairs.empty()) {
+    OutlierDetector detector;
+    for (const MatchedPair& entry : matched_pairs) {
+      if (entry.dist > 0)
+        detector.Add(entry.dist);
+    }
+    detector.Prepare();
+    reporter->OutputScores(detector.RenderStats());
+    for (const MatchedPair& entry : matched_pairs) {
+      if (detector.DecideOutlier(entry.dist) > 0) {
+        reporter->DeclareOutlier(entry.iold, entry.inew);
+      } else {
+        matches_.push_back(
+            {(*old_elements)[entry.iold], (*new_elements)[entry.inew]});
+      }
+    }
+    reporter->OutputTextGrid();
+  }
+
+  Trim();
+  return true;
+}
+
+} // namespace zucchini
diff --git a/heuristic_ensemble_matcher.h b/heuristic_ensemble_matcher.h
new file mode 100644
index 0000000..1adb998
--- /dev/null
+++ b/heuristic_ensemble_matcher.h
@@ -0,0 +1,39 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_
+#define COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_
+
+#include <ostream>
+
+#include "base/macros.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/ensemble_matcher.h"
+
+namespace zucchini {
+
+// An ensemble matcher that:
+// - Detects embedded elements in "old" and "new" archive files.
+// - Applies heuristics to create matched pairs.
+// It is desired to have matched pairs that:
+// - Have "reasonable" size difference (see UnsafeDifference() in the .cc file).
+// - Have "minimal distance" among other potential matched pairs.
+class HeuristicEnsembleMatcher : public EnsembleMatcher {
+ public:
+ explicit HeuristicEnsembleMatcher(std::ostream* out);  // |out| may be null.
+ ~HeuristicEnsembleMatcher() override;
+
+ // EnsembleMatcher: Returns false if either image has too many elements.
+ bool RunMatch(ConstBufferView old_image, ConstBufferView new_image) override;
+
+ private:
+ // Optional stream for detailed matching output. Not owned, and may be null.
+ std::ostream* out_ = nullptr;
+
+ DISALLOW_COPY_AND_ASSIGN(HeuristicEnsembleMatcher);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_
diff --git a/image_index.cc b/image_index.cc
new file mode 100644
index 0000000..6c7a28b
--- /dev/null
+++ b/image_index.cc
@@ -0,0 +1,78 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/image_index.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/disassembler.h"
+
+namespace zucchini {
+
+ImageIndex::ImageIndex(ConstBufferView image)
+ : image_(image), type_tags_(image.size(), kNoTypeTag) {}  // All untagged.
+
+ImageIndex::ImageIndex(ImageIndex&&) = default;
+
+ImageIndex::~ImageIndex() = default;
+
+// Populates |target_pools_| and |reference_sets_| from the reference groups
+// made by |disasm|. Returns false as soon as two references are found to
+// overlap, and true otherwise.
+bool ImageIndex::Initialize(Disassembler* disasm) {
+  const std::vector<ReferenceGroup> groups = disasm->MakeReferenceGroups();
+
+  // First pass: build the pool-to-type mapping, and feed each pool with the
+  // targets of every group that belongs to it.
+  for (const ReferenceGroup& group : groups) {
+    DCHECK_NE(kNoPoolTag, group.pool_tag());
+    TargetPool& pool_of_group = target_pools_[group.pool_tag()];
+    pool_of_group.AddType(group.type_tag());
+    auto reader = group.GetReader(disasm);
+    pool_of_group.InsertTargets(std::move(*reader));
+  }
+
+  // Second pass: find and store all references of each type. Any overlap
+  // between references is signaled as an error.
+  for (const ReferenceGroup& group : groups) {
+    auto reader = group.GetReader(disasm);
+    const bool inserted = InsertReferences(group.traits(), std::move(*reader));
+    if (!inserted)
+      return false;
+  }
+  return true;
+}
+
+// Returns true if |location| holds raw data, or is the first byte of the
+// reference that covers it.
+bool ImageIndex::IsToken(offset_t location) const {
+  const TypeTag type = LookupType(location);
+  if (type != kNoTypeTag) {
+    // |location| points into a Reference, of which only the first byte is a
+    // token.
+    const IndirectReference covering = refs(type).at(location);
+    return covering.location == location;
+  }
+  // |location| points into raw data.
+  return true;
+}
+
+// Stores all references read from |ref_reader| into a new ReferenceSet for the
+// type described by |traits|, and tags the bytes they cover in |type_tags_|.
+// Returns false as soon as a reference overlaps a byte that is already tagged,
+// and true otherwise.
+bool ImageIndex::InsertReferences(const ReferenceTypeTraits& traits,
+                                  ReferenceReader&& ref_reader) {
+  DCHECK_NE(kNoTypeTag, traits.type_tag);
+  // Create the ReferenceSet for the type described by |traits|.
+  auto emplaced = reference_sets_.emplace(
+      traits.type_tag, ReferenceSet(traits, pool(traits.pool_tag)));
+  DCHECK(emplaced.second);
+  ReferenceSet& ref_set = emplaced.first->second;
+  ref_set.InitReferences(std::move(ref_reader));
+
+  const auto is_tagged = [](TypeTag type) { return type != kNoTypeTag; };
+  for (auto ref : ref_set) {
+    DCHECK(RangeIsBounded(ref.location, traits.width, size()));
+    const auto first = type_tags_.begin() + ref.location;
+    const auto last = first + traits.width;
+
+    // A byte that is already tagged belongs to an existing reference, so this
+    // is an overlap. If found, then invalidate.
+    if (std::any_of(first, last, is_tagged))
+      return false;
+    std::fill(first, last, traits.type_tag);
+  }
+  return true;
+}
+
+} // namespace zucchini
diff --git a/image_index.h b/image_index.h
new file mode 100644
index 0000000..4f07015
--- /dev/null
+++ b/image_index.h
@@ -0,0 +1,116 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_
+#define COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <vector>
+
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/reference_set.h"
+#include "components/zucchini/target_pool.h"
+
+namespace zucchini {
+
+class Disassembler;
+
+// Holds annotations of an image, allowing quick access to its raw and
+// reference content. The memory overhead of storing all references is
+// relatively high, so this is only used during patch generation.
+class ImageIndex {
+ public:
+  explicit ImageIndex(ConstBufferView image);
+  ImageIndex(const ImageIndex&) = delete;
+  ImageIndex(ImageIndex&&);
+  ~ImageIndex();
+
+  // Inserts all references read from |disasm|. This should be called exactly
+  // once. Returns true on success. If overlap between any two references of
+  // any type is encountered, returns false and leaves the object in an invalid
+  // state.
+  // TODO(huangs): Refactor ReaderFactory and WriterFactory so
+  // |const Disassembler&| can be used here.
+  bool Initialize(Disassembler* disasm);
+
+  // Returns the array size needed to accommodate all reference type values,
+  // i.e., the largest stored type value plus 1, or 0 if no type is stored.
+  size_t TypeCount() const {
+    if (!reference_sets_.empty())
+      return reference_sets_.rbegin()->first.value() + 1;
+    return 0U;
+  }
+
+  // Returns the array size needed to accommodate all pool values, i.e., the
+  // largest stored pool value plus 1, or 0 if no pool is stored.
+  size_t PoolCount() const {
+    if (!target_pools_.empty())
+      return target_pools_.rbegin()->first.value() + 1;
+    return 0U;
+  }
+
+  // Returns true if |image_[location]| is either:
+  // - A raw value.
+  // - The first byte of a reference.
+  bool IsToken(offset_t location) const;
+
+  // Returns true if |image_[location]| is part of a reference.
+  bool IsReference(offset_t location) const {
+    const TypeTag type = LookupType(location);
+    return type != kNoTypeTag;
+  }
+
+  // Returns the type tag of the reference covering |location|, or kNoTypeTag if
+  // |location| is not part of a reference. |location| must be less than
+  // size().
+  TypeTag LookupType(offset_t location) const {
+    DCHECK_LT(location, size());
+    return type_tags_[location];
+  }
+
+  // Returns the raw value at |location|, which must be less than size().
+  uint8_t GetRawValue(offset_t location) const {
+    DCHECK_LT(location, size());
+    return image_[location];
+  }
+
+  // Read-only access to all pools and all reference sets.
+  const std::map<PoolTag, TargetPool>& target_pools() const {
+    return target_pools_;
+  }
+  const std::map<TypeTag, ReferenceSet>& reference_sets() const {
+    return reference_sets_;
+  }
+
+  // Read-only access to a single pool or reference set. Both rely on
+  // std::map::at(), so the requested tag must be present.
+  const TargetPool& pool(PoolTag pool_tag) const {
+    return target_pools_.at(pool_tag);
+  }
+  const ReferenceSet& refs(TypeTag type_tag) const {
+    return reference_sets_.at(type_tag);
+  }
+
+  // Returns the size of the image.
+  size_t size() const { return image_.size(); }
+
+ private:
+  // Inserts to |*this| index, all references described by |traits| read from
+  // |ref_reader|, which gets consumed. This should be called exactly once for
+  // each reference type. Returns true on success. If overlap between any two
+  // references of any type is encountered, returns false and leaves the object
+  // in an invalid state.
+  bool InsertReferences(const ReferenceTypeTraits& traits,
+                        ReferenceReader&& ref_reader);
+
+  const ConstBufferView image_;
+
+  // Used for random access lookup of reference type, for each byte in |image_|.
+  std::vector<TypeTag> type_tags_;
+
+  std::map<PoolTag, TargetPool> target_pools_;
+  std::map<TypeTag, ReferenceSet> reference_sets_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_
diff --git a/image_index_unittest.cc b/image_index_unittest.cc
new file mode 100644
index 0000000..cf6f8a7
--- /dev/null
+++ b/image_index_unittest.cc
@@ -0,0 +1,131 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/image_index.h"
+
+#include <stddef.h>
+
+#include <numeric>
+#include <vector>
+
+#include "base/test/gtest_util.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/test_disassembler.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+class ImageIndexTest : public testing::Test {
+ protected:
+ ImageIndexTest()
+ : buffer_(20),  // 20-byte test image.
+ image_index_(ConstBufferView(buffer_.data(), buffer_.size())) {
+ std::iota(buffer_.begin(), buffer_.end(), 0);  // Bytes are 0, 1, ..., 19.
+ }
+
+ void InitializeWithDefaultTestData() {  // 3 types, 2 pools, no overlap.
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)},
+ {{1, 0}, {8, 1}, {10, 2}},
+ {4, TypeTag(1), PoolTag(0)}, {{3, 3}},
+ {3, TypeTag(2), PoolTag(1)}, {{12, 4}, {17, 5}});
+ EXPECT_TRUE(image_index_.Initialize(&disasm));
+ }
+
+ std::vector<uint8_t> buffer_;  // Declared first: |image_index_| views it.
+ ImageIndex image_index_;
+};
+
+TEST_F(ImageIndexTest, TypeAndPool) {
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, {},
+ {4, TypeTag(1), PoolTag(0)}, {},
+ {3, TypeTag(2), PoolTag(1)}, {});
+ EXPECT_TRUE(image_index_.Initialize(&disasm));
+
+ EXPECT_EQ(3U, image_index_.TypeCount());  // Largest TypeTag (2) plus 1.
+ EXPECT_EQ(2U, image_index_.PoolCount());  // Largest PoolTag (1) plus 1.
+
+ EXPECT_EQ(TypeTag(0), image_index_.refs(TypeTag(0)).type_tag());
+ EXPECT_EQ(TypeTag(1), image_index_.refs(TypeTag(1)).type_tag());
+ EXPECT_EQ(TypeTag(2), image_index_.refs(TypeTag(2)).type_tag());
+
+ EXPECT_EQ(PoolTag(0), image_index_.refs(TypeTag(0)).pool_tag());
+ EXPECT_EQ(PoolTag(0), image_index_.refs(TypeTag(1)).pool_tag());
+ EXPECT_EQ(PoolTag(1), image_index_.refs(TypeTag(2)).pool_tag());
+}
+
+TEST_F(ImageIndexTest, InvalidInitialize1) {
+ // Overlap within the same group: width-2 references at 1 and 2 share byte 2.
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, {{1, 0}, {2, 0}},
+ {4, TypeTag(1), PoolTag(0)}, {},
+ {3, TypeTag(2), PoolTag(1)}, {});
+ EXPECT_FALSE(image_index_.Initialize(&disasm));
+}
+
+TEST_F(ImageIndexTest, InvalidInitialize2) {
+ // Overlap across different readers: type 0 at 10 (width 2) vs. type 2 at 11.
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)},
+ {{1, 0}, {8, 1}, {10, 2}},
+ {4, TypeTag(1), PoolTag(0)}, {{3, 3}},
+ {3, TypeTag(2), PoolTag(1)}, {{11, 0}});
+ EXPECT_FALSE(image_index_.Initialize(&disasm));
+}
+
+TEST_F(ImageIndexTest, LookupType) {
+ InitializeWithDefaultTestData();
+
+ std::vector<int> expected = {  // Expected type value of each image byte.
+ -1, // raw, offset 0
+ 0, 0, // type 0, offsets 1-2
+ 1, 1, 1, 1, // type 1, offsets 3-6
+ -1, // raw, offset 7
+ 0, 0, // type 0, offsets 8-9
+ 0, 0, // type 0, offsets 10-11
+ 2, 2, 2, // type 2, offsets 12-14
+ -1, -1, // raw, offsets 15-16
+ 2, 2, 2, // type 2, offsets 17-19
+ };
+
+ for (offset_t i = 0; i < image_index_.size(); ++i)
+ EXPECT_EQ(TypeTag(expected[i]), image_index_.LookupType(i));  // -1 -> 0xFF.
+}
+
+TEST_F(ImageIndexTest, IsToken) {
+ InitializeWithDefaultTestData();
+
+ std::vector<bool> expected = {  // 1 for raw bytes and reference first bytes.
+ 1, // raw, offset 0
+ 1, 0, // type 0, offsets 1-2
+ 1, 0, 0, 0, // type 1, offsets 3-6
+ 1, // raw, offset 7
+ 1, 0, // type 0, offsets 8-9
+ 1, 0, // type 0, offsets 10-11
+ 1, 0, 0, // type 2, offsets 12-14
+ 1, 1, // raw, offsets 15-16
+ 1, 0, 0, // type 2, offsets 17-19
+ };
+
+ for (offset_t i = 0; i < image_index_.size(); ++i)
+ EXPECT_EQ(expected[i], image_index_.IsToken(i));
+}
+
+TEST_F(ImageIndexTest, IsReference) {
+ InitializeWithDefaultTestData();
+
+ std::vector<bool> expected = {  // 1 for every byte covered by a reference.
+ 0, // raw, offset 0
+ 1, 1, // type 0, offsets 1-2
+ 1, 1, 1, 1, // type 1, offsets 3-6
+ 0, // raw, offset 7
+ 1, 1, // type 0, offsets 8-9
+ 1, 1, // type 0, offsets 10-11
+ 1, 1, 1, // type 2, offsets 12-14
+ 0, 0, // raw, offsets 15-16
+ 1, 1, 1, // type 2, offsets 17-19
+ };
+
+ for (offset_t i = 0; i < image_index_.size(); ++i)
+ EXPECT_EQ(expected[i], image_index_.IsReference(i));
+}
+
+} // namespace zucchini
diff --git a/image_utils.h b/image_utils.h
new file mode 100644
index 0000000..c3db9ed
--- /dev/null
+++ b/image_utils.h
@@ -0,0 +1,206 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
+#define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/numerics/safe_conversions.h"
+#include "base/optional.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/typed_value.h"
+
+namespace zucchini {
+
+// offset_t is used to describe an offset in an image. Since it is 32-bit,
+// files bigger than 4GB are not supported.
+using offset_t = uint32_t;
+// Divide by 2 since label marking uses the most significant bit.
+constexpr offset_t kOffsetBound = static_cast<offset_t>(-1) / 2;  // 0x7FFFFFFF
+constexpr offset_t kInvalidOffset = static_cast<offset_t>(-1);  // 0xFFFFFFFF
+
+// key_t is used to identify an offset in a table.
+using key_t = uint32_t;
+
+enum Bitness : uint8_t {
+ // Each value is the width in bytes, so that WidthOf() below is a plain cast.
+ kBit32 = 4,
+ kBit64 = 8
+};
+
+inline uint32_t WidthOf(Bitness bitness) {  // Returns the width in bytes.
+ return static_cast<uint32_t>(bitness);
+}
+
+// Used to uniquely identify a reference type.
+// Strongly typed objects are used to avoid ambiguities with PoolTag.
+struct TypeTag : public TypedValue<TypeTag, uint8_t> {
+ // Inherit the constructors of TypedValue:
+ using TypedValue<TypeTag, uint8_t>::TypedValue;
+};
+
+// Used to uniquely identify a pool. Strongly typed, just like TypeTag.
+struct PoolTag : public TypedValue<PoolTag, uint8_t> {
+ // Inherit the constructors of TypedValue:
+ using TypedValue<PoolTag, uint8_t>::TypedValue;
+};
+
+constexpr TypeTag kNoTypeTag(0xFF); // Typically used to identify raw data.
+constexpr PoolTag kNoPoolTag(0xFF);  // Sentinel for an absent pool.
+
+// Specification of a reference type: its byte width, type tag, and pool tag.
+struct ReferenceTypeTraits {
+ constexpr ReferenceTypeTraits(offset_t width_in,
+ TypeTag type_tag_in,
+ PoolTag pool_tag_in)
+ : width(width_in), type_tag(type_tag_in), pool_tag(pool_tag_in) {}
+
+ // |width| specifies number of bytes covered by the reference's binary
+ // encoding.
+ const offset_t width;
+ // |type_tag| identifies the reference type being described.
+ const TypeTag type_tag;
+ // |pool_tag| identifies the pool this type belongs to.
+ const PoolTag pool_tag;
+};
+
+// There is no need to store |type| because references of the same type are
+// always aggregated into the same container, and so during iteration we'd have
+// |type| already.
+struct Reference {
+ offset_t location;  // Where the reference is located in the image.
+ offset_t target;  // What the reference points to, as an offset.
+};
+
+inline bool operator==(const Reference& a, const Reference& b) {
+ return a.location == b.location && a.target == b.target;
+}
+
+struct IndirectReference {
+ offset_t location;  // Where the reference is located in the image.
+ key_t target_key; // Key within a pool of references with same semantics.
+};
+
+inline bool operator==(const IndirectReference& a, const IndirectReference& b) {
+ return a.location == b.location && a.target_key == b.target_key;
+}
+
+// Interface for extracting References through member function GetNext().
+// This is used by Disassemblers to extract references from an image file.
+// Typically, a Reader lazily extracts values and does not hold any storage.
+class ReferenceReader {
+ public:
+ virtual ~ReferenceReader() = default;
+
+ // Returns the next available Reference, or base::nullopt if exhausted.
+ // Extracted References must be ordered by their location in the image.
+ virtual base::Optional<Reference> GetNext() = 0;
+};
+
+// Interface for writing References through member function
+// PutNext(reference). This is used by Disassemblers to write new References
+// in the image file.
+class ReferenceWriter {
+ public:
+ virtual ~ReferenceWriter() = default;
+
+ // Writes |reference| in the underlying image file. This operation always
+ // succeeds, which is why nothing is returned.
+ virtual void PutNext(Reference reference) = 0;
+};
+
+// Position of the most significant bit (MSB) of offset_t.
+constexpr offset_t kIndexMarkBitPosition = sizeof(offset_t) * 8 - 1;
+
+// Helper functions to mark an offset_t, so we can distinguish file offsets from
+// Label indices. Implementation: A value is marked iff its MSB is set.
+
+// Returns true if |value| is marked.
+constexpr bool IsMarked(offset_t value) {
+  return (value >> kIndexMarkBitPosition) != 0;
+}
+
+// Returns |value| with its MSB set. Marking a marked value is a no-op.
+constexpr offset_t MarkIndex(offset_t value) {
+  return value | (static_cast<offset_t>(1) << kIndexMarkBitPosition);
+}
+
+// Returns |value| with its MSB cleared. Unmarking an unmarked value is a
+// no-op.
+constexpr offset_t UnmarkIndex(offset_t value) {
+  return value & ~(static_cast<offset_t>(1) << kIndexMarkBitPosition);
+}
+
+// Constant as placeholder for non-existing offset for an index.
+constexpr offset_t kUnusedIndex = static_cast<offset_t>(-1);
+static_assert(IsMarked(kUnusedIndex), "kUnusedIndex must be marked");
+
+// An Equivalence is a block of length |length| that approximately matches in
+// |old_image| at an offset of |src_offset| and in |new_image| at an offset of
+// |dst_offset|.
+struct Equivalence {
+ offset_t src_offset;
+ offset_t dst_offset;
+ offset_t length;
+
+ offset_t src_end() const { return src_offset + length; }  // Exclusive end.
+ offset_t dst_end() const { return dst_offset + length; }  // Exclusive end.
+};
+
+inline bool operator==(const Equivalence& a, const Equivalence& b) {
+ return a.src_offset == b.src_offset && a.dst_offset == b.dst_offset &&
+ a.length == b.length;
+}
+
+// An Equivalence paired with a similarity score. This is only used when
+// generating the patch.
+struct EquivalenceCandidate {
+ Equivalence eq;
+ double similarity;
+};
+
+// Enumeration of supported executable types.
+enum ExecutableType : uint32_t {
+ kExeTypeUnknown = UINT32_MAX,
+ kExeTypeNoOp = 0,  // Denotes a region of raw data (see Element).
+ kExeTypeWin32X86 = 1,
+ kExeTypeWin32X64 = 2,
+ kExeTypeElfX86 = 3,
+ kExeTypeElfX64 = 4,
+ kExeTypeElfArm32 = 5,
+ kExeTypeElfAArch64 = 6,
+ kExeTypeDex = 7,
+ kNumExeType  // Implicitly 8: one past the last type listed above.
+};
+
+// A region in an image with associated executable type |exe_type|. If
+// |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data.
+struct Element : public BufferRegion {
+ Element() = default;
+ constexpr Element(const BufferRegion& region_in, ExecutableType exe_type_in)
+ : BufferRegion(region_in), exe_type(exe_type_in) {}
+ constexpr explicit Element(const BufferRegion& region_in)
+ : BufferRegion(region_in), exe_type(kExeTypeNoOp) {}
+
+ // Similar to lo() and hi(), but returns values checked-cast to offset_t.
+ offset_t BeginOffset() const { return base::checked_cast<offset_t>(lo()); }
+ offset_t EndOffset() const { return base::checked_cast<offset_t>(hi()); }
+
+ BufferRegion region() const { return {offset, size}; }  // Drops |exe_type|.
+
+ friend bool operator==(const Element& a, const Element& b) {
+ return a.exe_type == b.exe_type && a.offset == b.offset && a.size == b.size;
+ }
+
+ ExecutableType exe_type;  // Not initialized by the default constructor.
+};
+
+// A matched pair of Elements, one from "old" and one from "new".
+struct ElementMatch {
+ bool IsValid() const { return old_element.exe_type == new_element.exe_type; }
+ ExecutableType exe_type() const { return old_element.exe_type; }
+
+ Element old_element;
+ Element new_element;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
diff --git a/image_utils_unittest.cc b/image_utils_unittest.cc
new file mode 100644
index 0000000..7cae9d2
--- /dev/null
+++ b/image_utils_unittest.cc
@@ -0,0 +1,77 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/image_utils.h"
+
+#include "base/logging.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// WidthOf() is expected to return 4 for |kBit32| and 8 for |kBit64|.
+TEST(ImageUtilsTest, Bitness) {
+ EXPECT_EQ(4U, WidthOf(kBit32));
+ EXPECT_EQ(8U, WidthOf(kBit64));
+}
+
+// Each pair checks a value with the most significant bit clear (unmarked)
+// against the same value with that bit set (marked).
+TEST(ImageUtilsTest, IsMarked) {
+  EXPECT_FALSE(IsMarked(0x00000000));
+  EXPECT_TRUE(IsMarked(0x80000000));
+
+  EXPECT_FALSE(IsMarked(0x00000001));
+  EXPECT_TRUE(IsMarked(0x80000001));
+
+  EXPECT_FALSE(IsMarked(0x70000000));
+  EXPECT_TRUE(IsMarked(0xF0000000));
+
+  EXPECT_FALSE(IsMarked(0x7FFFFFFF));
+  EXPECT_TRUE(IsMarked(0xFFFFFFFF));
+
+  // Counterpart of 0xC0000000, mirroring the MarkIndex / UnmarkIndex tests
+  // (previously a duplicate of the 0x70000000 case above).
+  EXPECT_FALSE(IsMarked(0x40000000));
+  EXPECT_TRUE(IsMarked(0xC0000000));
+
+  EXPECT_FALSE(IsMarked(0x0000BEEF));
+  EXPECT_TRUE(IsMarked(0x8000BEEF));
+}
+
+// MarkIndex() is expected to return its input with the most significant bit
+// set; inputs that already have the bit set come back unchanged.
+TEST(ImageUtilsTest, MarkIndex) {
+ EXPECT_EQ(offset_t(0x80000000), MarkIndex(0x00000000));
+ EXPECT_EQ(offset_t(0x80000000), MarkIndex(0x80000000));
+
+ EXPECT_EQ(offset_t(0x80000001), MarkIndex(0x00000001));
+ EXPECT_EQ(offset_t(0x80000001), MarkIndex(0x80000001));
+
+ EXPECT_EQ(offset_t(0xF0000000), MarkIndex(0x70000000));
+ EXPECT_EQ(offset_t(0xF0000000), MarkIndex(0xF0000000));
+
+ EXPECT_EQ(offset_t(0xFFFFFFFF), MarkIndex(0x7FFFFFFF));
+ EXPECT_EQ(offset_t(0xFFFFFFFF), MarkIndex(0xFFFFFFFF));
+
+ EXPECT_EQ(offset_t(0xC0000000), MarkIndex(0x40000000));
+ EXPECT_EQ(offset_t(0xC0000000), MarkIndex(0xC0000000));
+
+ EXPECT_EQ(offset_t(0x8000BEEF), MarkIndex(0x0000BEEF));
+ EXPECT_EQ(offset_t(0x8000BEEF), MarkIndex(0x8000BEEF));
+}
+
+// UnmarkIndex() is expected to return its input with the most significant bit
+// cleared; inputs that already have the bit clear come back unchanged.
+TEST(ImageUtilsTest, UnmarkIndex) {
+ EXPECT_EQ(offset_t(0x00000000), UnmarkIndex(0x00000000));
+ EXPECT_EQ(offset_t(0x00000000), UnmarkIndex(0x80000000));
+
+ EXPECT_EQ(offset_t(0x00000001), UnmarkIndex(0x00000001));
+ EXPECT_EQ(offset_t(0x00000001), UnmarkIndex(0x80000001));
+
+ EXPECT_EQ(offset_t(0x70000000), UnmarkIndex(0x70000000));
+ EXPECT_EQ(offset_t(0x70000000), UnmarkIndex(0xF0000000));
+
+ EXPECT_EQ(offset_t(0x7FFFFFFF), UnmarkIndex(0x7FFFFFFF));
+ EXPECT_EQ(offset_t(0x7FFFFFFF), UnmarkIndex(0xFFFFFFFF));
+
+ EXPECT_EQ(offset_t(0x40000000), UnmarkIndex(0x40000000));
+ EXPECT_EQ(offset_t(0x40000000), UnmarkIndex(0xC0000000));
+
+ EXPECT_EQ(offset_t(0x0000BEEF), UnmarkIndex(0x0000BEEF));
+ EXPECT_EQ(offset_t(0x0000BEEF), UnmarkIndex(0x8000BEEF));
+}
+
+} // namespace zucchini
diff --git a/integration_test.cc b/integration_test.cc
new file mode 100644
index 0000000..b0ec864
--- /dev/null
+++ b/integration_test.cc
@@ -0,0 +1,104 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "base/files/file_path.h"
+#include "base/files/memory_mapped_file.h"
+#include "base/optional.h"
+#include "base/path_service.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/zucchini.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// Returns the path of test data file |filename|, located under the source
+// root. The lookup is wrapped in CHECK() rather than DCHECK(): a DCHECK()
+// condition is not evaluated in builds without DCHECKs, which would skip the
+// PathService::Get() call and leave |path| empty.
+base::FilePath MakeTestPath(const std::string& filename) {
+  base::FilePath path;
+  CHECK(PathService::Get(base::DIR_SOURCE_ROOT, &path));
+  return path.AppendASCII("chrome")
+      .AppendASCII("installer")
+      .AppendASCII("zucchini")
+      .AppendASCII("testdata")
+      .AppendASCII(filename);
+}
+
+// Round-trip helper: generates a patch from |old_filename| to |new_filename|
+// (a raw patch if |raw| is true, an ensemble patch otherwise), serializes it,
+// reads it back, applies it to the old file, and checks that the result is
+// identical to the new file. Both names are resolved with MakeTestPath().
+void TestGenApply(const std::string& old_filename,
+ const std::string& new_filename,
+ bool raw) {
+ base::FilePath old_path = MakeTestPath(old_filename);
+ base::FilePath new_path = MakeTestPath(new_filename);
+
+ base::MemoryMappedFile old_file;
+ ASSERT_TRUE(old_file.Initialize(old_path));
+
+ base::MemoryMappedFile new_file;
+ ASSERT_TRUE(new_file.Initialize(new_path));
+
+ ConstBufferView old_region(old_file.data(), old_file.length());
+ ConstBufferView new_region(new_file.data(), new_file.length());
+
+ EnsemblePatchWriter patch_writer(old_region, new_region);
+
+ // Generate patch from "old" to "new".
+ ASSERT_EQ(status::kStatusSuccess,
+ raw ? GenerateRaw(old_region, new_region, &patch_writer)
+ : GenerateEnsemble(old_region, new_region, &patch_writer));
+
+ size_t patch_size = patch_writer.SerializedSize();
+ EXPECT_GE(patch_size, 80U); // Minimum size is empty patch.
+ // TODO(etiennep): Add check on maximum expected size.
+
+ // Serialize the patch into an in-memory buffer of exactly the reported size.
+ std::vector<uint8_t> patch_buffer(patch_writer.SerializedSize());
+ patch_writer.SerializeInto({patch_buffer.data(), patch_buffer.size()});
+
+ // Read back generated patch.
+ base::Optional<EnsemblePatchReader> patch_reader =
+ EnsemblePatchReader::Create({patch_buffer.data(), patch_buffer.size()});
+ ASSERT_TRUE(patch_reader.has_value());
+
+ // Check basic properties.
+ EXPECT_TRUE(patch_reader->CheckOldFile(old_region));
+ EXPECT_TRUE(patch_reader->CheckNewFile(new_region));
+ EXPECT_EQ(old_file.length(), patch_reader->header().old_size);
+ // If new_size doesn't match expectation, the function is aborted.
+ ASSERT_EQ(new_file.length(), patch_reader->header().new_size);
+
+ // Apply patch to "old" to get "patched new", ensure it's identical to "new".
+ std::vector<uint8_t> patched_new_buffer(new_region.size());
+ ASSERT_EQ(status::kStatusSuccess,
+ Apply(old_region, *patch_reader,
+ {patched_new_buffer.data(), patched_new_buffer.size()}));
+
+ // Note that |new_region| and |patched_new_buffer| are the same size.
+ EXPECT_TRUE(std::equal(new_region.begin(), new_region.end(),
+ patched_new_buffer.begin()));
+}
+
+// Raw patches (third argument true) between two file pairs.
+TEST(EndToEndTest, GenApplyRaw) {
+ TestGenApply("setup1.exe", "setup2.exe", true);
+ TestGenApply("chrome64_1.exe", "chrome64_2.exe", true);
+}
+
+// Ensemble patch from a file to itself.
+TEST(EndToEndTest, GenApplyIdentity) {
+ TestGenApply("setup1.exe", "setup1.exe", false);
+}
+
+// Ensemble patches between file pairs, including the reverse direction for
+// the setup pair.
+TEST(EndToEndTest, GenApplySimple) {
+ TestGenApply("setup1.exe", "setup2.exe", false);
+ TestGenApply("setup2.exe", "setup1.exe", false);
+ TestGenApply("chrome64_1.exe", "chrome64_2.exe", false);
+}
+
+// Ensemble patch between two differently named executables.
+TEST(EndToEndTest, GenApplyCross) {
+ TestGenApply("setup1.exe", "chrome64_1.exe", false);
+}
+
+} // namespace zucchini
diff --git a/io_utils.cc b/io_utils.cc
new file mode 100644
index 0000000..aa493d0
--- /dev/null
+++ b/io_utils.cc
@@ -0,0 +1,52 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/io_utils.h"
+
+#include <iostream>
+
+namespace zucchini {
+
+/******** LimitedOutputStream::StreamBuf ********/
+
+// Stores the destination stream |os| and the |limit| that full() compares
+// |counter_| against.
+LimitedOutputStream::StreamBuf::StreamBuf(std::ostream& os, int limit)
+ : os_(os), limit_(limit) {}
+
+LimitedOutputStream::StreamBuf::~StreamBuf() {
+  // Text still sitting in the buffer was never flushed with std::endl, so
+  // report how much is being dropped.
+  if (str().empty())
+    return;
+  std::cerr << "Warning: LimitedOutputStream has " << str().length()
+            << " bytes of unflushed output." << std::endl;
+}
+
+// Forwards the buffered text to |os_| and counts the call. Once the count
+// reaches |limit_|, a one-time suppression notice is emitted and every later
+// call just discards the buffered text. Always returns 0.
+int LimitedOutputStream::StreamBuf::sync() {
+  if (!full()) {
+    os_ << str();
+    str("");
+    ++counter_;
+    if (full())
+      os_ << "(Additional output suppressed)\n";
+    os_.flush();
+  } else {
+    // Limit already reached: drop the text without forwarding it.
+    str("");
+  }
+  return 0;
+}
+
+/******** LimitedOutputStream ********/
+
+// Routes this stream's output through |buf_|, which forwards to |os| until
+// |limit| is reached.
+// NOTE(review): |&buf_| is handed to the std::ostream base before |buf_| is
+// constructed; presumably fine because the base only stores the pointer.
+LimitedOutputStream::LimitedOutputStream(std::ostream& os, int limit)
+ : std::ostream(&buf_), buf_(os, limit) {}
+
+/******** PrefixSep ********/
+
+// Emits nothing the first time |obj| is streamed, and |obj.sep_str_| on every
+// later use. Mutates |obj| to remember that the first use has happened.
+std::ostream& operator<<(std::ostream& ostr, PrefixSep& obj) {
+  if (!obj.first_) {
+    ostr << obj.sep_str_;
+  } else {
+    obj.first_ = false;
+  }
+  return ostr;
+}
+
+} // namespace zucchini
diff --git a/io_utils.h b/io_utils.h
new file mode 100644
index 0000000..56f7075
--- /dev/null
+++ b/io_utils.h
@@ -0,0 +1,146 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_IO_UTILS_H_
+#define COMPONENTS_ZUCCHINI_IO_UTILS_H_
+
+#include <stdint.h>
+
+#include <cctype>
+#include <istream>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+#include "base/macros.h"
+
+namespace zucchini {
+
+// An std::ostream wrapper that limits number of std::endl lines to output,
+// useful for preventing excessive debug message output. Usage requires some
+// work by the caller. Sample:
+// static LimitedOutputStream los(std::cerr, 10);
+// if (!los.full()) {
+// ... // Prepare message. Block may be skipped so don't do other work!
+// los << message;
+// los << std::endl; // Important!
+// }
+class LimitedOutputStream : public std::ostream {
+ private:
+ // String buffer that accumulates output; its sync() override is where the
+ // limit is enforced.
+ class StreamBuf : public std::stringbuf {
+ public:
+ StreamBuf(std::ostream& os, int limit);
+ ~StreamBuf() override;
+
+ int sync() override;
+ // True once |counter_| has reached |limit_|.
+ bool full() const { return counter_ >= limit_; }
+
+ private:
+ // Destination stream.
+ std::ostream& os_;
+ // Threshold that |counter_| is compared against.
+ const int limit_;
+ int counter_ = 0;
+ };
+
+ public:
+ LimitedOutputStream(std::ostream& os, int limit);
+ // Forwards to StreamBuf::full(); callers should skip output once this is
+ // true (see sample above).
+ bool full() const { return buf_.full(); }
+
+ private:
+ StreamBuf buf_;
+
+ DISALLOW_COPY_AND_ASSIGN(LimitedOutputStream);
+};
+
+// A class to render hexadecimal numbers for std::ostream with 0-padding. This
+// is more concise and flexible than stateful STL manipulator alternatives; so:
+// std::ios old_fmt(nullptr);
+// old_fmt.copyfmt(std::cout);
+// std::cout << std::uppercase << std::hex;
+// std::cout << std::setfill('0') << std::setw(8) << int_data << std::endl;
+// std::cout.copyfmt(old_fmt);
+// can be expressed as:
+// std::cout << AsHex<8>(int_data) << std::endl;
+// |N| is the number of hex digits to print; |T| is the type of the value.
+template <int N, typename T = uint32_t>
+struct AsHex {
+ explicit AsHex(T value_in) : value(value_in) {}
+ T value;
+};
+
+// Writes the low |N| hex digits of |as_hex.value| (uppercase, zero-padded).
+// If anything is left after shifting those digits out, which happens for
+// values that are too wide or negative, the output is prefixed with "...".
+template <int N, typename T>
+std::ostream& operator<<(std::ostream& os, const AsHex<N, T>& as_hex) {
+  char digits[N + 1];
+  digits[N] = '\0';
+  T remaining = as_hex.value;
+  // Fill from the least significant digit (rightmost) to the left.
+  int pos = N;
+  while (pos > 0) {
+    --pos;
+    digits[pos] = "0123456789ABCDEF"[static_cast<int>(remaining & 0x0F)];
+    remaining >>= 4;
+  }
+  if (remaining)
+    os << "...";  // To indicate data truncation, or negative values.
+  os << digits;
+  return os;
+}
+
+// An output manipulator to simplify printing list separators. Sample usage:
+// PrefixSep sep(",");
+// for (int i : {3, 1, 4, 1, 5, 9})
+// std::cout << sep << i;
+// std::cout << std::endl; // Outputs "3,1,4,1,5,9\n".
+class PrefixSep {
+ public:
+ explicit PrefixSep(const std::string& sep_str) : sep_str_(sep_str) {}
+
+ // Takes |obj| by non-const reference, since streaming it needs access to
+ // the |first_| state.
+ friend std::ostream& operator<<(std::ostream& ostr, PrefixSep& obj);
+
+ private:
+ // Separator text.
+ std::string sep_str_;
+ // Starts out true.
+ bool first_ = true;
+
+ DISALLOW_COPY_AND_ASSIGN(PrefixSep);
+};
+
+// An input manipulator that dictates the expected next character in
+// |std::istream|, and invalidates the stream if expectation is not met.
+class EatChar {
+ public:
+  explicit EatChar(char ch) : ch_(ch) {}
+
+  // Consumes one character from |istr| (unless it has already failed) and
+  // sets failbit if that character is not |obj.ch_|, or if the stream is at
+  // its end.
+  friend inline std::istream& operator>>(std::istream& istr,
+                                         const EatChar& obj) {
+    // get() returns an int_type; convert |ch_| the same way so that chars
+    // with the high bit set still compare equal where |char| is signed.
+    if (!istr.fail() &&
+        istr.get() != std::char_traits<char>::to_int_type(obj.ch_)) {
+      istr.setstate(std::ios_base::failbit);
+    }
+    return istr;
+  }
+
+ private:
+  // The character expected next in the stream.
+  char ch_;
+
+  DISALLOW_COPY_AND_ASSIGN(EatChar);
+};
+
+// An input manipulator that reads an unsigned integer from |std::istream|
+// into the referenced variable, and invalidates the stream on failure. The
+// next character must be a digit, so leading white space and signs are
+// rejected.
+template <typename T>
+class StrictUInt {
+ public:
+  explicit StrictUInt(T& var) : var_(var) {}
+  StrictUInt(const StrictUInt&) = default;
+
+  friend std::istream& operator>>(std::istream& istr, StrictUInt<T> obj) {
+    // An already-failed stream goes straight to the regular extraction (which
+    // then does nothing); otherwise a digit must come next.
+    if (istr.fail() || ::isdigit(istr.peek()))
+      return istr >> obj.var_;
+    istr.setstate(std::ios_base::failbit);
+    return istr;
+  }
+
+ private:
+  // Destination of the parsed value.
+  T& var_;
+};
+
+// Stub out uint8_t: istream treats it as char, and value won't be read as int!
+template <>
+struct StrictUInt<uint8_t> {};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_IO_UTILS_H_
diff --git a/io_utils_unittest.cc b/io_utils_unittest.cc
new file mode 100644
index 0000000..f3e82ec
--- /dev/null
+++ b/io_utils_unittest.cc
@@ -0,0 +1,161 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/io_utils.h"
+
+#include <stdint.h>
+
+#include <sstream>
+#include <string>
+
+#include "base/logging.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// Uses a limit of 3 and checks that each std::endl forwards the pending text,
+// and that the third one also emits the suppression notice and makes full()
+// true.
+TEST(IOUtilsTest, LimitedOutputStream) {
+ std::ostringstream oss;
+ LimitedOutputStream los(oss, 3);
+ EXPECT_FALSE(los.full());
+ EXPECT_EQ("", oss.str());
+ // Line 1.
+ los << "a" << 1 << "b" << 2 << "c" << 3 << std::endl;
+ EXPECT_FALSE(los.full());
+ EXPECT_EQ("a1b2c3\n", oss.str());
+ // Line 2.
+ oss.str("");
+ los << "\r\r\n\n" << std::endl; // Manual new lines don't count.
+ EXPECT_FALSE(los.full());
+ EXPECT_EQ("\r\r\n\n\n", oss.str());
+ // Line 3.
+ oss.str("");
+ los << "blah" << 137;
+ EXPECT_FALSE(los.full());
+ los << std::endl;
+ EXPECT_TRUE(los.full());
+ EXPECT_EQ("blah137\n(Additional output suppressed)\n", oss.str());
+ // Not testing adding more lines: the behavior is undefined since we rely on
+ // caller suppressing output if |los.full()| is true.
+}
+
+// Covers zero padding, exact-width values, the "..." prefix for values wider
+// than the requested digit count or negative, and non-default value types.
+TEST(IOUtilsTest, AsHex) {
+ std::ostringstream oss;
+ // Helper for single-line tests. Eats dummy std::ostream& from operator<<().
+ // Returns what was written to |oss| and resets it for the next check.
+ auto extract = [&oss](std::ostream&) -> std::string {
+ std::string ret = oss.str();
+ oss.str("");
+ return ret;
+ };
+
+ EXPECT_EQ("00000000", extract(oss << AsHex<8>(0)));
+ EXPECT_EQ("12345678", extract(oss << AsHex<8>(0x12345678U)));
+ EXPECT_EQ("9ABCDEF0", extract(oss << AsHex<8>(0x9ABCDEF0U)));
+ EXPECT_EQ("(00000064)", extract(oss << "(" << AsHex<8>(100) << ")"));
+ EXPECT_EQ("00FFFF", extract(oss << AsHex<6>(0xFFFFU)));
+ EXPECT_EQ("FFFF", extract(oss << AsHex<4>(0xFFFFU)));
+ EXPECT_EQ("...FF", extract(oss << AsHex<2>(0xFFFFU)));
+ EXPECT_EQ("...00", extract(oss << AsHex<2>(0x100U)));
+ EXPECT_EQ("FF\n", extract(oss << AsHex<2>(0xFFU) << std::endl));
+ EXPECT_EQ("132457689BACDEF0",
+ extract(oss << AsHex<16, uint64_t>(0x132457689BACDEF0LLU)));
+ EXPECT_EQ("000000000001", extract(oss << AsHex<12, uint8_t>(1)));
+ EXPECT_EQ("00000089", extract(oss << AsHex<8, int32_t>(137)));
+ EXPECT_EQ("...FFFFFFFF", extract(oss << AsHex<8, int32_t>(-1)));
+ EXPECT_EQ("7FFF", extract(oss << AsHex<4, int16_t>(0x7FFFU)));
+ EXPECT_EQ("...8000", extract(oss << AsHex<4, int16_t>(0x8000U)));
+ EXPECT_EQ("8000", extract(oss << AsHex<4, uint16_t>(0x8000U)));
+}
+
+// The separator must be absent before the first item and present before every
+// later one, including when chained in a single expression.
+TEST(IOUtilsTest, PrefixSep) {
+ std::ostringstream oss;
+ PrefixSep sep(",");
+ oss << sep << 3;
+ EXPECT_EQ("3", oss.str());
+ oss << sep << 1;
+ EXPECT_EQ("3,1", oss.str());
+ oss << sep << 4 << sep << 1 << sep << "59";
+ EXPECT_EQ("3,1,4,1,59", oss.str());
+}
+
+// Same sequence as the PrefixSep test, with a different separator string.
+TEST(IOUtilsTest, PrefixSepAlt) {
+ std::ostringstream oss;
+ PrefixSep sep(" ");
+ oss << sep << 3;
+ EXPECT_EQ("3", oss.str());
+ oss << sep << 1;
+ EXPECT_EQ("3 1", oss.str());
+ oss << sep << 4 << sep << 1 << sep << "59";
+ EXPECT_EQ("3 1 4 1 59", oss.str());
+}
+
+// Checks that a chain of EatChar succeeds only when every expected character
+// matches, and fails on mismatch, early end of input, or wrong case.
+TEST(IOUtilsTest, EatChar) {
+  std::istringstream main_iss;
+  // Helper for single-line tests: resets |main_iss| to read from |s|. Takes
+  // |s| by const reference (as the StrictUInt test does) to avoid a copy.
+  auto iss = [&main_iss](const std::string& s) -> std::istringstream& {
+    main_iss.clear();
+    main_iss.str(s);
+    return main_iss;
+  };
+
+  EXPECT_TRUE(iss("a,1") >> EatChar('a') >> EatChar(',') >> EatChar('1'));
+  EXPECT_FALSE(iss("a,a") >> EatChar('a') >> EatChar(',') >> EatChar('1'));
+  EXPECT_FALSE(iss("a") >> EatChar('a') >> EatChar(',') >> EatChar('1'));
+  EXPECT_FALSE(iss("x") >> EatChar('X'));
+  EXPECT_TRUE(iss("_\n") >> EatChar('_') >> EatChar('\n'));
+}
+
+// Checks accepted inputs (plain digits, leading zeros, digits followed by
+// other text) and rejected ones (non-digits, leading space, minus sign,
+// overflow) for several unsigned widths.
+TEST(IOUtilsTest, StrictUInt) {
+ std::istringstream main_iss;
+ // Helper for single-line tests.
+ auto iss = [&main_iss](const std::string& s) -> std::istringstream& {
+ main_iss.clear();
+ main_iss.str(s);
+ return main_iss;
+ };
+
+ uint32_t u32 = 0;
+ EXPECT_TRUE(iss("1234") >> StrictUInt<uint32_t>(u32));
+ EXPECT_EQ(uint32_t(1234), u32);
+ EXPECT_TRUE(iss("001234") >> StrictUInt<uint32_t>(u32));
+ EXPECT_EQ(uint32_t(1234), u32);
+ EXPECT_FALSE(iss("blahblah") >> StrictUInt<uint32_t>(u32));
+ EXPECT_EQ(uint32_t(1234), u32); // No overwrite on failure.
+ EXPECT_TRUE(iss("137suffix") >> StrictUInt<uint32_t>(u32));
+ EXPECT_EQ(uint32_t(137), u32);
+ EXPECT_FALSE(iss(" 1234") >> StrictUInt<uint32_t>(u32));
+ EXPECT_FALSE(iss("-1234") >> StrictUInt<uint32_t>(u32));
+
+ uint16_t u16 = 0;
+ EXPECT_TRUE(iss("65535") >> StrictUInt<uint16_t>(u16));
+ EXPECT_EQ(uint16_t(65535), u16);
+ EXPECT_FALSE(iss("65536") >> StrictUInt<uint16_t>(u16)); // Overflow.
+
+ uint64_t u64 = 0;
+ EXPECT_TRUE(iss("1000000000001") >> StrictUInt<uint64_t>(u64));
+ EXPECT_EQ(uint64_t(1000000000001LL), u64);
+
+ // uint8_t is stubbed out, so no tests for it.
+}
+
+// Combines StrictUInt and EatChar to parse two comma-separated equations from
+// a single stream, reusing the same three variables for both.
+TEST(IOUtilsTest, ParseSimpleEquations) {
+ std::istringstream iss("123+456=579,4-3=1");
+ uint32_t a = 0;
+ uint32_t b = 0;
+ uint32_t c = 0;
+ EXPECT_TRUE(iss >> StrictUInt<uint32_t>(a) >> EatChar('+') >>
+ StrictUInt<uint32_t>(b) >> EatChar('=') >>
+ StrictUInt<uint32_t>(c));
+ EXPECT_EQ(uint32_t(123), a);
+ EXPECT_EQ(uint32_t(456), b);
+ EXPECT_EQ(uint32_t(579), c);
+ // The separator between the two equations.
+ EXPECT_TRUE(iss >> EatChar(','));
+ EXPECT_TRUE(iss >> StrictUInt<uint32_t>(a) >> EatChar('-') >>
+ StrictUInt<uint32_t>(b) >> EatChar('=') >>
+ StrictUInt<uint32_t>(c));
+ EXPECT_EQ(uint32_t(4), a);
+ EXPECT_EQ(uint32_t(3), b);
+ EXPECT_EQ(uint32_t(1), c);
+}
+
+} // namespace zucchini
diff --git a/label_manager.cc b/label_manager.cc
new file mode 100644
index 0000000..4b74d8b
--- /dev/null
+++ b/label_manager.cc
@@ -0,0 +1,93 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/label_manager.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "base/logging.h"
+#include "components/zucchini/algorithm.h"
+
+namespace zucchini {
+
+/******** BaseLabelManager ********/
+
+// Special members are defaulted here, out of line from the class definition.
+BaseLabelManager::BaseLabelManager() = default;
+BaseLabelManager::BaseLabelManager(const BaseLabelManager&) = default;
+BaseLabelManager::~BaseLabelManager() = default;
+
+/******** OrderedLabelManager ********/
+
+// Special members are defaulted here, out of line from the class definition.
+OrderedLabelManager::OrderedLabelManager() = default;
+OrderedLabelManager::OrderedLabelManager(const OrderedLabelManager&) = default;
+OrderedLabelManager::~OrderedLabelManager() = default;
+
+// Binary-searches |labels_| for |offset|. Returns its position on an exact
+// match, or |kUnusedIndex| otherwise.
+offset_t OrderedLabelManager::IndexOfOffset(offset_t offset) const {
+  const auto first = labels_.begin();
+  const auto last = labels_.end();
+  const auto pos = std::lower_bound(first, last, offset);
+  // lower_bound() only finds the first element not less than |offset|, so an
+  // exact match still has to be confirmed.
+  if (pos == last || *pos != offset)
+    return kUnusedIndex;
+  return static_cast<offset_t>(pos - first);
+}
+
+// Appends every entry of |offsets| to |labels_|, then hands |labels_| to
+// SortAndUniquify().
+void OrderedLabelManager::InsertOffsets(const std::vector<offset_t>& offsets) {
+  for (offset_t offset : offsets)
+    labels_.push_back(offset);
+  SortAndUniquify(&labels_);
+}
+
+// Drains |reader|, appending the target of every reference to |labels_|, then
+// hands |labels_| to SortAndUniquify().
+void OrderedLabelManager::InsertTargets(ReferenceReader&& reader) {
+  auto ref = reader.GetNext();
+  while (ref.has_value()) {
+    labels_.push_back(ref->target);
+    ref = reader.GetNext();
+  }
+  SortAndUniquify(&labels_);
+}
+
+/******** UnorderedLabelManager ********/
+
+// Special members are defaulted here, out of line from the class definition.
+UnorderedLabelManager::UnorderedLabelManager() = default;
+UnorderedLabelManager::UnorderedLabelManager(const UnorderedLabelManager&) =
+ default;
+UnorderedLabelManager::~UnorderedLabelManager() = default;
+
+// Looks |offset| up in |labels_map_|. Returns the mapped index, or
+// |kUnusedIndex| if there is no entry.
+offset_t UnorderedLabelManager::IndexOfOffset(offset_t offset) const {
+  const auto entry = labels_map_.find(offset);
+  if (entry == labels_map_.end())
+    return kUnusedIndex;
+  return entry->second;
+}
+
+// Takes ownership of |labels| as the new label storage and rebuilds the
+// offset-to-index map from it. |kUnusedIndex| entries are kept in |labels_|
+// but get no map entry.
+void UnorderedLabelManager::Init(std::vector<offset_t>&& labels) {
+  labels_ = std::move(labels);
+  labels_map_.clear();
+  gap_idx_ = 0;
+
+  // Count used entries so the map can be sized up front. This must read
+  // |labels_|: |labels| was moved from above, so its contents are unspecified
+  // and counting it would make the reserve() below ineffective.
+  size_t used_index_count = 0;
+  for (offset_t label : labels_) {
+    if (label != kUnusedIndex)
+      ++used_index_count;
+  }
+  labels_map_.reserve(used_index_count);
+
+  offset_t size = static_cast<offset_t>(labels_.size());
+  for (offset_t idx = 0; idx < size; ++idx) {
+    if (labels_[idx] != kUnusedIndex) {
+      // Offsets are required to be unique.
+      DCHECK(labels_map_.find(labels_[idx]) == labels_map_.end());
+      labels_map_[labels_[idx]] = idx;
+    }
+  }
+}
+
+// Stores |offset| (which must not already be present) in the first
+// |kUnusedIndex| slot at or after |gap_idx_|, or appends it if there is none,
+// and records the chosen index in |labels_map_|.
+void UnorderedLabelManager::InsertNewOffset(offset_t offset) {
+  DCHECK(labels_map_.find(offset) == labels_map_.end());
+  // Scan for a gap, starting where the previous insertion landed.
+  auto gap = std::find(labels_.begin() + gap_idx_, labels_.end(), kUnusedIndex);
+  if (gap == labels_.end()) {
+    // No gap left: the new label goes at the end.
+    gap_idx_ = labels_.size();
+    labels_.push_back(offset);
+  } else {
+    // Reuse the gap.
+    gap_idx_ = gap - labels_.begin();
+    *gap = offset;
+  }
+  labels_map_[offset] = static_cast<offset_t>(gap_idx_);
+}
+
+} // namespace zucchini
diff --git a/label_manager.h b/label_manager.h
new file mode 100644
index 0000000..7c6606d
--- /dev/null
+++ b/label_manager.h
@@ -0,0 +1,113 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_LABEL_MANAGER_H_
+#define COMPONENTS_ZUCCHINI_LABEL_MANAGER_H_
+
+#include <stddef.h>
+
+#include <unordered_map>
+#include <vector>
+
+#include "base/logging.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A LabelManager stores a list of Labels. By definition, all offsets and
+// indices must be distinct. It also provides functions to:
+// - Get the offset of a stored index.
+// - Get the index of a stored offset.
+// - Create new Labels.
+
+// Base class for OrderedLabelManager and UnorderedLabelManager. Owns the
+// index-to-offset storage |labels_|; offset-to-index lookup is left to
+// subclasses.
+class BaseLabelManager {
+ public:
+ BaseLabelManager();
+ BaseLabelManager(const BaseLabelManager&);
+ virtual ~BaseLabelManager();
+
+ // Returns the offset of a given |index| if it is associated with a
+ // stored Label, or |kUnusedIndex| otherwise.
+ offset_t OffsetOfIndex(offset_t index) const {
+ return index < labels_.size() ? labels_[index] : kUnusedIndex;
+ }
+
+ // If |offset| has an associated stored Label, returns its index. Otherwise
+ // returns |kUnusedIndex|.
+ virtual offset_t IndexOfOffset(offset_t offset) const = 0;
+
+ // Number of entries in |labels_|.
+ size_t size() const { return labels_.size(); }
+
+ protected:
+ // Main storage of distinct offsets. This allows O(1) look up of an offset
+ // from its index. UnorderedLabelManager may contain "gaps" with
+ // |kUnusedIndex|.
+ std::vector<offset_t> labels_;
+};
+
+// OrderedLabelManager is a LabelManager that prioritizes memory efficiency,
+// storing Labels as a sorted list of offsets in |labels_|. Label insertions
+// are performed in batch to reduce costs. Index-of-offset lookup is O(lg n)
+// (binary search).
+class OrderedLabelManager : public BaseLabelManager {
+ public:
+ OrderedLabelManager();
+ OrderedLabelManager(const OrderedLabelManager&);
+ ~OrderedLabelManager() override;
+
+ // BaseLabelManager:
+ offset_t IndexOfOffset(offset_t offset) const override;
+
+ // Creates and stores a new Label for each unique offset in |offsets|. This
+ // invalidates all previous Label lookups.
+ void InsertOffsets(const std::vector<offset_t>& offsets);
+
+ // For each unique target from |reader|, creates and stores a new Label. This
+ // invalidates all previous Label lookups.
+ void InsertTargets(ReferenceReader&& reader);
+
+ // Read-only access to the stored offsets.
+ const std::vector<offset_t>& Labels() const { return labels_; }
+};
+
+// UnorderedLabelManager is a LabelManager that does not require Labels to be
+// sorted. Therefore, it can be initialized from Labels given in any order. It
+// also prioritizes speed for lookup and insertion, but uses more memory than
+// OrderedLabelManager. In addition to using |labels_| to store *unsorted*
+// distinct offsets, an unordered_map |labels_map_| is used for index-of-offset
+// lookup.
+class UnorderedLabelManager : public BaseLabelManager {
+ public:
+ UnorderedLabelManager();
+ UnorderedLabelManager(const UnorderedLabelManager&);
+ ~UnorderedLabelManager() override;
+
+ // BaseLabelManager:
+ offset_t IndexOfOffset(offset_t offset) const override;
+
+ // Clears and reinitializes all stored data. Requires that |labels| consists
+ // of unique offsets, but it may have "gaps" in the form of |kUnusedIndex|.
+ void Init(std::vector<offset_t>&& labels);
+
+ // Creates a new Label for |offset|. Behavior is undefined if |offset| is
+ // already associated with a stored Label. If |kUnusedIndex| gaps exist, tries
+ // to reuse indices to create new Labels, otherwise it allocates new indices.
+ // Previous lookup results involving stored offsets / indexes remain valid.
+ void InsertNewOffset(offset_t offset);
+
+ // Returns whether |offset| has an entry in |labels_map_|.
+ bool ContainsOffset(offset_t offset) const {
+ return labels_map_.find(offset) != labels_map_.end();
+ }
+
+ private:
+ // Inverse map of |labels_| (excludes |kUnusedIndex|).
+ std::unordered_map<offset_t, offset_t> labels_map_;
+
+ // Index into |labels_| at which to start scanning for a |kUnusedIndex| entry.
+ size_t gap_idx_ = 0;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_LABEL_MANAGER_H_
diff --git a/label_manager_unittest.cc b/label_manager_unittest.cc
new file mode 100644
index 0000000..11dcdf9
--- /dev/null
+++ b/label_manager_unittest.cc
@@ -0,0 +1,137 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/label_manager.h"
+
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/test_reference_reader.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Shorthand for |kUnusedIndex|, used as the expected "no such entry" result.
+constexpr auto BAD = kUnusedIndex;
+using OffsetVector = std::vector<offset_t>;
+
+} // namespace
+
+// Checks that OrderedLabelManager keeps offsets sorted and deduplicated
+// across batch insertions, and that lookups in both directions agree with
+// the stored order.
+TEST(LabelManagerTest, Ordered) {
+ OrderedLabelManager label_manager;
+ EXPECT_EQ(OffsetVector(), label_manager.Labels());
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(0));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0));
+
+ // Initialize with some data, test direct lookups.
+ label_manager.InsertOffsets({0x33, 0x11, 0x44, 0x11});
+ EXPECT_EQ(OffsetVector({0x11, 0x33, 0x44}), label_manager.Labels());
+
+ EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(0));
+ EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(1));
+ EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(2));
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(3));
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(4));
+
+ EXPECT_EQ(0U, label_manager.IndexOfOffset(0x11));
+ EXPECT_EQ(1U, label_manager.IndexOfOffset(0x33));
+ EXPECT_EQ(2U, label_manager.IndexOfOffset(0x44));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x00));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x77));
+
+ // Insert more data, note that lookup results changed.
+ label_manager.InsertOffsets({0x66, 0x11, 0x11, 0x44, 0x00});
+ EXPECT_EQ(OffsetVector({0x00, 0x11, 0x33, 0x44, 0x66}),
+ label_manager.Labels());
+
+ EXPECT_EQ(0x00U, label_manager.OffsetOfIndex(0));
+ EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(1));
+ EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(2));
+ EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(3));
+ EXPECT_EQ(0x66U, label_manager.OffsetOfIndex(4));
+
+ EXPECT_EQ(1U, label_manager.IndexOfOffset(0x11));
+ EXPECT_EQ(2U, label_manager.IndexOfOffset(0x33));
+ EXPECT_EQ(3U, label_manager.IndexOfOffset(0x44));
+ EXPECT_EQ(0U, label_manager.IndexOfOffset(0x00));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x77));
+}
+
+// Same data as the Ordered test, but fed through InsertTargets() from
+// reference readers instead of InsertOffsets().
+TEST(LabelManagerTest, OrderedInsertTargets) {
+ OrderedLabelManager label_manager;
+
+ // Initialize with some data. |location| does not matter.
+ TestReferenceReader reader1({{0, 0x33}, {1, 0x11}, {2, 0x44}, {3, 0x11}});
+ label_manager.InsertTargets(std::move(reader1));
+ EXPECT_EQ(OffsetVector({0x11, 0x33, 0x44}), label_manager.Labels());
+
+ // Insert more data.
+ TestReferenceReader reader2(
+ {{0, 0x66}, {1, 0x11}, {2, 0x11}, {3, 0x44}, {4, 0x00}});
+ label_manager.InsertTargets(std::move(reader2));
+ EXPECT_EQ(OffsetVector({0x00, 0x11, 0x33, 0x44, 0x66}),
+ label_manager.Labels());
+}
+
+// Checks that UnorderedLabelManager preserves the given order (including
+// gaps) on Init(), fills gaps before appending on InsertNewOffset(), and
+// never changes the index of an existing offset.
+TEST(LabelManagerTest, Unordered) {
+ UnorderedLabelManager label_manager;
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(0));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0));
+
+ // Initialize with some data, test direct lookups.
+ label_manager.Init(OffsetVector({0x33, BAD, BAD, 0x11, 0x44, BAD}));
+
+ EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(0));
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(1));
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(2));
+ EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(3));
+ EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(4));
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(5));
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(6));
+
+ EXPECT_EQ(3U, label_manager.IndexOfOffset(0x11));
+ EXPECT_EQ(0U, label_manager.IndexOfOffset(0x33));
+ EXPECT_EQ(4U, label_manager.IndexOfOffset(0x44));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x00));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x66));
+
+ // Insert one offset, assumed to be new.
+ label_manager.InsertNewOffset(0x00);
+ EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(0));
+ EXPECT_EQ(0x00U, label_manager.OffsetOfIndex(1));
+ EXPECT_EQ(BAD, label_manager.OffsetOfIndex(2));
+ EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(3));
+ EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(4));
+
+ EXPECT_EQ(1U, label_manager.IndexOfOffset(0x00));
+ EXPECT_EQ(3U, label_manager.IndexOfOffset(0x11));
+ EXPECT_EQ(0U, label_manager.IndexOfOffset(0x33));
+ EXPECT_EQ(4U, label_manager.IndexOfOffset(0x44));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x66));
+
+ // Insert few more offset, assumed to be new.
+ label_manager.InsertNewOffset(0x22);
+ label_manager.InsertNewOffset(0x77);
+ label_manager.InsertNewOffset(0x55);
+
+ EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(0));
+ EXPECT_EQ(0x00U, label_manager.OffsetOfIndex(1));
+ EXPECT_EQ(0x22U, label_manager.OffsetOfIndex(2));
+ EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(3));
+ EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(4));
+ EXPECT_EQ(0x77U, label_manager.OffsetOfIndex(5));
+ EXPECT_EQ(0x55U, label_manager.OffsetOfIndex(6));
+
+ EXPECT_EQ(1U, label_manager.IndexOfOffset(0x00));
+ EXPECT_EQ(3U, label_manager.IndexOfOffset(0x11));
+ EXPECT_EQ(2U, label_manager.IndexOfOffset(0x22));
+ EXPECT_EQ(0U, label_manager.IndexOfOffset(0x33));
+ EXPECT_EQ(4U, label_manager.IndexOfOffset(0x44));
+ EXPECT_EQ(6U, label_manager.IndexOfOffset(0x55));
+ EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x66));
+ EXPECT_EQ(5U, label_manager.IndexOfOffset(0x77));
+}
+
+} // namespace zucchini
diff --git a/main_utils.cc b/main_utils.cc
new file mode 100644
index 0000000..b874dd0
--- /dev/null
+++ b/main_utils.cc
@@ -0,0 +1,193 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/main_utils.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <ostream>
+#include <vector>
+
+#include "base/command_line.h"
+#include "base/logging.h"
+#include "base/time/time.h"
+#include "build/build_config.h"
+#include "components/zucchini/io_utils.h"
+#include "components/zucchini/zucchini_commands.h"
+
+#if defined(OS_WIN)
+#include <windows.h> // This include must come first.
+
+#include <psapi.h>
+#endif
+
+namespace {
+
+#if defined(OS_WIN)
+#endif
+
+/******** Command ********/
+
+// Specifications for a Zucchini command.
+struct Command {
+ constexpr Command(const char* name_in,
+ const char* usage_in,
+ int num_args_in,
+ CommandFunction command_function_in)
+ : name(name_in),
+ usage(usage_in),
+ num_args(num_args_in),
+ command_function(command_function_in) {}
+ Command(const Command&) = default;
+ ~Command() = default;
+
+ // Unique name of command. |-name| is used to select from command-line.
+ const char* const name;
+
+ // Usage help text of command.
+ const char* const usage;
+
+ // Number of arguments (assumed to be filenames) used by the command.
+ const int num_args;
+
+ // Main function to run for the command.
+ const CommandFunction command_function;
+};
+
+/******** List of Zucchini commands ********/
+
+constexpr Command kCommands[] = {
+ {"gen", "-gen <old_file> <new_file> <patch_file> [-raw]", 3, &MainGen},
+ {"apply", "-apply <old_file> <patch_file> <new_file>", 3, &MainApply},
+ {"read", "-read <exe> [-dump]", 1, &MainRead},
+ {"detect", "-detect <archive_file> [-dd=format#]", 1, &MainDetect},
+ {"match", "-match <old_file> <new_file>", 2, &MainMatch},
+ {"crc32", "-crc32 <file>", 1, &MainCrc32},
+};
+
+/******** ScopedResourceUsageTracker ********/
+
+// Tracks wall time (and, on Windows, peak memory counters) and logs on exit.
+class ScopedResourceUsageTracker {
+ public:
+ // Records the start time and, on Windows, the starting peak memory counters.
+ ScopedResourceUsageTracker() {
+ start_time_ = base::TimeTicks::Now();
+
+#if defined(OS_WIN)
+ PROCESS_MEMORY_COUNTERS pmc;
+ if (::GetProcessMemoryInfo(::GetCurrentProcess(), &pmc, sizeof(pmc))) {
+ start_peak_page_file_usage_ = pmc.PeakPagefileUsage;
+ start_peak_working_set_size_ = pmc.PeakWorkingSetSize;
+ }
+#endif // defined(OS_WIN)
+ }
+
+ // Logs peak memory counters and their change (Windows only), then total time.
+ ~ScopedResourceUsageTracker() {
+ base::TimeTicks end_time = base::TimeTicks::Now();
+
+#if defined(OS_WIN)
+ size_t cur_peak_page_file_usage = 0;
+ size_t cur_peak_working_set_size = 0;
+ PROCESS_MEMORY_COUNTERS pmc;
+ if (::GetProcessMemoryInfo(::GetCurrentProcess(), &pmc, sizeof(pmc))) {
+ cur_peak_page_file_usage = pmc.PeakPagefileUsage;
+ cur_peak_working_set_size = pmc.PeakWorkingSetSize;
+ }
+
+ LOG(INFO) << "Zucchini.PeakPagefileUsage "
+ << cur_peak_page_file_usage / 1024 << " KiB";
+ LOG(INFO) << "Zucchini.PeakPagefileUsageChange "
+ << (cur_peak_page_file_usage - start_peak_page_file_usage_) / 1024
+ << " KiB";
+ LOG(INFO) << "Zucchini.PeakWorkingSetSize "
+ << cur_peak_working_set_size / 1024 << " KiB";
+ LOG(INFO) << "Zucchini.PeakWorkingSetSizeChange "
+ << (cur_peak_working_set_size - start_peak_working_set_size_) /
+ 1024
+ << " KiB";
+#endif // defined(OS_WIN)
+
+ LOG(INFO) << "Zucchini.TotalTime " << (end_time - start_time_).InSecondsF()
+ << " s";
+ }
+
+ private:
+ base::TimeTicks start_time_;
+#if defined(OS_WIN)
+ size_t start_peak_page_file_usage_ = 0;
+ size_t start_peak_working_set_size_ = 0;
+#endif // defined(OS_WIN)
+};
+
+/******** Helper functions ********/
+
+// Translates |command_line| arguments to a vector of base::FilePath (expecting
+// exactly |expected_count|). On success, writes the results to |paths| and
+// returns true. Otherwise returns false.
+bool CheckAndGetFilePathParams(const base::CommandLine& command_line,
+ size_t expected_count,
+ std::vector<base::FilePath>* paths) {
+ const base::CommandLine::StringVector& args = command_line.GetArgs();
+ if (args.size() != expected_count)
+ return false;
+
+ paths->clear();
+ paths->reserve(args.size());
+ for (const auto& arg : args)
+ paths->emplace_back(arg);
+ return true;
+}
+
+// Prints main Zucchini usage text.
+void PrintUsage(std::ostream& err) {
+ err << "Usage:" << std::endl;
+ for (const Command& command : kCommands)
+ err << " zucchini " << command.usage << std::endl;
+}
+
+} // namespace
+
+/******** Exported Functions ********/
+
+zucchini::status::Code RunZucchiniCommand(const base::CommandLine& command_line,
+ std::ostream& out,
+ std::ostream& err) {
+ // Look for a command with name that matches input.
+ const Command* command_use = nullptr;
+ for (const Command& command : kCommands) {
+ if (command_line.HasSwitch(command.name)) {
+ if (command_use) { // Too many commands found.
+ command_use = nullptr; // Set to null to flag error.
+ break;
+ }
+ command_use = &command;
+ }
+ }
+
+ // Expect exactly 1 matching command. If 0 or >= 2, print usage and quit.
+ if (!command_use) {
+ err << "Must have exactly one of:" << std::endl;
+ err << " [";
+ zucchini::PrefixSep sep(", ");
+ for (const Command& command : kCommands)
+ err << sep << "-" << command.name;
+ err << "]" << std::endl;
+ PrintUsage(err);
+ return zucchini::status::kStatusInvalidParam;
+ }
+
+ // Try to parse filename arguments. On failure, print usage and quit.
+ std::vector<base::FilePath> paths;
+ if (!CheckAndGetFilePathParams(command_line, command_use->num_args, &paths)) {
+ err << command_use->usage << std::endl;
+ PrintUsage(err);
+ return zucchini::status::kStatusInvalidParam;
+ }
+
+ ScopedResourceUsageTracker resource_usage_tracker;
+ return command_use->command_function({command_line, paths, out, err});
+}
diff --git a/main_utils.h b/main_utils.h
new file mode 100644
index 0000000..addb830
--- /dev/null
+++ b/main_utils.h
@@ -0,0 +1,35 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_MAIN_UTILS_H_
+#define COMPONENTS_ZUCCHINI_MAIN_UTILS_H_
+
+#include <iosfwd>
+
+#include "base/files/file_path.h"
+#include "components/zucchini/zucchini.h"
+
+// Utilities to run Zucchini command based on command-line input, and to print
+// help messages.
+
+namespace base {
+
+class CommandLine;
+
+} // namespace base
+
+// To add a new Zucchini command:
+// 1. Declare the command's main function in zucchini_commands.h. Its
+// signature must match CommandFunction.
+// 2. Define the command's main function in zucchini_commands.cc.
+// 3. Add a new entry into |kCommands| in main_utils.cc.
+
+// Searches |command_line| for Zucchini commands. If a unique command is found,
+// runs it (passes |out| and |err|), and logs resource usage. Otherwise prints
+// help message to |err|. Returns Zucchini status code for error handling.
+zucchini::status::Code RunZucchiniCommand(const base::CommandLine& command_line,
+ std::ostream& out,
+ std::ostream& err);
+
+#endif // COMPONENTS_ZUCCHINI_MAIN_UTILS_H_
diff --git a/mapped_file.cc b/mapped_file.cc
new file mode 100644
index 0000000..13c1afd
--- /dev/null
+++ b/mapped_file.cc
@@ -0,0 +1,70 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/mapped_file.h"
+
+#include <utility>
+
+#include "base/files/file_util.h"
+#include "base/logging.h"
+#include "build/build_config.h"
+
+namespace zucchini {
+
+MappedFileReader::MappedFileReader(base::File&& file) {
+ if (!file.IsValid()) {
+ error_ = "Invalid file.";
+ return; // |buffer_| will be uninitialized, and therefore invalid.
+ }
+ if (!buffer_.Initialize(std::move(file))) {
+ error_ = "Can't map file to memory.";
+ }
+}
+
+MappedFileWriter::MappedFileWriter(const base::FilePath& file_path,
+ base::File&& file,
+ size_t length)
+ : file_path_(file_path), delete_behavior_(kManualDeleteOnClose) {
+ if (!file.IsValid()) {
+ error_ = "Invalid file.";
+ return; // |buffer_| will be uninitialized, and therefore invalid.
+ }
+
+#if defined(OS_WIN)
+ file_handle_ = file.Duplicate();
+ // Tell the OS to delete the file when all handles are closed.
+ if (file_handle_.DeleteOnClose(true)) {
+ delete_behavior_ = kAutoDeleteOnClose;
+ } else {
+ error_ = "Failed to mark file for delete-on-close.";
+ }
+#endif // defined(OS_WIN)
+
+ bool is_ok = buffer_.Initialize(std::move(file), {0, length},
+ base::MemoryMappedFile::READ_WRITE_EXTEND);
+ if (!is_ok) {
+ error_ = "Can't map file to memory.";
+ }
+}
+
+MappedFileWriter::~MappedFileWriter() {
+ if (!HasError() && delete_behavior_ == kManualDeleteOnClose &&
+ !file_path_.empty() && !base::DeleteFile(file_path_, false)) {
+ error_ = "Failed to delete file.";
+ }
+}
+
+bool MappedFileWriter::Keep() {
+#if defined(OS_WIN)
+ if (delete_behavior_ == kAutoDeleteOnClose &&
+ !file_handle_.DeleteOnClose(false)) {
+ error_ = "Failed to prevent deletion of file.";
+ return false;
+ }
+#endif // defined(OS_WIN)
+ delete_behavior_ = kKeep;
+ return true;
+}
+
+} // namespace zucchini
diff --git a/mapped_file.h b/mapped_file.h
new file mode 100644
index 0000000..540f947
--- /dev/null
+++ b/mapped_file.h
@@ -0,0 +1,83 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_MAPPED_FILE_H_
+#define COMPONENTS_ZUCCHINI_MAPPED_FILE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "base/files/file.h"
+#include "base/files/file_path.h"
+#include "base/files/memory_mapped_file.h"
+#include "base/macros.h"
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// A file reader wrapper.
+class MappedFileReader {
+ public:
+ // Maps |file| to memory for reading. Also validates |file|. Errors are
+ // available via HasError() and error().
+ explicit MappedFileReader(base::File&& file);
+
+ const uint8_t* data() const { return buffer_.data(); }
+ size_t length() const { return buffer_.length(); }
+ zucchini::ConstBufferView region() const { return {data(), length()}; }
+
+ bool HasError() { return !error_.empty() || !buffer_.IsValid(); }
+ const std::string& error() { return error_; }
+
+ private:
+ std::string error_;
+ base::MemoryMappedFile buffer_;
+
+ DISALLOW_COPY_AND_ASSIGN(MappedFileReader);
+};
+
+// A file writer wrapper. The target file is deleted on destruction unless
+// Keep() is called.
+class MappedFileWriter {
+ public:
+ // Maps |file| to memory for writing. |file_path| is needed for auto delete on
+ // UNIX systems, but can be empty if auto delete is not needed. Errors are
+ // available via HasError() and error().
+ MappedFileWriter(const base::FilePath& file_path,
+ base::File&& file,
+ size_t length);
+ ~MappedFileWriter();
+
+ uint8_t* data() { return buffer_.data(); }
+ size_t length() const { return buffer_.length(); }
+ zucchini::MutableBufferView region() { return {data(), length()}; }
+
+ bool HasError() { return !error_.empty() || !buffer_.IsValid(); }
+ const std::string& error() { return error_; }
+
+ // Indicates that the file should not be deleted on destruction. Returns true
+ // iff the operation succeeds.
+ bool Keep();
+
+ private:
+ enum OnCloseDeleteBehavior {
+ kKeep,
+ kAutoDeleteOnClose,
+ kManualDeleteOnClose
+ };
+
+ std::string error_;
+ base::FilePath file_path_;
+ base::File file_handle_;
+ base::MemoryMappedFile buffer_;
+ OnCloseDeleteBehavior delete_behavior_;
+
+ DISALLOW_COPY_AND_ASSIGN(MappedFileWriter);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_MAPPED_FILE_H_
diff --git a/mapped_file_unittest.cc b/mapped_file_unittest.cc
new file mode 100644
index 0000000..e3ee6dc
--- /dev/null
+++ b/mapped_file_unittest.cc
@@ -0,0 +1,61 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/mapped_file.h"
+
+#include <utility>
+
+#include "base/files/file.h"
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/files/scoped_temp_dir.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+class MappedFileWriterTest : public testing::Test {
+ protected:
+ MappedFileWriterTest() = default;
+ void SetUp() override {
+ ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
+ file_path_ = temp_dir_.GetPath().AppendASCII("test-file");
+ }
+
+ base::FilePath file_path_;
+
+ private:
+ base::ScopedTempDir temp_dir_;
+};
+
+TEST_F(MappedFileWriterTest, Keep) {
+ EXPECT_FALSE(base::PathExists(file_path_));
+ {
+ using base::File;
+ File file(file_path_, File::FLAG_CREATE_ALWAYS | File::FLAG_READ |
+ File::FLAG_WRITE | File::FLAG_SHARE_DELETE |
+ File::FLAG_CAN_DELETE_ON_CLOSE);
+ MappedFileWriter file_writer(file_path_, std::move(file), 10);
+ EXPECT_FALSE(file_writer.HasError());
+ EXPECT_TRUE(file_writer.Keep());
+ EXPECT_FALSE(file_writer.HasError());
+ EXPECT_TRUE(file_writer.error().empty());
+ }
+ EXPECT_TRUE(base::PathExists(file_path_));
+}
+
+TEST_F(MappedFileWriterTest, DeleteOnClose) {
+ EXPECT_FALSE(base::PathExists(file_path_));
+ {
+ using base::File;
+ File file(file_path_, File::FLAG_CREATE_ALWAYS | File::FLAG_READ |
+ File::FLAG_WRITE | File::FLAG_SHARE_DELETE |
+ File::FLAG_CAN_DELETE_ON_CLOSE);
+ MappedFileWriter file_writer(file_path_, std::move(file), 10);
+ EXPECT_FALSE(file_writer.HasError());
+ EXPECT_TRUE(file_writer.error().empty());
+ }
+ EXPECT_FALSE(base::PathExists(file_path_));
+}
+
+} // namespace zucchini
diff --git a/patch_fuzzer.cc b/patch_fuzzer.cc
new file mode 100644
index 0000000..2d1c9b7
--- /dev/null
+++ b/patch_fuzzer.cc
@@ -0,0 +1,19 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/optional.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/patch_reader.h"
+
+// Entry point for LibFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ logging::SetMinLogLevel(3); // Disable console spamming.
+ zucchini::ConstBufferView patch(data, size);
+ base::Optional<zucchini::EnsemblePatchReader> patch_reader =
+ zucchini::EnsemblePatchReader::Create(patch);
+ return 0;
+}
diff --git a/patch_read_write_unittest.cc b/patch_read_write_unittest.cc
new file mode 100644
index 0000000..7f84b03
--- /dev/null
+++ b/patch_read_write_unittest.cc
@@ -0,0 +1,604 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/patch_writer.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Used for initialization of raw test data.
+using ByteVector = std::vector<uint8_t>;
+
+// Helper function that creates an object of type |T| and initializes it from
+// data in |buffer|. Ensures initialization is successful. |buffer| is passed as
+// pointer to avoid passing a temporary, which can cause dangling references.
+template <class T>
+T TestInitialize(const ByteVector* buffer) {
+ T value;
+ BufferSource buffer_source(buffer->data(), buffer->size());
+ EXPECT_TRUE(value.Initialize(&buffer_source));
+ EXPECT_TRUE(buffer_source.empty()); // Make sure all data has been consumed
+ return value;
+}
+
+// Helper function that creates an object of type |T| and tries to initialize it
+// from invalid data in |buffer|, expecting the operation to fail. |buffer| is
+// passed as pointer to avoid passing a temporary, which can cause dangling
+// references.
+template <class T>
+void TestInvalidInitialize(const ByteVector* buffer) {
+ T value;
+ BufferSource buffer_source(buffer->data(), buffer->size());
+ EXPECT_FALSE(value.Initialize(&buffer_source));
+}
+
+// Helper function that serializes |value| into a buffer. Ensures that
+// serialization is successful and that the result matches |expected|.
+template <class T>
+void TestSerialize(const ByteVector& expected, const T& value) {
+ size_t size = value.SerializedSize();
+ EXPECT_EQ(expected.size(), size);
+ ByteVector buffer(size);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_TRUE(value.SerializeInto(&buffer_sink));
+ EXPECT_EQ(expected, buffer);
+}
+
+} // namespace
+
+bool operator==(const ByteVector& a, ConstBufferView b) {
+ return a == ByteVector(b.begin(), b.end());
+}
+
+TEST(PatchTest, ParseSerializeElementMatch) {
+ ByteVector data = {
+ 0x01, 0, 0, 0, // old_offset
+ 0x03, 0, 0, 0, // new_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x04, 0, 0, 0, // new_length
+ 7, 0, 0, 0, // kExeTypeDex
+ };
+ BufferSource buffer_source(data.data(), data.size());
+ ElementMatch element_match = {};
+ EXPECT_TRUE(patch::ParseElementMatch(&buffer_source, &element_match));
+ EXPECT_EQ(kExeTypeDex, element_match.exe_type());
+ EXPECT_EQ(kExeTypeDex, element_match.old_element.exe_type);
+ EXPECT_EQ(kExeTypeDex, element_match.new_element.exe_type);
+ EXPECT_EQ(0x1U, element_match.old_element.offset);
+ EXPECT_EQ(0x2U, element_match.old_element.size);
+ EXPECT_EQ(0x3U, element_match.new_element.offset);
+ EXPECT_EQ(0x4U, element_match.new_element.size);
+
+ size_t size = patch::SerializedElementMatchSize(element_match);
+ EXPECT_EQ(data.size(), size);
+ ByteVector buffer(size);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_TRUE(patch::SerializeElementMatch(element_match, &buffer_sink));
+ EXPECT_EQ(data, buffer);
+}
+
+TEST(PatchTest, ParseElementMatchTooSmall) {
+ ByteVector data = {4};
+ BufferSource buffer_source(data.data(), data.size());
+ ElementMatch element_match = {};
+ EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match));
+}
+
+TEST(PatchTest, ParseSerializeElementMatchExeMismatch) {
+ ByteVector buffer(28);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_FALSE(patch::SerializeElementMatch(
+ ElementMatch{{{1, 2}, kExeTypeNoOp}, {{3, 4}, kExeTypeWin32X86}},
+ &buffer_sink));
+}
+
+TEST(PatchTest, SerializeElementMatchTooSmall) {
+ ByteVector buffer(4);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_FALSE(patch::SerializeElementMatch(
+ ElementMatch{{{1, 2}, kExeTypeDex}, {{3, 4}, kExeTypeDex}},
+ &buffer_sink));
+}
+
+TEST(PatchTest, ParseSerializeBuffer) {
+ auto TestSerialize = [](const ByteVector& expected, const ByteVector& value) {
+ size_t size = patch::SerializedBufferSize(value);
+ EXPECT_EQ(expected.size(), size);
+ ByteVector buffer(size);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_TRUE(patch::SerializeBuffer(value, &buffer_sink));
+ EXPECT_EQ(expected, buffer);
+ };
+
+ // |data| is passed as pointer to avoid passing a temporary, which can cause
+ // dangling references.
+ auto TestParse = [](const ByteVector* data) {
+ BufferSource value;
+ BufferSource buffer_source(data->data(), data->size());
+ EXPECT_TRUE(patch::ParseBuffer(&buffer_source, &value));
+ // Make sure all data has been consumed.
+ EXPECT_TRUE(buffer_source.empty());
+ return value;
+ };
+
+ ByteVector data = {
+ 0, 0, 0, 0, // size
+ };
+ BufferSource buffer = TestParse(&data);
+ EXPECT_TRUE(buffer.empty());
+ TestSerialize(data, ByteVector({}));
+
+ data = {
+ 3, 0, 0, 0, // size
+ 1, 2, 3 // content
+ };
+ buffer = TestParse(&data);
+ EXPECT_EQ(3U, buffer.size());
+ EXPECT_EQ(ByteVector({1, 2, 3}), ByteVector(buffer.begin(), buffer.end()));
+ TestSerialize(data, ByteVector({1, 2, 3}));
+
+ // Ill-formed input.
+ data = {
+ 3, 0, 0, 0, // size
+ 1, 2 // insufficient content
+ };
+ BufferSource value;
+ BufferSource buffer_source(data.data(), data.size());
+ EXPECT_FALSE(patch::ParseBuffer(&buffer_source, &value));
+ EXPECT_TRUE(value.empty());
+}
+
+TEST(PatchTest, SerializeBufferTooSmall) {
+ ByteVector buffer(3);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_FALSE(patch::SerializeBuffer(ByteVector(), &buffer_sink));
+}
+
+TEST(EquivalenceSinkSourceTest, Empty) {
+ ByteVector data = {
+ 0, 0, 0, 0, // src_skip size
+ 0, 0, 0, 0, // dst_skip size
+ 0, 0, 0, 0, // copy_count size
+ };
+ EquivalenceSource equivalence_source =
+ TestInitialize<EquivalenceSource>(&data);
+
+ EXPECT_FALSE(equivalence_source.GetNext());
+ EXPECT_TRUE(equivalence_source.Done());
+
+ TestSerialize(data, EquivalenceSink());
+}
+
+TEST(EquivalenceSourceSinkTest, Normal) {
+ ByteVector data = {
+ 2, 0, 0, 0, // src_skip size
+ 6, 7, // src_skip content
+ 2, 0, 0, 0, // dst_skip size
+ 7, 1, // dst_skip content
+ 2, 0, 0, 0, // copy_count size
+ 2, 1 // copy_count content
+ };
+ EquivalenceSource equivalence_source =
+ TestInitialize<EquivalenceSource>(&data);
+ auto equivalence = equivalence_source.GetNext();
+ EXPECT_FALSE(equivalence_source.Done());
+ EXPECT_TRUE(equivalence.has_value());
+ EXPECT_EQ(offset_t(3), equivalence->src_offset);
+ EXPECT_EQ(offset_t(7), equivalence->dst_offset);
+ EXPECT_EQ(offset_t(2), equivalence->length);
+
+ equivalence = equivalence_source.GetNext();
+ EXPECT_TRUE(equivalence_source.Done());
+ EXPECT_TRUE(equivalence.has_value());
+ EXPECT_EQ(offset_t(1), equivalence->src_offset);
+ EXPECT_EQ(offset_t(10), equivalence->dst_offset);
+ EXPECT_EQ(offset_t(1), equivalence->length);
+
+ equivalence = equivalence_source.GetNext();
+ EXPECT_FALSE(equivalence.has_value());
+
+ EquivalenceSink equivalence_sink;
+ equivalence_sink.PutNext(Equivalence{3, 7, 2});
+ equivalence_sink.PutNext(Equivalence{1, 10, 1});
+ TestSerialize(data, equivalence_sink);
+}
+
+TEST(ExtraDataSourceSinkTest, Empty) {
+ ByteVector data = {
+ 0, 0, 0, 0, // extra_data size
+ };
+ ExtraDataSource extra_data_source = TestInitialize<ExtraDataSource>(&data);
+
+ EXPECT_FALSE(extra_data_source.GetNext(2));
+ EXPECT_TRUE(extra_data_source.Done());
+
+ TestSerialize(data, ExtraDataSink());
+}
+
+TEST(ExtraDataSourceSinkTest, Normal) {
+ ByteVector data = {
+ 5, 0, 0, 0, // extra_data size
+ 1, 2, 3, 4, 5, // extra_data content
+ };
+ ExtraDataSource extra_data_source = TestInitialize<ExtraDataSource>(&data);
+ EXPECT_FALSE(extra_data_source.Done());
+
+ auto extra_data = extra_data_source.GetNext(3);
+ EXPECT_FALSE(extra_data_source.Done());
+ EXPECT_TRUE(extra_data.has_value());
+ EXPECT_EQ(size_t(3), extra_data->size());
+ EXPECT_EQ(ByteVector({1, 2, 3}),
+ ByteVector(extra_data->begin(), extra_data->end()));
+
+ extra_data = extra_data_source.GetNext(2);
+ EXPECT_TRUE(extra_data_source.Done());
+ EXPECT_TRUE(extra_data.has_value());
+ EXPECT_EQ(ByteVector({4, 5}),
+ ByteVector(extra_data->begin(), extra_data->end()));
+
+ extra_data = extra_data_source.GetNext(2);
+ EXPECT_FALSE(extra_data.has_value());
+
+ ExtraDataSink extra_data_sink;
+
+ ByteVector content = {1, 2, 3};
+ extra_data_sink.PutNext({content.data(), content.size()});
+ content = {4, 5};
+ extra_data_sink.PutNext({content.data(), content.size()});
+ TestSerialize(data, extra_data_sink);
+}
+
+TEST(RawDeltaSourceSinkTest, Empty) {
+ ByteVector data = {
+ 0, 0, 0, 0, // raw_delta_skip size
+ 0, 0, 0, 0, // raw_delta_diff size
+ };
+ RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data);
+
+ EXPECT_FALSE(raw_delta_source.GetNext());
+ EXPECT_TRUE(raw_delta_source.Done());
+
+ TestSerialize(data, RawDeltaSink());
+}
+
+TEST(RawDeltaSinkSourceSinkTest, Normal) {
+ ByteVector data = {
+ 3, 0, 0, 0, // raw_delta_skip size
+ 1, 3, 0, // raw_delta_skip content
+ 3, 0, 0, 0, // raw_delta_diff size
+ 42, 24, 235, // raw_delta_diff content
+ };
+ RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data);
+ EXPECT_FALSE(raw_delta_source.Done());
+
+ auto raw_delta = raw_delta_source.GetNext();
+ EXPECT_FALSE(raw_delta_source.Done());
+ EXPECT_TRUE(raw_delta.has_value());
+ EXPECT_EQ(1U, raw_delta->copy_offset);
+ EXPECT_EQ(42, raw_delta->diff);
+
+ raw_delta = raw_delta_source.GetNext();
+ EXPECT_FALSE(raw_delta_source.Done());
+ EXPECT_TRUE(raw_delta.has_value());
+ EXPECT_EQ(5U, raw_delta->copy_offset);
+ EXPECT_EQ(24, raw_delta->diff);
+
+ raw_delta = raw_delta_source.GetNext();
+ EXPECT_TRUE(raw_delta_source.Done());
+ EXPECT_TRUE(raw_delta.has_value());
+ EXPECT_EQ(6U, raw_delta->copy_offset);
+ EXPECT_EQ(-21, raw_delta->diff);
+
+ EXPECT_FALSE(raw_delta_source.GetNext());
+ EXPECT_TRUE(raw_delta_source.Done());
+
+ RawDeltaSink raw_delta_sink;
+ raw_delta_sink.PutNext({1, 42});
+ raw_delta_sink.PutNext({5, 24});
+ raw_delta_sink.PutNext({6, -21});
+ TestSerialize(data, raw_delta_sink);
+}
+
+TEST(ReferenceDeltaSourceSinkTest, Empty) {
+ ByteVector data = {
+ 0, 0, 0, 0, // reference_delta size
+ };
+ ReferenceDeltaSource reference_delta_source =
+ TestInitialize<ReferenceDeltaSource>(&data);
+
+ EXPECT_FALSE(reference_delta_source.GetNext());
+ EXPECT_TRUE(reference_delta_source.Done());
+
+ TestSerialize(data, ReferenceDeltaSink());
+}
+
+TEST(ReferenceDeltaSourceSinkTest, Normal) {
+ ByteVector data = {
+ 2, 0, 0, 0, // reference_delta size
+ 84, 47, // reference_delta content
+ };
+ ReferenceDeltaSource reference_delta_source =
+ TestInitialize<ReferenceDeltaSource>(&data);
+ EXPECT_FALSE(reference_delta_source.Done());
+
+ auto delta = reference_delta_source.GetNext();
+ EXPECT_FALSE(reference_delta_source.Done());
+ EXPECT_TRUE(delta.has_value());
+ EXPECT_EQ(42, *delta);
+
+ delta = reference_delta_source.GetNext();
+ EXPECT_TRUE(reference_delta_source.Done());
+ EXPECT_TRUE(delta.has_value());
+ EXPECT_EQ(-24, *delta);
+
+ EXPECT_FALSE(reference_delta_source.GetNext());
+ EXPECT_TRUE(reference_delta_source.Done());
+
+ ReferenceDeltaSink reference_delta;
+ reference_delta.PutNext(42);
+ reference_delta.PutNext(-24);
+ TestSerialize(data, reference_delta);
+}
+
+TEST(TargetSourceSinkTest, Empty) {
+ ByteVector data = {
+ 0, 0, 0, 0, // extra_targets size
+ };
+ TargetSource target_source = TestInitialize<TargetSource>(&data);
+
+ EXPECT_FALSE(target_source.GetNext());
+ EXPECT_TRUE(target_source.Done());
+
+ TestSerialize(data, TargetSink());
+}
+
+TEST(TargetSourceSinkTest, Normal) {
+ ByteVector data = {
+ 2, 0, 0, 0, // extra_targets size
+ 3, 1, // extra_targets content
+ };
+ TargetSource target_source = TestInitialize<TargetSource>(&data);
+ EXPECT_FALSE(target_source.Done());
+
+ auto target = target_source.GetNext();
+ EXPECT_FALSE(target_source.Done());
+ EXPECT_TRUE(target.has_value());
+ EXPECT_EQ(3U, *target);
+
+ target = target_source.GetNext();
+ EXPECT_TRUE(target_source.Done());
+ EXPECT_TRUE(target.has_value());
+ EXPECT_EQ(5U, *target);
+
+ EXPECT_FALSE(target_source.GetNext());
+ EXPECT_TRUE(target_source.Done());
+
+ TargetSink target_sink;
+ target_sink.PutNext(3);
+ target_sink.PutNext(5);
+ TestSerialize(data, target_sink);
+}
+
+TEST(PatchElementTest, Normal) {
+ ByteVector data = {
+ 0x01, 0, 0, 0, // old_offset
+ 0x03, 0, 0, 0, // new_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x04, 0, 0, 0, // new_length
+ 1, 0, 0, 0, // kExeTypeWin32X86
+
+ 1, 0, 0, 0, // src_skip size
+ 0x10, // src_skip content
+ 1, 0, 0, 0, // dst_skip size
+ 0x11, // dst_skip content
+ 1, 0, 0, 0, // copy_count size
+ 0x12, // copy_count content
+
+ 1, 0, 0, 0, // extra_data size
+ 0x13, // extra_data content
+
+ 1, 0, 0, 0, // raw_delta_skip size
+ 0x14, // raw_delta_skip content
+ 1, 0, 0, 0, // raw_delta_diff size
+ 0x15, // raw_delta_diff content
+
+ 1, 0, 0, 0, // reference_delta size
+ 0x16, // reference_delta content
+
+ 2, 0, 0, 0, // pool count
+ 0, // pool_tag
+ 1, 0, 0, 0, // extra_targets size
+ 0x17, // extra_targets content
+ 2, // pool_tag
+ 1, 0, 0, 0, // extra_targets size
+ 0x18, // extra_targets content
+ };
+
+ PatchElementReader patch_element_reader =
+ TestInitialize<PatchElementReader>(&data);
+
+ ElementMatch element_match = patch_element_reader.element_match();
+ EXPECT_EQ(kExeTypeWin32X86, element_match.exe_type());
+ EXPECT_EQ(kExeTypeWin32X86, element_match.old_element.exe_type);
+ EXPECT_EQ(kExeTypeWin32X86, element_match.new_element.exe_type);
+ EXPECT_EQ(0x1U, element_match.old_element.offset);
+ EXPECT_EQ(0x2U, element_match.old_element.size);
+ EXPECT_EQ(0x3U, element_match.new_element.offset);
+ EXPECT_EQ(0x4U, element_match.new_element.size);
+
+ EquivalenceSource equivalence_source =
+ patch_element_reader.GetEquivalenceSource();
+ EXPECT_EQ(ByteVector({0x10}), equivalence_source.src_skip());
+ EXPECT_EQ(ByteVector({0x11}), equivalence_source.dst_skip());
+ EXPECT_EQ(ByteVector({0x12}), equivalence_source.copy_count());
+
+ ExtraDataSource extra_data_source = patch_element_reader.GetExtraDataSource();
+ EXPECT_EQ(ByteVector({0x13}), extra_data_source.extra_data());
+
+ RawDeltaSource raw_delta_source = patch_element_reader.GetRawDeltaSource();
+ EXPECT_EQ(ByteVector({0x14}), raw_delta_source.raw_delta_skip());
+ EXPECT_EQ(ByteVector({0x15}), raw_delta_source.raw_delta_diff());
+
+ ReferenceDeltaSource reference_delta_source =
+ patch_element_reader.GetReferenceDeltaSource();
+ EXPECT_EQ(ByteVector({0x16}), reference_delta_source.reference_delta());
+
+ TargetSource target_source1 =
+ patch_element_reader.GetExtraTargetSource(PoolTag(0));
+ EXPECT_EQ(ByteVector({0x17}), target_source1.extra_targets());
+ TargetSource target_source2 =
+ patch_element_reader.GetExtraTargetSource(PoolTag(1));
+ EXPECT_EQ(ByteVector({}), target_source2.extra_targets());
+ TargetSource target_source3 =
+ patch_element_reader.GetExtraTargetSource(PoolTag(2));
+ EXPECT_EQ(ByteVector({0x18}), target_source3.extra_targets());
+
+ PatchElementWriter patch_element_writer(element_match);
+
+ patch_element_writer.SetEquivalenceSink(
+ EquivalenceSink({0x10}, {0x11}, {0x12}));
+ patch_element_writer.SetExtraDataSink(ExtraDataSink({0x13}));
+ patch_element_writer.SetRawDeltaSink(RawDeltaSink({0x14}, {0x15}));
+ patch_element_writer.SetReferenceDeltaSink(ReferenceDeltaSink({0x16}));
+ patch_element_writer.SetTargetSink(PoolTag(0), TargetSink({0x17}));
+ patch_element_writer.SetTargetSink(PoolTag(2), TargetSink({0x18}));
+ TestSerialize(data, patch_element_writer);
+}
+
+TEST(EnsemblePatchTest, RawPatch) {
+ ByteVector data = {
+ 0x5A, 0x75, 0x63, 0x00, // magic
+ 0x10, 0x32, 0x54, 0x76, // old_size
+ 0x00, 0x11, 0x22, 0x33, // old_crc
+ 0x98, 0xBA, 0xDC, 0xFE, // new_size
+ 0x44, 0x55, 0x66, 0x77, // new_crc
+
+ 0, 0, 0, 0, // kRawPatch
+
+ 1, 0, 0, 0, // number of elements
+
+ 0x01, 0, 0, 0, // old_offset
+ 0x00, 0, 0, 0, // new_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x98, 0xBA, 0xDC, 0xFE, // new_length
+ 1, 0, 0, 0, // kExeTypeWin32X86
+ 0, 0, 0, 0, // src_skip size
+ 0, 0, 0, 0, // dst_skip size
+ 0, 0, 0, 0, // copy_count size
+ 0, 0, 0, 0, // extra_data size
+ 0, 0, 0, 0, // raw_delta_skip size
+ 0, 0, 0, 0, // raw_delta_diff size
+ 0, 0, 0, 0, // reference_delta size
+ 0, 0, 0, 0, // pool count
+ };
+
+ EnsemblePatchReader ensemble_patch_reader =
+ TestInitialize<EnsemblePatchReader>(&data);
+
+ PatchHeader header = ensemble_patch_reader.header();
+ EXPECT_EQ(PatchHeader::kMagic, header.magic);
+ EXPECT_EQ(0x76543210U, header.old_size);
+ EXPECT_EQ(0x33221100U, header.old_crc);
+ EXPECT_EQ(0xFEDCBA98U, header.new_size);
+ EXPECT_EQ(0x77665544U, header.new_crc);
+
+ EXPECT_EQ(PatchType::kRawPatch, ensemble_patch_reader.patch_type());
+
+ const std::vector<PatchElementReader>& elements =
+ ensemble_patch_reader.elements();
+ EXPECT_EQ(size_t(1), elements.size());
+
+ EnsemblePatchWriter ensemble_patch_writer(header);
+ ensemble_patch_writer.SetPatchType(PatchType::kRawPatch);
+ PatchElementWriter patch_element_writer(elements[0].element_match());
+ patch_element_writer.SetEquivalenceSink({});
+ patch_element_writer.SetExtraDataSink({});
+ patch_element_writer.SetRawDeltaSink({});
+ patch_element_writer.SetReferenceDeltaSink({});
+ ensemble_patch_writer.AddElement(std::move(patch_element_writer));
+
+ TestSerialize(data, ensemble_patch_writer);
+}
+
+TEST(EnsemblePatchTest, CheckFile) {
+ ByteVector data = {
+ 0x5A, 0x75, 0x63, 0x00, // magic
+ 0x05, 0x00, 0x00, 0x00, // old_size
+ 0xDF, 0x13, 0xE4, 0x10, // old_crc
+ 0x03, 0x00, 0x00, 0x00, // new_size
+ 0xDC, 0xF7, 0x00, 0x40, // new_crc
+ 2, 0, 0, 0, // kEnsemblePatch
+
+ 1, 0, 0, 0, // number of elements
+
+ 0x01, 0, 0, 0, // old_offset
+ 0x00, 0, 0, 0, // new_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x03, 0, 0, 0, // new_length
+ 1, 0, 0, 0, // kExeTypeWin32X86
+ 0, 0, 0, 0, // src_skip size
+ 0, 0, 0, 0, // dst_skip size
+ 0, 0, 0, 0, // copy_count size
+ 0, 0, 0, 0, // extra_data size
+ 0, 0, 0, 0, // raw_delta_skip size
+ 0, 0, 0, 0, // raw_delta_diff size
+ 0, 0, 0, 0, // reference_delta size
+ 0, 0, 0, 0, // pool count
+ };
+
+ EnsemblePatchReader ensemble_patch_reader =
+ TestInitialize<EnsemblePatchReader>(&data);
+
+ ByteVector old_file = {0x10, 0x32, 0x54, 0x76, 0x98};
+ ByteVector new_file = {0xBA, 0xDC, 0xFE};
+
+ ConstBufferView old_image(old_file.data(), old_file.size());
+ ConstBufferView new_image(new_file.data(), new_file.size());
+
+ EXPECT_TRUE(ensemble_patch_reader.CheckOldFile(old_image));
+ EXPECT_TRUE(ensemble_patch_reader.CheckNewFile(new_image));
+ EXPECT_FALSE(ensemble_patch_reader.CheckOldFile(new_image));
+ EXPECT_FALSE(ensemble_patch_reader.CheckNewFile(old_image));
+}
+
+TEST(EnsemblePatchTest, InvalidMagic) {
+ ByteVector data = {
+ 0x42, 0x42, 0x42, 0x00, // magic
+ 0x10, 0x32, 0x54, 0x76, // old_size
+ 0x00, 0x11, 0x22, 0x33, // old_crc
+ 0x03, 0x00, 0x00, 0x00, // new_size
+ 0x44, 0x55, 0x66, 0x77, // new_crc
+ 0, 0, 0, 0, // kRawPatch
+
+ 1, 0, 0, 0, // number of elements
+
+ 0x01, 0, 0, 0, // old_offset
+ 0x00, 0, 0, 0, // new_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x03, 0, 0, 0, // new_length
+ 1, 0, 0, 0, // kExeTypeWin32X86
+ 0, 0, 0, 0, // src_skip size
+ 0, 0, 0, 0, // dst_skip size
+ 0, 0, 0, 0, // copy_count size
+ 0, 0, 0, 0, // extra_data size
+ 0, 0, 0, 0, // raw_delta_skip size
+ 0, 0, 0, 0, // raw_delta_diff size
+ 0, 0, 0, 0, // reference_delta size
+ 0, 0, 0, 0, // pool count
+ };
+
+ TestInvalidInitialize<EnsemblePatchReader>(&data);
+}
+
+} // namespace zucchini
diff --git a/patch_reader.cc b/patch_reader.cc
new file mode 100644
index 0000000..eceb969
--- /dev/null
+++ b/patch_reader.cc
@@ -0,0 +1,345 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/patch_reader.h"
+
+#include <type_traits>
+#include <utility>
+
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/crc32.h"
+
+namespace zucchini {
+
+namespace patch {
+
+// Reads a PatchElementHeader from |source| and, if its executable type is in
+// range, copies offsets, lengths and type into both elements of
+// |element_match|. Returns false (after logging the error and a stack trace)
+// if the header cannot be read or the executable type is not below
+// kNumExeType; |element_match| is left untouched in that case.
+bool ParseElementMatch(BufferSource* source, ElementMatch* element_match) {
+  PatchElementHeader header;
+  const bool header_read = source->GetValue(&header);
+  if (!header_read) {
+    LOG(ERROR) << "Impossible to read ElementMatch from source.";
+    LOG(ERROR) << base::debug::StackTrace().ToString();
+    return false;
+  }
+
+  const ExecutableType type = static_cast<ExecutableType>(header.exe_type);
+  if (type >= kNumExeType) {
+    LOG(ERROR) << "Invalid ExecutableType encountered.";
+    LOG(ERROR) << base::debug::StackTrace().ToString();
+    return false;
+  }
+
+  // Old and new elements always share the same executable type.
+  auto& old_element = element_match->old_element;
+  old_element.offset = header.old_offset;
+  old_element.size = header.old_length;
+  old_element.exe_type = type;
+
+  auto& new_element = element_match->new_element;
+  new_element.offset = header.new_offset;
+  new_element.size = header.new_length;
+  new_element.exe_type = type;
+  return true;
+}
+
+// Reads a uint32_t byte count from |source|, then carves a region of that many
+// bytes out of |source| into |buffer|. Returns true on success; otherwise logs
+// which step failed plus a stack trace and returns false.
+bool ParseBuffer(BufferSource* source, BufferSource* buffer) {
+  uint32_t byte_count = 0;
+  if (source->GetValue(&byte_count)) {
+    const size_t region_size = base::checked_cast<size_t>(byte_count);
+    if (source->GetRegion(region_size, buffer))
+      return true;
+    LOG(ERROR) << "Impossible to read buffer content from source.";
+  } else {
+    LOG(ERROR) << "Impossible to read buffer size from source.";
+  }
+  LOG(ERROR) << base::debug::StackTrace().ToString();
+  return false;
+}
+
+} // namespace patch
+
+/******** EquivalenceSource ********/
+
+EquivalenceSource::EquivalenceSource() = default;
+EquivalenceSource::EquivalenceSource(const EquivalenceSource&) = default;
+EquivalenceSource::~EquivalenceSource() = default;
+
+// Extracts the three size-prefixed streams backing this source from |source|,
+// in the order src_skip, dst_skip, copy_count. Stops at the first stream that
+// fails to parse and returns false; returns true if all three were extracted.
+bool EquivalenceSource::Initialize(BufferSource* source) {
+  if (!patch::ParseBuffer(source, &src_skip_))
+    return false;
+  if (!patch::ParseBuffer(source, &dst_skip_))
+    return false;
+  return patch::ParseBuffer(source, &copy_count_);
+}
+
+// Decodes and returns the next Equivalence. Returns base::nullopt if any of
+// the three underlying streams is empty, if a value fails to parse, or if an
+// offset computation overflows offset_t.
+//
+// Offsets are delta encoded. The source offset is stored as a signed
+// difference from the end (offset + length) of the previous equivalence in the
+// source; the destination offset is stored as an unsigned difference from the
+// end of the previous equivalence in the destination.
+base::Optional<Equivalence> EquivalenceSource::GetNext() {
+  if (src_skip_.empty() || dst_skip_.empty() || copy_count_.empty())
+    return base::nullopt;
+
+  Equivalence equivalence = {};
+
+  uint32_t length = 0;
+  if (!patch::ParseVarUInt<uint32_t>(&copy_count_, &length))
+    return base::nullopt;
+  equivalence.length = base::strict_cast<offset_t>(length);
+
+  int32_t src_offset_diff = 0;  // Intentionally signed.
+  if (!patch::ParseVarInt<int32_t>(&src_skip_, &src_offset_diff))
+    return base::nullopt;
+  base::CheckedNumeric<offset_t> src_offset =
+      previous_src_offset_ + src_offset_diff;
+  if (!src_offset.IsValid())
+    return base::nullopt;
+
+  equivalence.src_offset = src_offset.ValueOrDie();
+  previous_src_offset_ = src_offset + equivalence.length;
+  if (!previous_src_offset_.IsValid())
+    return base::nullopt;
+
+  uint32_t dst_offset_diff = 0;  // Intentionally unsigned.
+  if (!patch::ParseVarUInt<uint32_t>(&dst_skip_, &dst_offset_diff))
+    return base::nullopt;
+  base::CheckedNumeric<offset_t> dst_offset =
+      previous_dst_offset_ + dst_offset_diff;
+  if (!dst_offset.IsValid())
+    return base::nullopt;
+
+  equivalence.dst_offset = dst_offset.ValueOrDie();
+  // Accumulate through the CheckedNumeric |dst_offset|, as done for the source
+  // side above. Adding the two plain offset_t members instead would be
+  // evaluated with ordinary unchecked arithmetic before the result is stored,
+  // so the IsValid() test below could never detect an overflow.
+  previous_dst_offset_ = dst_offset + equivalence.length;
+  if (!previous_dst_offset_.IsValid())
+    return base::nullopt;
+
+  return equivalence;
+}
+
+/******** ExtraDataSource ********/
+
+ExtraDataSource::ExtraDataSource() = default;
+ExtraDataSource::ExtraDataSource(const ExtraDataSource&) = default;
+ExtraDataSource::~ExtraDataSource() = default;
+
+// Extracts the single size-prefixed extra data stream from |source| into
+// |extra_data_|. Returns the result of patch::ParseBuffer().
+bool ExtraDataSource::Initialize(BufferSource* source) {
+ return patch::ParseBuffer(source, &extra_data_);
+}
+
+// Hands out the next |size| bytes of extra data as a view. Returns
+// base::nullopt if the region cannot be obtained from the remaining data.
+base::Optional<ConstBufferView> ExtraDataSource::GetNext(offset_t size) {
+  ConstBufferView region;
+  if (extra_data_.GetRegion(size, &region))
+    return region;
+  return base::nullopt;
+}
+
+/******** RawDeltaSource ********/
+
+RawDeltaSource::RawDeltaSource() = default;
+RawDeltaSource::RawDeltaSource(const RawDeltaSource&) = default;
+RawDeltaSource::~RawDeltaSource() = default;
+
+// Extracts the two size-prefixed raw delta streams from |source|: first the
+// skip stream, then the diff stream. Returns false as soon as one of them
+// fails to parse.
+bool RawDeltaSource::Initialize(BufferSource* source) {
+  const bool skip_parsed = patch::ParseBuffer(source, &raw_delta_skip_);
+  if (!skip_parsed)
+    return false;
+  return patch::ParseBuffer(source, &raw_delta_diff_);
+}
+
+// Decodes and returns the next RawDeltaUnit. Returns base::nullopt if either
+// stream is empty, if a value cannot be read, or if an offset computation
+// overflows offset_t.
+base::Optional<RawDeltaUnit> RawDeltaSource::GetNext() {
+ if (raw_delta_skip_.empty() || raw_delta_diff_.empty())
+ return base::nullopt;
+
+ RawDeltaUnit delta = {};
+ uint32_t copy_offset_diff = 0;
+ if (!patch::ParseVarUInt<uint32_t>(&raw_delta_skip_, &copy_offset_diff))
+ return base::nullopt;
+ // The stored value is relative to |copy_offset_compensation_|, which is
+ // updated at the end of each successful call.
+ base::CheckedNumeric<offset_t> copy_offset =
+ copy_offset_diff + copy_offset_compensation_;
+ if (!copy_offset.IsValid())
+ return base::nullopt;
+ delta.copy_offset = copy_offset.ValueOrDie();
+
+ // Each unit carries exactly one signed byte of difference.
+ if (!raw_delta_diff_.GetValue<int8_t>(&delta.diff))
+ return base::nullopt;
+
+ // We keep track of the compensation needed for next offset, taking into
+ // account delta encoding and bias of -1.
+ copy_offset_compensation_ = copy_offset + 1;
+ if (!copy_offset_compensation_.IsValid())
+ return base::nullopt;
+ return delta;
+}
+
+/******** ReferenceDeltaSource ********/
+
+ReferenceDeltaSource::ReferenceDeltaSource() = default;
+ReferenceDeltaSource::ReferenceDeltaSource(const ReferenceDeltaSource&) =
+ default;
+ReferenceDeltaSource::~ReferenceDeltaSource() = default;
+
+// Extracts the single size-prefixed reference delta stream from |source| into
+// |reference_delta_|. Returns the result of patch::ParseBuffer().
+bool ReferenceDeltaSource::Initialize(BufferSource* source) {
+ return patch::ParseBuffer(source, &reference_delta_);
+}
+
+// Decodes and returns the next signed reference delta. Returns base::nullopt
+// if the stream is empty or the next VarInt cannot be parsed.
+base::Optional<int32_t> ReferenceDeltaSource::GetNext() {
+  int32_t decoded = 0;
+  const bool has_value =
+      !reference_delta_.empty() &&
+      patch::ParseVarInt<int32_t>(&reference_delta_, &decoded);
+  if (!has_value)
+    return base::nullopt;
+  return decoded;
+}
+
+/******** TargetSource ********/
+
+TargetSource::TargetSource() = default;
+TargetSource::TargetSource(const TargetSource&) = default;
+TargetSource::~TargetSource() = default;
+
+// Extracts the single size-prefixed extra targets stream from |source| into
+// |extra_targets_|. Returns the result of patch::ParseBuffer().
+bool TargetSource::Initialize(BufferSource* source) {
+ return patch::ParseBuffer(source, &extra_targets_);
+}
+
+// Decodes and returns the next extra target. Returns base::nullopt if the
+// stream is empty, the next VarUInt cannot be parsed, or an offset computation
+// overflows offset_t.
+base::Optional<offset_t> TargetSource::GetNext() {
+  if (extra_targets_.empty())
+    return base::nullopt;
+
+  uint32_t encoded_diff = 0;
+  const bool parsed =
+      patch::ParseVarUInt<uint32_t>(&extra_targets_, &encoded_diff);
+  if (!parsed)
+    return base::nullopt;
+
+  base::CheckedNumeric<offset_t> decoded = encoded_diff + target_compensation_;
+  if (!decoded.IsValid())
+    return base::nullopt;
+
+  // Record the compensation to apply to the next target, taking into account
+  // delta encoding and bias of -1.
+  target_compensation_ = decoded + 1;
+  if (!target_compensation_.IsValid())
+    return base::nullopt;
+  return static_cast<offset_t>(decoded.ValueOrDie());
+}
+
+/******** PatchElementReader ********/
+
+PatchElementReader::PatchElementReader() = default;
+PatchElementReader::PatchElementReader(PatchElementReader&&) = default;
+PatchElementReader::~PatchElementReader() = default;
+
+// Reads one patch element from |source|: the element match header, followed
+// by the equivalence, extra data, raw delta and reference delta streams, then
+// a uint32_t pool count and, for each pool, a one-byte pool tag followed by
+// its extra target stream. Returns false on the first part that cannot be
+// read, on a kNoPoolTag tag, or on a pool tag that appears more than once.
+bool PatchElementReader::Initialize(BufferSource* source) {
+  if (!patch::ParseElementMatch(source, &element_match_))
+    return false;
+  if (!equivalences_.Initialize(source))
+    return false;
+  if (!extra_data_.Initialize(source))
+    return false;
+  if (!raw_delta_.Initialize(source))
+    return false;
+  if (!reference_delta_.Initialize(source))
+    return false;
+
+  uint32_t pool_count = 0;
+  if (!source->GetValue(&pool_count)) {
+    LOG(ERROR) << "Impossible to read pool_count from source.";
+    return false;
+  }
+
+  for (uint32_t pools_left = pool_count; pools_left != 0; --pools_left) {
+    uint8_t raw_tag = 0;
+    if (!source->GetValue(&raw_tag)) {
+      LOG(ERROR) << "Impossible to read pool_tag from source.";
+      return false;
+    }
+
+    PoolTag tag(raw_tag);
+    if (tag == kNoPoolTag) {
+      LOG(ERROR) << "Invalid pool_tag encountered in ExtraTargetList.";
+      return false;
+    }
+
+    // Insert an empty TargetSource first; a failed insertion means this tag
+    // was already seen.
+    auto inserted = extra_targets_.insert({tag, {}});
+    const bool is_new_tag = inserted.second;
+    if (!is_new_tag) {
+      LOG(ERROR) << "Multiple ExtraTargetList found for the same pool_tag";
+      return false;
+    }
+
+    TargetSource& targets = inserted.first->second;
+    if (!targets.Initialize(source))
+      return false;
+  }
+  return true;
+}
+
+/******** EnsemblePatchReader ********/
+
+// Factory: wraps |buffer| in a BufferSource and attempts to initialize a
+// reader from it. Returns the reader on success, base::nullopt otherwise.
+base::Optional<EnsemblePatchReader> EnsemblePatchReader::Create(
+    ConstBufferView buffer) {
+  EnsemblePatchReader reader;
+  BufferSource source(buffer);
+  if (reader.Initialize(&source))
+    return reader;
+  return base::nullopt;
+}
+
+EnsemblePatchReader::EnsemblePatchReader() = default;
+EnsemblePatchReader::EnsemblePatchReader(EnsemblePatchReader&&) = default;
+EnsemblePatchReader::~EnsemblePatchReader() = default;
+
+// Parses a whole ensemble patch from |source|: the PatchHeader, the patch
+// type, the element count, then every patch element. Returns true only if:
+// - the header is readable and carries PatchHeader::kMagic,
+// - the patch type is one of kRawPatch, kSinglePatch, kEnsemblePatch,
+// - raw and single patches contain exactly one element,
+// - every element fits in the old / new sizes given by the header,
+// - the new elements are contiguous from offset 0 and end at the new size,
+// - |source| is empty once all elements have been read.
+// Each failure is logged before returning false.
+bool EnsemblePatchReader::Initialize(BufferSource* source) {
+  if (!source->GetValue(&header_)) {
+    LOG(ERROR) << "Impossible to read header from source.";
+    return false;
+  }
+  if (header_.magic != PatchHeader::kMagic) {
+    LOG(ERROR) << "Patch contains invalid magic.";
+    return false;
+  }
+
+  uint32_t raw_patch_type =
+      static_cast<uint32_t>(PatchType::kUnrecognisedPatch);
+  if (!source->GetValue(&raw_patch_type)) {
+    LOG(ERROR) << "Impossible to read patch_type from source.";
+    return false;
+  }
+  patch_type_ = static_cast<PatchType>(raw_patch_type);
+  const bool is_known_type = patch_type_ == PatchType::kRawPatch ||
+                             patch_type_ == PatchType::kSinglePatch ||
+                             patch_type_ == PatchType::kEnsemblePatch;
+  if (!is_known_type) {
+    LOG(ERROR) << "Invalid patch_type encountered.";
+    return false;
+  }
+
+  uint32_t element_count = 0;
+  if (!source->GetValue(&element_count)) {
+    LOG(ERROR) << "Impossible to read element_count from source.";
+    return false;
+  }
+  // Of the three accepted types, only ensemble patches may hold a number of
+  // elements other than one.
+  const bool expects_single_element = patch_type_ != PatchType::kEnsemblePatch;
+  if (expects_single_element && element_count != 1) {
+    LOG(ERROR) << "Unexpected number of elements in patch.";
+    return false;
+  }
+
+  // Offset in the new image at which the next element is required to start.
+  offset_t expected_new_offset = 0;
+  for (uint32_t index = 0; index < element_count; ++index) {
+    PatchElementReader element;
+    if (!element.Initialize(source))
+      return false;
+
+    if (!(element.old_element().FitsIn(header_.old_size) &&
+          element.new_element().FitsIn(header_.new_size))) {
+      LOG(ERROR) << "Invalid element encountered.";
+      return false;
+    }
+
+    if (element.new_element().offset != expected_new_offset) {
+      LOG(ERROR) << "Invalid element encountered.";
+      return false;
+    }
+    expected_new_offset = element.new_element().EndOffset();
+
+    elements_.push_back(std::move(element));
+  }
+
+  if (expected_new_offset != header_.new_size) {
+    LOG(ERROR) << "Patch elements don't fully cover new image file.";
+    return false;
+  }
+
+  if (!source->empty()) {
+    LOG(ERROR) << "Patch was not fully consumed.";
+    return false;
+  }
+
+  return true;
+}
+
+// Returns true iff |old_image| has the size and CRC32 recorded for the old
+// file in the patch header. The CRC is only computed when the size matches.
+bool EnsemblePatchReader::CheckOldFile(ConstBufferView old_image) const {
+  if (old_image.size() != header_.old_size)
+    return false;
+  return CalculateCrc32(old_image.begin(), old_image.end()) == header_.old_crc;
+}
+
+// Returns true iff |new_image| has the size and CRC32 recorded for the new
+// file in the patch header. The CRC is only computed when the size matches.
+bool EnsemblePatchReader::CheckNewFile(ConstBufferView new_image) const {
+  if (new_image.size() != header_.new_size)
+    return false;
+  return CalculateCrc32(new_image.begin(), new_image.end()) == header_.new_crc;
+}
+
+} // namespace zucchini
diff --git a/patch_reader.h b/patch_reader.h
new file mode 100644
index 0000000..ef6cd32
--- /dev/null
+++ b/patch_reader.h
@@ -0,0 +1,277 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_PATCH_READER_H_
+#define COMPONENTS_ZUCCHINI_PATCH_READER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <vector>
+
+#include "base/debug/stack_trace.h"
+#include "base/logging.h"
+#include "base/numerics/checked_math.h"
+#include "base/optional.h"
+#include "components/zucchini/buffer_source.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/patch_utils.h"
+
+namespace zucchini {
+
+namespace patch {
+
+// The Parse*() functions below attempt to extract data of a specific type from
+// the beginning of |source|. On success, a parse function consumes the used
+// portion of |source|, writes data into the output parameter, and returns
+// true. Otherwise it returns false; callers should then treat |source| as
+// possibly partially consumed, since some functions (e.g., ParseBuffer()) read
+// a leading field before a later step can fail.
+
+// Parses |source| for the next ElementMatch.
+bool ParseElementMatch(BufferSource* source, ElementMatch* element_match);
+
+// Parses |source| for the next embedded BufferSource.
+bool ParseBuffer(BufferSource* source, BufferSource* buffer);
+
+// Parses |source| for the next VarUInt. On success stores the decoded number
+// in |value|, advances |source| past the bytes that were decoded and returns
+// true. On failure logs the error with a stack trace and returns false.
+template <class T>
+bool ParseVarUInt(BufferSource* source, T* value) {
+  const auto consumed = DecodeVarUInt(source->begin(), source->end(), value);
+  if (consumed) {
+    // Advance |source| beyond the VarUInt value.
+    source->Skip(consumed);
+    return true;
+  }
+  LOG(ERROR) << "Impossible to read VarUInt from source.";
+  LOG(ERROR) << base::debug::StackTrace().ToString();
+  return false;
+}
+
+// Parses |source| for the next VarInt. On success stores the decoded number in
+// |value|, advances |source| past the bytes that were decoded and returns
+// true. On failure logs the error with a stack trace and returns false.
+template <class T>
+bool ParseVarInt(BufferSource* source, T* value) {
+  const auto consumed = DecodeVarInt(source->begin(), source->end(), value);
+  if (consumed) {
+    // Advance |source| beyond the VarInt value.
+    source->Skip(consumed);
+    return true;
+  }
+  LOG(ERROR) << "Impossible to read VarInt from source.";
+  LOG(ERROR) << base::debug::StackTrace().ToString();
+  return false;
+}
+
+} // namespace patch
+
+// The *Source classes below are light-weight (i.e., allows copying) visitors to
+// read patch data. Each of them has an associated "main type", and performs the
+// following:
+// - Consumes portions of a BufferSource (required to remain valid for the
+// lifetime of the object).
+// - Decodes consumed data, which represent a list of items with "main type".
+// - Dispenses "main type" elements (hence "Source" in the name).
+//
+// Common "core functions" implemented by *Source classes are:
+// - bool Initialize(BufferSource* source): Consumes data from BufferSource and
+// initializes internal states. Returns true if successful, and false
+// otherwise (|source| may be partially consumed).
+// - base::Optional<MAIN_TYPE> GetNext(OPT_PARAMS): Decodes consumed data and
+// returns the next item as base::Optional (returns base::nullopt on failure).
+// - bool Done() const: Returns true if no more items remain; otherwise false.
+//
+// Usage of *Source instances don't mix, and GetNext() have dissimilar
+// interfaces. Therefore we do not use inheritance to relate *Source classes,
+// and simply implement "core functions" with matching names.
+
+// Source for Equivalences. Backed by three parallel streams (source skips,
+// destination skips and copy counts); one value is taken from each stream per
+// Equivalence.
+class EquivalenceSource {
+ public:
+ EquivalenceSource();
+ EquivalenceSource(const EquivalenceSource&);
+ ~EquivalenceSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ base::Optional<Equivalence> GetNext();
+ // True only when all three streams are exhausted. GetNext() already returns
+ // base::nullopt when any single stream is empty, so Done() can still be
+ // false after GetNext() has failed.
+ bool Done() const {
+ return src_skip_.empty() && dst_skip_.empty() && copy_count_.empty();
+ }
+
+ // Accessors for unittest.
+ BufferSource src_skip() const { return src_skip_; }
+ BufferSource dst_skip() const { return dst_skip_; }
+ BufferSource copy_count() const { return copy_count_; }
+
+ private:
+ BufferSource src_skip_;
+ BufferSource dst_skip_;
+ BufferSource copy_count_;
+
+ // Running end offsets (offset + length) of the last equivalence returned by
+ // GetNext(); the next delta-encoded offsets are relative to these.
+ base::CheckedNumeric<offset_t> previous_src_offset_ = 0;
+ base::CheckedNumeric<offset_t> previous_dst_offset_ = 0;
+};
+
+// Source for extra data. Dispenses caller-sized views into a single stream of
+// bytes.
+class ExtraDataSource {
+ public:
+ ExtraDataSource();
+ ExtraDataSource(const ExtraDataSource&);
+ ~ExtraDataSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ // |size| is the size in bytes of the buffer requested.
+ base::Optional<ConstBufferView> GetNext(offset_t size);
+ // True when no extra data remains.
+ bool Done() const { return extra_data_.empty(); }
+
+ // Accessors for unittest.
+ BufferSource extra_data() const { return extra_data_; }
+
+ private:
+ BufferSource extra_data_;
+};
+
+// Source for raw delta. Backed by two parallel streams: VarUInt-encoded copy
+// offset differences and one signed byte of difference per unit.
+class RawDeltaSource {
+ public:
+ RawDeltaSource();
+ RawDeltaSource(const RawDeltaSource&);
+ ~RawDeltaSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ base::Optional<RawDeltaUnit> GetNext();
+ // True only when both streams are exhausted. GetNext() already returns
+ // base::nullopt when either stream is empty.
+ bool Done() const {
+ return raw_delta_skip_.empty() && raw_delta_diff_.empty();
+ }
+
+ // Accessors for unittest.
+ BufferSource raw_delta_skip() const { return raw_delta_skip_; }
+ BufferSource raw_delta_diff() const { return raw_delta_diff_; }
+
+ private:
+ BufferSource raw_delta_skip_;
+ BufferSource raw_delta_diff_;
+
+ // Value added to the next decoded offset difference; GetNext() sets it to
+ // the last returned copy offset plus one.
+ base::CheckedNumeric<offset_t> copy_offset_compensation_ = 0;
+};
+
+// Source for reference delta. Dispenses VarInt-encoded signed 32-bit deltas
+// from a single stream.
+class ReferenceDeltaSource {
+ public:
+ ReferenceDeltaSource();
+ ReferenceDeltaSource(const ReferenceDeltaSource&);
+ ~ReferenceDeltaSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ base::Optional<int32_t> GetNext();
+ // True when no reference delta remains.
+ bool Done() const { return reference_delta_.empty(); }
+
+ // Accessors for unittest.
+ BufferSource reference_delta() const { return reference_delta_; }
+
+ private:
+ BufferSource reference_delta_;
+};
+
+// Source for additional targets. Dispenses delta-encoded (VarUInt) target
+// offsets from a single stream.
+class TargetSource {
+ public:
+ TargetSource();
+ TargetSource(const TargetSource&);
+ ~TargetSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ base::Optional<offset_t> GetNext();
+ // True when no extra target remains.
+ bool Done() const { return extra_targets_.empty(); }
+
+ // Accessors for unittest.
+ BufferSource extra_targets() const { return extra_targets_; }
+
+ private:
+ BufferSource extra_targets_;
+
+ // Value added to the next decoded target difference; GetNext() sets it to
+ // the last returned target plus one.
+ base::CheckedNumeric<offset_t> target_compensation_ = 0;
+};
+
+// Following are utility classes providing a structured view on data forming a
+// patch.
+
+// Utility to read a patch element. A patch element contains all the information
+// necessary to patch a single element. This class provides access
+// to the multiple streams of data forming the patch element.
+class PatchElementReader {
+ public:
+ PatchElementReader();
+ PatchElementReader(PatchElementReader&&);
+ ~PatchElementReader();
+
+ // If data read from |source| is well-formed, initialize cached sources to
+ // read from it, and returns true. Otherwise returns false.
+ bool Initialize(BufferSource* source);
+
+ const ElementMatch& element_match() const { return element_match_; }
+ const Element& old_element() const { return element_match_.old_element; }
+ const Element& new_element() const { return element_match_.new_element; }
+
+ // The Get*() functions below return copies of cached sources.
+ EquivalenceSource GetEquivalenceSource() const { return equivalences_; }
+ ExtraDataSource GetExtraDataSource() const { return extra_data_; }
+ RawDeltaSource GetRawDeltaSource() const { return raw_delta_; }
+ ReferenceDeltaSource GetReferenceDeltaSource() const {
+ return reference_delta_;
+ }
+ // Returns a copy of the extra target source stored for |tag|, or a
+ // default-constructed TargetSource if none was stored for that tag.
+ TargetSource GetExtraTargetSource(PoolTag tag) const {
+ auto pos = extra_targets_.find(tag);
+ return pos != extra_targets_.end() ? pos->second : TargetSource();
+ }
+
+ private:
+ ElementMatch element_match_;
+
+ // Cached sources.
+ EquivalenceSource equivalences_;
+ ExtraDataSource extra_data_;
+ RawDeltaSource raw_delta_;
+ ReferenceDeltaSource reference_delta_;
+ // At most one extra target source per pool tag.
+ std::map<PoolTag, TargetSource> extra_targets_;
+};
+
+// Utility to read a Zucchini ensemble patch. An ensemble patch is the
+// concatenation of a patch header with a vector of patch elements.
+class EnsemblePatchReader {
+ public:
+  // If data read from |buffer| is well-formed, initializes and returns
+  // an instance of EnsemblePatchReader. Otherwise returns base::nullopt.
+  static base::Optional<EnsemblePatchReader> Create(ConstBufferView buffer);
+
+  EnsemblePatchReader();
+  EnsemblePatchReader(EnsemblePatchReader&&);
+  ~EnsemblePatchReader();
+
+  // If data read from |source| is well-formed, initialize internal state to
+  // read from it, and returns true. Otherwise returns false.
+  bool Initialize(BufferSource* source);
+
+  // Check old / new image file validity, comparing against expected size and
+  // CRC32. Return true if file matches expectations, false otherwise.
+  bool CheckOldFile(ConstBufferView old_image) const;
+  bool CheckNewFile(ConstBufferView new_image) const;
+
+  const PatchHeader& header() const { return header_; }
+  // Returns PatchType::kUnrecognisedPatch until Initialize() has read a patch
+  // type from its source.
+  PatchType patch_type() const { return patch_type_; }
+  const std::vector<PatchElementReader>& elements() const { return elements_; }
+
+ private:
+  PatchHeader header_;
+  // Explicitly initialized so that patch_type() never reads an indeterminate
+  // value on a default-constructed reader.
+  PatchType patch_type_ = PatchType::kUnrecognisedPatch;
+  std::vector<PatchElementReader> elements_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_PATCH_READER_H_
diff --git a/patch_utils.h b/patch_utils.h
new file mode 100644
index 0000000..77cf2f3
--- /dev/null
+++ b/patch_utils.h
@@ -0,0 +1,152 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_PATCH_UTILS_H_
+#define COMPONENTS_ZUCCHINI_PATCH_UTILS_H_
+
+#include <stdint.h>
+
+#include <iterator>
+#include <type_traits>
+
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// Constants that appear inside a patch. The underlying type is uint32_t, which
+// is also the width used to read the patch type from a patch.
+enum class PatchType : uint32_t {
+ // Patch contains a single raw element, corresponding to an element match that
+ // covers the entire images, and with ExecutableType::kExeTypeNoOp.
+ kRawPatch = 0,
+
+ // Patch contains a single executable element, corresponding to an element
+ // match that covers the entire images.
+ kSinglePatch = 1,
+
+ // Patch contains multiple raw and/or executable elements.
+ kEnsemblePatch = 2,
+
+ // Used when type is uninitialized. Implicitly takes the next value (3).
+ kUnrecognisedPatch
+};
+
+// A Zucchini 'ensemble' patch is the concatenation of a patch header with a
+// list of patch 'elements', each containing data for patching individual
+// elements.
+
+// Supported by MSVC, g++, and clang++. Ensures no gaps in packing.
+#pragma pack(push, 1)
+
+// Header for a Zucchini patch, found at the beginning of an ensemble patch.
+// Declared inside a #pragma pack(push, 1) region so that it has no padding.
+struct PatchHeader {
+ // Magic signature at the beginning of a Zucchini patch file. Numerically
+ // 0x0063755A: 'Z' in the lowest byte, then 'u', then 'c', top byte zero.
+ enum : uint32_t { kMagic = 'Z' | ('u' << 8) | ('c' << 16) };
+
+ uint32_t magic = 0;
+ // Expected size and CRC32 of the old image.
+ uint32_t old_size = 0;
+ uint32_t old_crc = 0;
+ // Expected size and CRC32 of the new image.
+ uint32_t new_size = 0;
+ uint32_t new_crc = 0;
+};
+
+// Sanity check.
+static_assert(sizeof(PatchHeader) == 20, "PatchHeader is 20 bytes");
+
+// Header for a patch element, found at the beginning of every patch element.
+// Consists of five 32-bit fields with no padding.
+struct PatchElementHeader {
+  uint32_t old_offset;  // Offset of the element in the old image.
+  uint32_t new_offset;  // Offset of the element in the new image.
+  uint32_t old_length;  // Size of the element in the old image.
+  uint32_t new_length;  // Size of the element in the new image.
+  uint32_t exe_type;    // ExecutableType of the element, stored as uint32_t.
+};
+
+// Sanity check. The message now states the size that is actually enforced.
+static_assert(sizeof(PatchElementHeader) == 20,
+              "PatchElementHeader is 20 bytes");
+
+#pragma pack(pop)
+
+// Describes a raw FIX operation.
+struct RawDeltaUnit {
+ offset_t copy_offset; // Offset in copy regions.
+ int8_t diff; // Bytewise difference.
+};
+
+// A Zucchini patch contains data streams encoded using varint format to reduce
+// uncompressed size.
+
+// Encodes |value| as a varint through |dst| and returns the iterator one past
+// the last byte written. The value is emitted 7 bits at a time, least
+// significant group first; every byte except the last has its high bit set.
+// |dst| is assumed to hold enough space. Typically, this will write to a
+// vector using back insertion, e.g.:
+//   EncodeVarUInt(value, std::back_inserter(vector));
+template <class T, class It>
+It EncodeVarUInt(T value, It dst) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned");
+
+  // Emit continuation bytes while more than 7 bits remain.
+  for (; value >= 0x80; value >>= 7) {
+    *dst = static_cast<uint8_t>(value) | 0x80;
+    ++dst;
+  }
+  // Final byte: high bit clear.
+  *dst = static_cast<uint8_t>(value);
+  ++dst;
+  return dst;
+}
+
+// Same as EncodeVarUInt(), but for signed values. The sign is folded into the
+// lowest bit: a non-negative |value| is encoded as (value << 1), a negative
+// one as ((~value << 1) | 1), both taken in the unsigned counterpart of |T|.
+template <class T, class It>
+It EncodeVarInt(T value, It dst) {
+  static_assert(std::is_signed<T>::value, "Value type must be signed");
+
+  using unsigned_value_type = typename std::make_unsigned<T>::type;
+  // The template argument is given explicitly: for types narrower than int,
+  // integral promotion turns the shifted expression into an int, and a deduced
+  // EncodeVarUInt<int> would be rejected by its unsigned static_assert.
+  if (value < 0) {
+    return EncodeVarUInt<unsigned_value_type>(
+        (unsigned_value_type(~value) << 1) | 1, dst);
+  }
+  return EncodeVarUInt<unsigned_value_type>(unsigned_value_type(value) << 1,
+                                            dst);
+}
+
+// Tries to read a varint unsigned integer from |[first, last)|. If
+// successful, writes result into |value| and returns the number of bytes
+// read from |[first, last)|. Otherwise returns 0 and leaves |value| untouched.
+// Decoding fails if the range ends before a terminating byte (high bit clear)
+// is found, or if more 7-bit groups are present than fit in |T|.
+template <class T, class It>
+typename std::iterator_traits<It>::difference_type DecodeVarUInt(It first,
+                                                                 It last,
+                                                                 T* value) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned");
+
+  T accumulated = 0;
+  It cursor = first;
+  // |shift| is the bit position of the next 7-bit group. The loop stops once
+  // that position lies outside |T| (overflow) or the input is exhausted.
+  for (uint8_t shift = 0; shift < sizeof(T) * 8 && cursor != last;
+       shift += 7) {
+    const auto byte = *cursor;
+    ++cursor;
+    accumulated |= T(byte & 0x7F) << shift;
+    if (byte < 0x80) {
+      // Terminating byte: commit the result.
+      *value = accumulated;
+      return cursor - first;
+    }
+  }
+  return 0;
+}
+
+// Same as DecodeVarUInt(), but for signed values. The lowest bit of the
+// decoded unsigned number selects the sign: when set, the result is the
+// bitwise complement of the remaining bits; otherwise it is the remaining
+// bits themselves. |value| is only written on success.
+template <class T, class It>
+typename std::iterator_traits<It>::difference_type DecodeVarInt(It first,
+                                                                It last,
+                                                                T* value) {
+  static_assert(std::is_signed<T>::value, "Value type must be signed");
+
+  using UnsignedT = typename std::make_unsigned<T>::type;
+  UnsignedT encoded = 0;
+  const auto bytes_read = DecodeVarUInt(first, last, &encoded);
+  if (!bytes_read)
+    return bytes_read;
+
+  const T magnitude = static_cast<T>(encoded >> 1);
+  if (encoded & 1)
+    *value = ~magnitude;
+  else
+    *value = magnitude;
+  return bytes_read;
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_PATCH_UTILS_H_
diff --git a/patch_utils_unittest.cc b/patch_utils_unittest.cc
new file mode 100644
index 0000000..bdc8d45
--- /dev/null
+++ b/patch_utils_unittest.cc
@@ -0,0 +1,171 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/patch_utils.h"
+
+#include <stdint.h>
+
+#include <iterator>
+#include <type_traits>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// Round-trip helper: for every basis in |data|, encodes values near +basis and
+// -basis back to back into one buffer with EncodeVarUInt(), then decodes the
+// buffer sequentially with DecodeVarUInt() and checks that each value is
+// recovered, that the whole buffer is consumed, and that decoding at the end
+// of the buffer fails without touching the output.
+template <class T>
+void TestEncodeDecodeVarUInt(const std::vector<T>& data) {
+ std::vector<uint8_t> buffer;
+
+ std::vector<T> values;
+ for (T basis : data) {
+ // For variety, test the neighborhood values for each case in |data|. Some
+ // test cases may result in overflow when computing |value|, but we don't
+ // care about that.
+ for (int delta = -4; delta <= 4; ++delta) {
+ T value = delta + basis;
+ EncodeVarUInt<T>(value, std::back_inserter(buffer));
+ values.push_back(value);
+
+ value = delta - basis;
+ EncodeVarUInt<T>(value, std::back_inserter(buffer));
+ values.push_back(value);
+ }
+ }
+
+ auto it = buffer.begin();
+ for (T expected : values) {
+ // Start from a sentinel so that a decode that writes nothing is noticed.
+ T value = T(-1);
+ auto res = DecodeVarUInt(it, buffer.end(), &value);
+ EXPECT_NE(0, res);
+ EXPECT_EQ(expected, value);
+ it += res;
+ }
+ EXPECT_EQ(it, buffer.end());
+
+ // Decoding from an empty range must fail and leave |value| unchanged.
+ T value = T(-1);
+ auto res = DecodeVarUInt(it, buffer.end(), &value);
+ EXPECT_EQ(0, res);
+ EXPECT_EQ(T(-1), value);
+}
+
+// Round-trip helper: for every basis in |data|, encodes values near +basis and
+// -basis back to back into one buffer with EncodeVarInt(), then decodes the
+// buffer sequentially with DecodeVarInt() and checks that each value is
+// recovered, that the whole buffer is consumed, and that decoding at the end
+// of the buffer fails without touching the output.
+template <class T>
+void TestEncodeDecodeVarInt(const std::vector<T>& data) {
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  std::vector<uint8_t> buffer;
+
+  std::vector<T> values;
+  for (T basis : data) {
+    // For variety, test the neighborhood values for each case in |data|. Some
+    // test cases wrap around when computing |value| (e.g., for the minimum and
+    // maximum of |T|). The arithmetic is therefore done in the unsigned
+    // counterpart of |T|, where wraparound is well defined; signed overflow
+    // would be undefined behavior.
+    const UnsignedT unsigned_basis = static_cast<UnsignedT>(basis);
+    for (int delta = -4; delta <= 4; ++delta) {
+      const UnsignedT unsigned_delta = static_cast<UnsignedT>(delta);
+
+      T value = static_cast<T>(unsigned_delta + unsigned_basis);
+      EncodeVarInt(value, std::back_inserter(buffer));
+      values.push_back(value);
+
+      value = static_cast<T>(unsigned_delta - unsigned_basis);
+      EncodeVarInt(value, std::back_inserter(buffer));
+      values.push_back(value);
+    }
+  }
+
+  auto it = buffer.begin();
+  for (T expected : values) {
+    // Start from a sentinel so that a decode that writes nothing is noticed.
+    T value = T(-1);
+    auto res = DecodeVarInt(it, buffer.end(), &value);
+    EXPECT_NE(0, res);
+    EXPECT_EQ(expected, value);
+    it += res;
+  }
+  EXPECT_EQ(it, buffer.end());
+
+  // Decoding from an empty range must fail and leave |value| unchanged.
+  T value = T(-1);
+  auto res = DecodeVarInt(it, buffer.end(), &value);
+  EXPECT_EQ(0, res);
+  EXPECT_EQ(T(-1), value);
+}
+
+// The four tests below run the round-trip helpers on 32-bit and 64-bit
+// unsigned and signed types, using bases that include zero, powers of two and
+// the extreme values of each type.
+TEST(PatchUtilsTest, EncodeDecodeVarUInt32) {
+ TestEncodeDecodeVarUInt<uint32_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21,
+ 1 << 22, 1 << 27, 1 << 28, 0x7FFFFFFFU,
+ UINT32_MAX});
+}
+
+TEST(PatchUtilsTest, EncodeDecodeVarInt32) {
+ TestEncodeDecodeVarInt<int32_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21,
+ 1 << 22, 1 << 27, 1 << 28, -1, INT32_MIN,
+ INT32_MAX});
+}
+
+TEST(PatchUtilsTest, EncodeDecodeVarUInt64) {
+ TestEncodeDecodeVarUInt<uint64_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21,
+ 1 << 22, 1ULL << 55, 1ULL << 56,
+ 0x7FFFFFFFFFFFFFFFULL, UINT64_MAX});
+}
+
+TEST(PatchUtilsTest, EncodeDecodeVarInt64) {
+ TestEncodeDecodeVarInt<int64_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21,
+ 1 << 22, 1LL << 55, 1LL << 56, -1, INT64_MIN,
+ INT64_MAX});
+}
+
+// Checks how DecodeVarUInt() handles malformed 32-bit input: truncated and
+// over-long encodings must fail without writing to the output.
+TEST(PatchUtilsTest, DecodeVarUInt32Malformed) {
+ constexpr uint32_t kUninit = static_cast<uint32_t>(-1LL);
+
+ // Output variable to ensure that on failure, the output variable is not
+ // written to.
+ uint32_t value = uint32_t(-1);
+
+ // Resets |value| to the sentinel, then decodes |buffer| as an unsigned
+ // varint (despite the helper's name) and returns the number of bytes read.
+ auto TestDecodeVarInt = [&value,
+ kUninit](const std::vector<uint8_t>& buffer) {
+ value = kUninit;
+ return DecodeVarUInt(buffer.begin(), buffer.end(), &value);
+ };
+
+ // Exhausted.
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>{}));
+ EXPECT_EQ(kUninit, value);
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(4, 128)));
+ EXPECT_EQ(kUninit, value);
+
+ // Overflow.
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(6, 128)));
+ EXPECT_EQ(kUninit, value);
+ EXPECT_EQ(0, TestDecodeVarInt({128, 128, 128, 128, 128, 42}));
+ EXPECT_EQ(kUninit, value);
+
+ // Following are pathological cases that are not handled for simplicity,
+ // hence decoding is expected to be successful.
+ // The fifth byte is shifted left by 28 bits, so its bits 4 and above fall
+ // outside a uint32_t and are dropped, leaving a decoded value of 0.
+ EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 16}));
+ EXPECT_EQ(uint32_t(0), value);
+ EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 32}));
+ EXPECT_EQ(uint32_t(0), value);
+ EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 64}));
+ EXPECT_EQ(uint32_t(0), value);
+}
+
+// Checks that DecodeVarUInt() rejects truncated and over-long 64-bit
+// encodings without writing to the output.
+TEST(PatchUtilsTest, DecodeVarUInt64Malformed) {
+ constexpr uint64_t kUninit = static_cast<uint64_t>(-1);
+
+ uint64_t value = kUninit;
+ // Resets |value| to the sentinel, then decodes |buffer| as an unsigned
+ // varint (despite the helper's name) and returns the number of bytes read.
+ auto TestDecodeVarInt = [&value,
+ kUninit](const std::vector<uint8_t>& buffer) {
+ value = kUninit;
+ return DecodeVarUInt(buffer.begin(), buffer.end(), &value);
+ };
+
+ // Exhausted.
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>{}));
+ EXPECT_EQ(kUninit, value);
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(9, 128)));
+ EXPECT_EQ(kUninit, value);
+
+ // Overflow.
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(10, 128)));
+ EXPECT_EQ(kUninit, value);
+ EXPECT_EQ(0, TestDecodeVarInt(
+ {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 42}));
+ EXPECT_EQ(kUninit, value);
+}
+
+} // namespace zucchini
diff --git a/patch_writer.cc b/patch_writer.cc
new file mode 100644
index 0000000..4edbc7c
--- /dev/null
+++ b/patch_writer.cc
@@ -0,0 +1,294 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/patch_writer.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "base/numerics/checked_math.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/crc32.h"
+
+namespace zucchini {
+
+namespace patch {
+
+bool SerializeElementMatch(const ElementMatch& element_match,
+ BufferSink* sink) {
+ if (!element_match.IsValid())
+ return false; // Invalid matches are never written to |sink|.
+
+ PatchElementHeader element_header; // Filled below, written as one value.
+ element_header.old_offset =
+ base::checked_cast<uint32_t>(element_match.old_element.offset);
+ element_header.new_offset =
+ base::checked_cast<uint32_t>(element_match.new_element.offset);
+ element_header.old_length =
+ base::checked_cast<uint32_t>(element_match.old_element.size);
+ element_header.new_length =
+ base::checked_cast<uint32_t>(element_match.new_element.size);
+ element_header.exe_type = element_match.exe_type();
+
+ return sink->PutValue<PatchElementHeader>(element_header);
+}
+
+size_t SerializedElementMatchSize(const ElementMatch& element_match) {
+ return sizeof(PatchElementHeader); // Fixed; |element_match| is not read.
+}
+
+bool SerializeBuffer(const std::vector<uint8_t>& buffer, BufferSink* sink) {
+ // buffer.size() is not encoded as varint to simplify SerializedBufferSize().
+ base::CheckedNumeric<uint32_t> size = buffer.size();
+ if (!size.IsValid())
+ return false; // buffer.size() does not fit in uint32_t.
+ return sink->PutValue<uint32_t>(size.ValueOrDie()) &&
+ sink->PutRange(buffer.begin(), buffer.end()); // Size first, then bytes.
+}
+
+size_t SerializedBufferSize(const std::vector<uint8_t>& buffer) {
+ return sizeof(uint32_t) + buffer.size(); // uint32_t size field + payload.
+}
+
+} // namespace patch
+
+/******** EquivalenceSink ********/
+
+EquivalenceSink::EquivalenceSink() = default;
+EquivalenceSink::EquivalenceSink(const std::vector<uint8_t>& src_skip,
+ const std::vector<uint8_t>& dst_skip,
+ const std::vector<uint8_t>& copy_count)
+ : src_skip_(src_skip), dst_skip_(dst_skip), copy_count_(copy_count) {} // Copies.
+
+EquivalenceSink::EquivalenceSink(EquivalenceSink&&) = default;
+EquivalenceSink::~EquivalenceSink() = default;
+
+void EquivalenceSink::PutNext(const Equivalence& equivalence) {
+ // Equivalences are expected to be given ordered by |dst_offset|.
+ DCHECK_GE(equivalence.dst_offset, dst_offset_);
+ // Unsigned values are ensured by above check.
+
+ // Result of subtracting 2 unsigned integers is unsigned. Overflow is allowed
+ // for negative values, as long as uint32_t can hold the result.
+ uint32_t src_offset_diff =
+ base::strict_cast<uint32_t>(equivalence.src_offset - src_offset_);
+ EncodeVarInt<int32_t>(static_cast<int32_t>(src_offset_diff),
+ std::back_inserter(src_skip_));
+
+ EncodeVarUInt<uint32_t>(
+ base::strict_cast<uint32_t>(equivalence.dst_offset - dst_offset_),
+ std::back_inserter(dst_skip_));
+
+ EncodeVarUInt<uint32_t>(base::strict_cast<uint32_t>(equivalence.length),
+ std::back_inserter(copy_count_));
+
+ src_offset_ = equivalence.src_offset + equivalence.length; // End in source.
+ dst_offset_ = equivalence.dst_offset + equivalence.length; // End in dest.
+}
+
+size_t EquivalenceSink::SerializedSize() const {
+ return patch::SerializedBufferSize(src_skip_) +
+ patch::SerializedBufferSize(dst_skip_) +
+ patch::SerializedBufferSize(copy_count_); // Sum over the three streams.
+}
+
+bool EquivalenceSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(src_skip_, sink) &&
+ patch::SerializeBuffer(dst_skip_, sink) &&
+ patch::SerializeBuffer(copy_count_, sink); // Stops at first failure.
+}
+
+/******** ExtraDataSink ********/
+
+ExtraDataSink::ExtraDataSink() = default;
+ExtraDataSink::ExtraDataSink(const std::vector<uint8_t>& extra_data)
+ : extra_data_(extra_data) {} // Copies |extra_data|.
+
+ExtraDataSink::ExtraDataSink(ExtraDataSink&&) = default;
+ExtraDataSink::~ExtraDataSink() = default;
+
+void ExtraDataSink::PutNext(ConstBufferView region) {
+ extra_data_.insert(extra_data_.end(), region.begin(), region.end()); // Append.
+}
+
+size_t ExtraDataSink::SerializedSize() const {
+ return patch::SerializedBufferSize(extra_data_); // Single buffer.
+}
+
+bool ExtraDataSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(extra_data_, sink); // Single buffer.
+}
+
+/******** RawDeltaSink ********/
+
+RawDeltaSink::RawDeltaSink() = default;
+RawDeltaSink::RawDeltaSink(const std::vector<uint8_t>& raw_delta_skip,
+ const std::vector<uint8_t>& raw_delta_diff)
+ : raw_delta_skip_(raw_delta_skip), raw_delta_diff_(raw_delta_diff) {} // Copies.
+
+RawDeltaSink::RawDeltaSink(RawDeltaSink&&) = default;
+RawDeltaSink::~RawDeltaSink() = default;
+
+void RawDeltaSink::PutNext(const RawDeltaUnit& delta) {
+ DCHECK_GE(delta.copy_offset, copy_offset_compensation_); // Increasing order.
+ EncodeVarUInt<uint32_t>(base::strict_cast<uint32_t>(
+ delta.copy_offset - copy_offset_compensation_),
+ std::back_inserter(raw_delta_skip_));
+
+ copy_offset_compensation_ = delta.copy_offset + 1; // Next adjacent -> 0.
+
+ raw_delta_diff_.push_back(delta.diff); // One diff entry per delta unit.
+}
+
+size_t RawDeltaSink::SerializedSize() const {
+ return patch::SerializedBufferSize(raw_delta_skip_) +
+ patch::SerializedBufferSize(raw_delta_diff_); // Sum over both streams.
+}
+
+bool RawDeltaSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(raw_delta_skip_, sink) &&
+ patch::SerializeBuffer(raw_delta_diff_, sink); // Skips first, then diffs.
+}
+
+/******** ReferenceDeltaSink ********/
+
+ReferenceDeltaSink::ReferenceDeltaSink() = default;
+ReferenceDeltaSink::ReferenceDeltaSink(
+ const std::vector<uint8_t>& reference_delta)
+ : reference_delta_(reference_delta) {} // Copies |reference_delta|.
+
+ReferenceDeltaSink::ReferenceDeltaSink(ReferenceDeltaSink&&) = default;
+ReferenceDeltaSink::~ReferenceDeltaSink() = default;
+
+void ReferenceDeltaSink::PutNext(int32_t diff) {
+ EncodeVarInt<int32_t>(diff, std::back_inserter(reference_delta_)); // Append.
+}
+
+size_t ReferenceDeltaSink::SerializedSize() const {
+ return patch::SerializedBufferSize(reference_delta_); // Single buffer.
+}
+
+bool ReferenceDeltaSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(reference_delta_, sink); // Single buffer.
+}
+
+/******** TargetSink ********/
+
+TargetSink::TargetSink() = default;
+TargetSink::TargetSink(const std::vector<uint8_t>& extra_targets)
+ : extra_targets_(extra_targets) {} // Copies |extra_targets|.
+
+TargetSink::TargetSink(TargetSink&&) = default;
+TargetSink::~TargetSink() = default;
+
+void TargetSink::PutNext(uint32_t target) {
+ DCHECK_GE(target, target_compensation_); // Targets must be increasing.
+
+ EncodeVarUInt<uint32_t>(
+ base::strict_cast<uint32_t>(target - target_compensation_),
+ std::back_inserter(extra_targets_));
+
+ target_compensation_ = target + 1; // So that target + 1 next encodes as 0.
+}
+
+size_t TargetSink::SerializedSize() const {
+ return patch::SerializedBufferSize(extra_targets_); // Single buffer.
+}
+
+bool TargetSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(extra_targets_, sink); // Single buffer.
+}
+
+/******** PatchElementWriter ********/
+
+PatchElementWriter::PatchElementWriter() = default;
+PatchElementWriter::PatchElementWriter(ElementMatch element_match)
+ : element_match_(element_match) {} // Sinks stay unset until Set*Sink().
+
+PatchElementWriter::PatchElementWriter(PatchElementWriter&&) = default;
+PatchElementWriter::~PatchElementWriter() = default;
+
+size_t PatchElementWriter::SerializedSize() const {
+ size_t serialized_size =
+ patch::SerializedElementMatchSize(element_match_) +
+ equivalences_->SerializedSize() + extra_data_->SerializedSize() +
+ raw_delta_->SerializedSize() + reference_delta_->SerializedSize(); // All set.
+
+ serialized_size += sizeof(uint32_t); // Count of |extra_targets_| entries.
+ for (const auto& extra_symbols : extra_targets_)
+ serialized_size += extra_symbols.second.SerializedSize() + 1; // +1: pool tag.
+ return serialized_size;
+}
+
+bool PatchElementWriter::SerializeInto(BufferSink* sink) const {
+ bool ok =
+ patch::SerializeElementMatch(element_match_, sink) &&
+ equivalences_->SerializeInto(sink) && extra_data_->SerializeInto(sink) &&
+ raw_delta_->SerializeInto(sink) && reference_delta_->SerializeInto(sink);
+ if (!ok)
+ return false; // Header or one of the four sinks failed to serialize.
+
+ if (!sink->PutValue<uint32_t>(
+ base::checked_cast<uint32_t>(extra_targets_.size())))
+ return false; // Could not write the number of extra target entries.
+ for (const auto& extra_target_sink : extra_targets_) {
+ if (!sink->PutValue<uint8_t>(extra_target_sink.first.value()))
+ return false; // Pool tag is written as one byte before its targets.
+ if (!extra_target_sink.second.SerializeInto(sink))
+ return false;
+ }
+ return true;
+}
+
+/******** EnsemblePatchWriter ********/
+
+EnsemblePatchWriter::~EnsemblePatchWriter() = default;
+
+EnsemblePatchWriter::EnsemblePatchWriter(const PatchHeader& header)
+ : header_(header) {
+ DCHECK_EQ(header_.magic, PatchHeader::kMagic); // Caller must set the magic.
+}
+
+EnsemblePatchWriter::EnsemblePatchWriter(ConstBufferView old_image,
+ ConstBufferView new_image) {
+ header_.magic = PatchHeader::kMagic; // Header is derived from both images.
+ header_.old_size = base::checked_cast<uint32_t>(old_image.size());
+ header_.old_crc = CalculateCrc32(old_image.begin(), old_image.end());
+ header_.new_size = base::checked_cast<uint32_t>(new_image.size());
+ header_.new_crc = CalculateCrc32(new_image.begin(), new_image.end());
+}
+
+void EnsemblePatchWriter::AddElement(PatchElementWriter&& patch_element) {
+ DCHECK(patch_element.new_element().offset == current_dst_offset_); // No gaps.
+ current_dst_offset_ = patch_element.new_element().EndOffset();
+ elements_.push_back(std::move(patch_element));
+}
+
+size_t EnsemblePatchWriter::SerializedSize() const {
+ size_t serialized_size =
+ sizeof(PatchHeader) + sizeof(PatchType) + sizeof(uint32_t); // + count.
+ for (const auto& patch_element : elements_) {
+ serialized_size += patch_element.SerializedSize();
+ }
+ return serialized_size;
+}
+
+bool EnsemblePatchWriter::SerializeInto(BufferSink* sink) const {
+ DCHECK_NE(patch_type_, PatchType::kUnrecognisedPatch); // SetPatchType() first.
+ DCHECK_EQ(current_dst_offset_, header_.new_size); // Elements cover new image.
+ bool ok =
+ sink->PutValue<PatchHeader>(header_) &&
+ sink->PutValue<PatchType>(patch_type_) &&
+ sink->PutValue<uint32_t>(base::checked_cast<uint32_t>(elements_.size()));
+ if (!ok)
+ return false; // Header, patch type or element count could not be written.
+
+ for (const auto& element : elements_) {
+ if (!element.SerializeInto(sink))
+ return false;
+ }
+ return true;
+}
+
+} // namespace zucchini
diff --git a/patch_writer.h b/patch_writer.h
new file mode 100644
index 0000000..a7c3785
--- /dev/null
+++ b/patch_writer.h
@@ -0,0 +1,276 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_PATCH_WRITER_H_
+#define COMPONENTS_ZUCCHINI_PATCH_WRITER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <utility>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/optional.h"
+#include "components/zucchini/buffer_sink.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/patch_utils.h"
+
+namespace zucchini {
+
+namespace patch {
+
+// If sufficient space is available, serializes |element_match| into |sink| and
+// returns true. Otherwise returns false, and |sink| will be in an undefined
+// state.
+bool SerializeElementMatch(const ElementMatch& element_match, BufferSink* sink);
+
+// Returns the size in bytes required to serialize |element_match|.
+size_t SerializedElementMatchSize(const ElementMatch& element_match);
+
+// If sufficient space is available, serializes |buffer| into |sink| and returns
+// true. Otherwise returns false, and |sink| will be in an undefined state.
+bool SerializeBuffer(const std::vector<uint8_t>& buffer, BufferSink* sink);
+
+// Returns the size in bytes required to serialize |buffer|.
+size_t SerializedBufferSize(const std::vector<uint8_t>& buffer);
+
+} // namespace patch
+
+// Each of *Sink classes below has an associated "main type", and performs the
+// following:
+// - Receives multiple "main type" elements (hence "Sink" in the name).
+// - Encodes list of received data, and writes them to internal storage (e.g.,
+// applying delta encoding).
+// - Writes encoded data to BufferSink.
+//
+// Common "core functions" implemented for *Sink classes are:
+// - void PutNext(const MAIN_TYPE& inst): Encodes and writes an instance of
+// MAIN_TYPE to internal storage. Assumptions may be applied to successive
+// |inst| provided.
+// - size_t SerializedSize() const: Returns the serialized size in bytes of
+// internal storage.
+// - bool SerializeInto(BufferSink* sink) const: If |sink| has enough space,
+// serializes internal storage into |sink|, and returns true. Otherwise
+// returns false.
+//
+// Usage of *Sink instances don't mix, and PutNext() have dissimilar
+// interfaces. Therefore we do not use inheritance to relate *Sink classes,
+// simply implement "core functions" with matching names.
+
+// Sink for equivalences.
+class EquivalenceSink {
+ public:
+ EquivalenceSink();
+ EquivalenceSink(const std::vector<uint8_t>& src_skip,
+ const std::vector<uint8_t>& dst_skip,
+ const std::vector<uint8_t>& copy_count);
+
+ EquivalenceSink(EquivalenceSink&&);
+ ~EquivalenceSink();
+
+ // Core functions.
+ // Equivalences must be given by increasing |Equivalence::dst_offset|.
+ void PutNext(const Equivalence& equivalence);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ // Offset in source, delta-encoded starting from end of last equivalence, and
+ // stored as signed varint.
+ std::vector<uint8_t> src_skip_;
+ // Offset in destination, delta-encoded starting from end of last equivalence,
+ // and stored as unsigned varint.
+ std::vector<uint8_t> dst_skip_;
+ // Length of equivalence stored as unsigned varint.
+ // TODO(etiennep): Investigate on bias.
+ std::vector<uint8_t> copy_count_;
+
+ offset_t src_offset_ = 0; // End of last equivalence in source.
+ offset_t dst_offset_ = 0; // End of last equivalence in destination.
+};
+
+// Sink for extra data.
+class ExtraDataSink {
+ public:
+ ExtraDataSink();
+ explicit ExtraDataSink(const std::vector<uint8_t>& extra_data);
+ ExtraDataSink(ExtraDataSink&&);
+ ~ExtraDataSink();
+
+ // Core functions.
+ void PutNext(ConstBufferView region); // Appends the bytes of |region|.
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ std::vector<uint8_t> extra_data_; // All regions received, concatenated.
+};
+
+// Sink for raw delta.
+class RawDeltaSink {
+ public:
+ RawDeltaSink();
+ RawDeltaSink(const std::vector<uint8_t>& raw_delta_skip,
+ const std::vector<uint8_t>& raw_delta_diff);
+ RawDeltaSink(RawDeltaSink&&);
+ ~RawDeltaSink();
+
+ // Core functions.
+ // Deltas must be given by increasing |RawDeltaUnit::copy_offset|.
+ void PutNext(const RawDeltaUnit& delta);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ std::vector<uint8_t> raw_delta_skip_; // Copy offset starting from last delta.
+ std::vector<uint8_t> raw_delta_diff_; // Bytewise difference.
+
+ // We keep track of the compensation needed for next copy offset, taking into
+ // account delta encoding and bias of -1. Stored delta are biased by -1, so a
+ // sequence of single byte deltas is represented as a string of 0's.
+ offset_t copy_offset_compensation_ = 0;
+};
+
+// Sink for reference delta.
+class ReferenceDeltaSink {
+ public:
+ ReferenceDeltaSink();
+ explicit ReferenceDeltaSink(const std::vector<uint8_t>& reference_delta);
+ ReferenceDeltaSink(ReferenceDeltaSink&&);
+ ~ReferenceDeltaSink();
+
+ // Core functions.
+ void PutNext(int32_t diff);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ std::vector<uint8_t> reference_delta_; // Diffs, each stored as signed varint.
+};
+
+// Sink for additional targets.
+class TargetSink {
+ public:
+ TargetSink();
+ explicit TargetSink(const std::vector<uint8_t>& extra_targets);
+ TargetSink(TargetSink&&);
+ ~TargetSink();
+
+ // Core functions.
+ // Targets must be given by increasing order.
+ void PutNext(uint32_t target);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ // Targets are delta-encoded and biased by -1, stored as unsigned varint.
+ std::vector<uint8_t> extra_targets_;
+
+ // We keep track of the compensation needed for next target, taking into
+ // account delta encoding and bias of -1.
+ offset_t target_compensation_ = 0;
+};
+
+// Following are utility classes to write structured data forming a patch.
+
+// Utility to write a patch element. A patch element contains all the
+// information necessary to patch a single element. This class
+// provides an interface to individually set different building blocks of data
+// in the patch element.
+class PatchElementWriter {
+ public:
+ PatchElementWriter();
+ explicit PatchElementWriter(ElementMatch element_match);
+ PatchElementWriter(PatchElementWriter&&);
+ ~PatchElementWriter();
+
+ const ElementMatch& element_match() const { return element_match_; }
+ const Element& old_element() const { return element_match_.old_element; }
+ const Element& new_element() const { return element_match_.new_element; }
+
+ // Following methods set individual blocks for this element. Previous
+ // corresponding block is replaced. All streams must be set before call to
+ // SerializedSize() or SerializeInto().
+
+ void SetEquivalenceSink(EquivalenceSink&& equivalences) {
+ equivalences_.emplace(std::move(equivalences));
+ }
+ void SetExtraDataSink(ExtraDataSink&& extra_data) {
+ extra_data_.emplace(std::move(extra_data));
+ }
+ void SetRawDeltaSink(RawDeltaSink&& raw_delta) {
+ raw_delta_.emplace(std::move(raw_delta));
+ }
+ void SetReferenceDeltaSink(ReferenceDeltaSink reference_delta) {
+ reference_delta_.emplace(std::move(reference_delta));
+ }
+ // Set additional targets for pool identified with |pool_tag|.
+ void SetTargetSink(PoolTag pool_tag, TargetSink&& extra_targets) {
+ DCHECK(pool_tag != kNoPoolTag);
+ extra_targets_.emplace(pool_tag, std::move(extra_targets));
+ }
+
+ // Returns the serialized size in bytes of the data this object is holding.
+ size_t SerializedSize() const;
+
+ // If sufficient space is available, serializes data into |sink|, which is at
+ // least SerializedSize() bytes, and returns true. Otherwise returns false.
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ ElementMatch element_match_;
+ base::Optional<EquivalenceSink> equivalences_;
+ base::Optional<ExtraDataSink> extra_data_;
+ base::Optional<RawDeltaSink> raw_delta_;
+ base::Optional<ReferenceDeltaSink> reference_delta_;
+ std::map<PoolTag, TargetSink> extra_targets_; // Extra targets per pool.
+};
+
+// Utility to write a Zucchini ensemble patch. An ensemble patch is the
+// concatenation of a patch header with a vector of patch elements.
+class EnsemblePatchWriter {
+ public:
+ explicit EnsemblePatchWriter(const PatchHeader& header);
+ EnsemblePatchWriter(ConstBufferView old_image, ConstBufferView new_image);
+ ~EnsemblePatchWriter();
+
+ void SetPatchType(PatchType patch_type) { patch_type_ = patch_type; }
+
+ // Reserves space for |count| patch elements.
+ void ReserveElements(size_t count) { elements_.reserve(count); }
+
+ // Adds a patch element into the patch. Patch elements must be ordered by
+ // their location in the new image file.
+ void AddElement(PatchElementWriter&& patch_element);
+
+ // Returns the serialized size in bytes of the data this object is holding.
+ size_t SerializedSize() const;
+
+ // If sufficient space is available, serializes data into |sink|, which is at
+ // least SerializedSize() bytes, and returns true. Otherwise returns false.
+ bool SerializeInto(BufferSink* sink) const;
+
+ // If sufficient space is available, serializes data into |buffer|, which is
+ // at least SerializedSize() bytes, and returns true. Otherwise returns false.
+ bool SerializeInto(MutableBufferView buffer) const {
+ BufferSink sink(buffer);
+ return SerializeInto(&sink);
+ }
+
+ private:
+ PatchHeader header_;
+ PatchType patch_type_ = PatchType::kUnrecognisedPatch;
+ std::vector<PatchElementWriter> elements_;
+ offset_t current_dst_offset_ = 0; // End offset of the last element added.
+
+ DISALLOW_COPY_AND_ASSIGN(EnsemblePatchWriter);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_PATCH_WRITER_H_
diff --git a/reference_set.cc b/reference_set.cc
new file mode 100644
index 0000000..963e814
--- /dev/null
+++ b/reference_set.cc
@@ -0,0 +1,68 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reference_set.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "components/zucchini/target_pool.h"
+
+namespace zucchini {
+
+namespace {
+
+// Returns true if |refs| is sorted by non-decreasing |location|.
+bool IsReferenceListSorted(const std::vector<IndirectReference>& refs) {
+ return std::is_sorted(
+ refs.begin(), refs.end(),
+ [](const IndirectReference& a, const IndirectReference& b) {
+ return a.location < b.location;
+ });
+}
+
+} // namespace
+
+ReferenceSet::ReferenceSet(const ReferenceTypeTraits& traits,
+ const TargetPool& target_pool)
+ : traits_(traits), target_pool_(target_pool) {} // No references yet.
+ReferenceSet::ReferenceSet(ReferenceSet&&) = default;
+ReferenceSet::~ReferenceSet() = default;
+
+void ReferenceSet::InitReferences(ReferenceReader&& ref_reader) {
+ DCHECK(references_.empty()); // Must only be called on an empty set.
+ for (auto ref = ref_reader.GetNext(); ref.has_value();
+ ref = ref_reader.GetNext()) {
+ references_.push_back(
+ {ref->location, target_pool_.KeyForOffset(ref->target)}); // Target -> key.
+ }
+ DCHECK(IsReferenceListSorted(references_)); // Reader must emit by location.
+}
+
+void ReferenceSet::InitReferences(const std::vector<Reference>& refs) {
+ DCHECK(references_.empty()); // Must only be called on an empty set.
+ references_.reserve(refs.size());
+ std::transform(refs.begin(), refs.end(), std::back_inserter(references_),
+ [&](const Reference& ref) -> IndirectReference {
+ return {ref.location, target_pool_.KeyForOffset(ref.target)};
+ });
+ DCHECK(IsReferenceListSorted(references_)); // |refs| must be sorted.
+}
+
+IndirectReference ReferenceSet::at(offset_t offset) const {
+ auto pos =
+ std::upper_bound(references_.begin(), references_.end(), offset,
+ [](offset_t offset, const IndirectReference& ref) {
+ return offset < ref.location;
+ }); // First reference located strictly after |offset|.
+
+ DCHECK(pos != references_.begin()); // Iterators.
+ --pos; // Last reference whose location is <= |offset|.
+ DCHECK_LT(offset, pos->location + width()); // |offset| lies inside it.
+ return *pos;
+}
+
+} // namespace zucchini
diff --git a/reference_set.h b/reference_set.h
new file mode 100644
index 0000000..2ca7202
--- /dev/null
+++ b/reference_set.h
@@ -0,0 +1,66 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_REFERENCE_SET_H_
+#define COMPONENTS_ZUCCHINI_REFERENCE_SET_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+class TargetPool;
+
+// Container of distinct indirect references of one type, along with traits,
+// only used during patch generation.
+class ReferenceSet {
+ public:
+ using const_iterator = std::vector<IndirectReference>::const_iterator;
+
+ // |traits| specifies the reference represented. |target_pool| specifies
+ // common targets shared by all references represented, and mediates target
+ // translation between offsets and indexes.
+ ReferenceSet(const ReferenceTypeTraits& traits,
+ const TargetPool& target_pool);
+ ReferenceSet(const ReferenceSet&) = delete;
+ ReferenceSet(ReferenceSet&&);
+ ~ReferenceSet();
+
+ // Either one of the initializers below should be called exactly once. These
+ // insert all references from |ref_reader/refs| into this class. The targets
+ // of these references must be in |target_pool_|.
+ void InitReferences(ReferenceReader&& ref_reader);
+ void InitReferences(const std::vector<Reference>& refs);
+
+ const std::vector<IndirectReference>& references() const {
+ return references_;
+ }
+ const ReferenceTypeTraits& traits() const { return traits_; }
+ const TargetPool& target_pool() const { return target_pool_; }
+ TypeTag type_tag() const { return traits_.type_tag; }
+ PoolTag pool_tag() const { return traits_.pool_tag; }
+ offset_t width() const { return traits_.width; }
+
+ // Looks up the IndirectReference by an |offset| that it spans. |offset| is
+ // assumed to be valid, i.e., |offset| must be spanned by some
+ // IndirectReference in |references_|.
+ IndirectReference at(offset_t offset) const;
+
+ size_t size() const { return references_.size(); }
+ const_iterator begin() const { return references_.begin(); }
+ const_iterator end() const { return references_.end(); }
+
+ private:
+ ReferenceTypeTraits traits_;
+ const TargetPool& target_pool_; // Not owned; held by reference.
+ // List of distinct IndirectReference instances sorted by location.
+ std::vector<IndirectReference> references_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_REFERENCE_SET_H_
diff --git a/reference_set_unittest.cc b/reference_set_unittest.cc
new file mode 100644
index 0000000..b4ccceb
--- /dev/null
+++ b/reference_set_unittest.cc
@@ -0,0 +1,51 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reference_set.h"
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/target_pool.h"
+#include "components/zucchini/test_reference_reader.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+constexpr offset_t kWidth = 2U; // Width given to the test ReferenceSet's traits.
+
+} // namespace
+
+class ReferenceSetTest : public testing::Test {
+ protected:
+ // For simplicity, |target_pool_| has no type info (not needed here).
+ TargetPool target_pool_ = TargetPool{{0, 2, 3, 5}};
+ ReferenceSet reference_set_ =
+ ReferenceSet{{kWidth, TypeTag(0), PoolTag(0)}, target_pool_}; // Uses pool.
+};
+
+TEST_F(ReferenceSetTest, InitReferencesFromReader) {
+ EXPECT_EQ(std::vector<IndirectReference>(), reference_set_.references());
+ EXPECT_EQ(0U, reference_set_.size());
+ std::vector<Reference> references = {{10, 0}, {12, 2}, {14, 5}};
+ reference_set_.InitReferences(TestReferenceReader(references));
+ EXPECT_EQ(std::vector<IndirectReference>({{10, 0}, {12, 1}, {14, 3}}),
+ reference_set_.references()); // Targets 0, 2, 5 become keys 0, 1, 3.
+ EXPECT_EQ(3U, reference_set_.size());
+}
+
+TEST_F(ReferenceSetTest, At) {
+ reference_set_.InitReferences({{10, 0}, {12, 2}, {15, 5}});
+ // Each reference has kWidth = 2, so check all bytes covered.
+ EXPECT_EQ(IndirectReference({10, 0}), reference_set_.at(10));
+ EXPECT_EQ(IndirectReference({10, 0}), reference_set_.at(11));
+ EXPECT_EQ(IndirectReference({12, 1}), reference_set_.at(12));
+ EXPECT_EQ(IndirectReference({12, 1}), reference_set_.at(13));
+ EXPECT_EQ(IndirectReference({15, 3}), reference_set_.at(15));
+ EXPECT_EQ(IndirectReference({15, 3}), reference_set_.at(16));
+}
+
+} // namespace zucchini
diff --git a/rel32_finder.cc b/rel32_finder.cc
new file mode 100644
index 0000000..9a07ade
--- /dev/null
+++ b/rel32_finder.cc
@@ -0,0 +1,137 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/rel32_finder.h"
+
+#include <algorithm>
+
+namespace zucchini {
+
+/******** Abs32GapFinder ********/
+
+Abs32GapFinder::Abs32GapFinder(ConstBufferView image,
+ ConstBufferView region,
+ const std::vector<offset_t>& abs32_locations,
+ size_t abs32_width)
+ : base_(image.begin()),
+ region_end_(region.end()),
+ abs32_end_(abs32_locations.end()),
+ abs32_width_(abs32_width) {
+ DCHECK_GT(abs32_width, size_t(0));
+ DCHECK_GE(region.begin(), image.begin()); // |region| must lie in |image|.
+ DCHECK_LE(region.end(), image.end());
+
+ const offset_t begin_offset = region.begin() - image.begin();
+ // Find the first |abs32_current_| with |*abs32_current_ >= begin_offset|.
+ abs32_current_ = std::lower_bound(abs32_locations.begin(),
+ abs32_locations.end(), begin_offset);
+
+ // Find lower boundary, accounting for possibility that |abs32_current_[-1]|
+ // may straddle across |region.begin()|.
+ current_lo_ = region.begin();
+ if (abs32_current_ > abs32_locations.begin()) {
+ current_lo_ = std::max(current_lo_,
+ image.begin() + abs32_current_[-1] + abs32_width_);
+ }
+}
+
+Abs32GapFinder::~Abs32GapFinder() = default;
+
+base::Optional<ConstBufferView> Abs32GapFinder::GetNext() {
+ // Iterate over |[abs32_current_, abs32_end_)| and emit segments.
+ while (abs32_current_ != abs32_end_ &&
+ base_ + *abs32_current_ < region_end_) {
+ ConstBufferView::const_iterator hi = base_ + *abs32_current_;
+ ConstBufferView gap = ConstBufferView::FromRange(current_lo_, hi);
+ current_lo_ = hi + abs32_width_; // Next gap starts after this abs32.
+ ++abs32_current_;
+ if (!gap.empty()) // Empty gaps are skipped, never returned.
+ return gap;
+ }
+ // Emit final segment.
+ if (current_lo_ < region_end_) {
+ ConstBufferView gap = ConstBufferView::FromRange(current_lo_, region_end_);
+ current_lo_ = region_end_; // Makes every later call return nullopt.
+ return gap;
+ }
+ return base::nullopt;
+}
+
+/******** Rel32Finder ********/
+
+Rel32Finder::Rel32Finder() = default;
+
+Rel32Finder::Rel32Finder(ConstBufferView region)
+ : region_(region), next_cursor_(region_.begin()) {} // Cursor at region start.
+
+Rel32Finder::~Rel32Finder() = default;
+
+/******** Rel32FinderX86 ********/
+
+ConstBufferView Rel32FinderX86::Scan(ConstBufferView region) {
+ ConstBufferView::const_iterator cursor = region.begin();
+ while (cursor < region.end()) {
+ // Heuristic rel32 detection by looking for opcodes that use them.
+ if (cursor + 5 <= region.end()) {
+ if (cursor[0] == 0xE8 || cursor[0] == 0xE9) { // CALL rel32; JMP rel32
+ rel32_ = {cursor + 1, false};
+ return ConstBufferView::FromRange(cursor, rel32_.location + 4);
+ }
+ }
+ if (cursor + 6 <= region.end()) {
+ if (cursor[0] == 0x0F && (cursor[1] & 0xF0) == 0x80) { // Jcc long form
+ rel32_ = {cursor + 2, false};
+ return ConstBufferView::FromRange(cursor, rel32_.location + 4);
+ }
+ }
+ ++cursor; // No match at this byte; try the next one.
+ }
+ return {region.end(), 0}; // Nothing found: empty view at end of |region|.
+}
+
+/******** Rel32FinderX64 ********/
+
+ConstBufferView Rel32FinderX64::Scan(ConstBufferView region) {
+ ConstBufferView::const_iterator cursor = region.begin();
+ while (cursor < region.end()) {
+ // Heuristic rel32 detection by looking for opcodes that use them.
+ if (cursor + 5 <= region.end()) {
+ if (cursor[0] == 0xE8 || cursor[0] == 0xE9) { // CALL rel32; JMP rel32
+ rel32_ = {cursor + 1, false};
+ return ConstBufferView::FromRange(cursor, rel32_.location + 4);
+ }
+ }
+ if (cursor + 6 <= region.end()) {
+ if (cursor[0] == 0x0F && (cursor[1] & 0xF0) == 0x80) { // Jcc long form
+ rel32_ = {cursor + 2, false};
+ return ConstBufferView::FromRange(cursor, rel32_.location + 4);
+ } else if ((cursor[0] == 0xFF &&
+ (cursor[1] == 0x15 || cursor[1] == 0x25)) ||
+ ((cursor[0] == 0x89 || cursor[0] == 0x8B ||
+ cursor[0] == 0x8D) &&
+ (cursor[1] & 0xC7) == 0x05)) {
+ // 6-byte instructions:
+ // [2-byte opcode] [disp32]:
+ // Opcode
+ // FF 15: CALL QWORD PTR [rip+disp32]
+ // FF 25: JMP QWORD PTR [rip+disp32]
+ //
+ // [1-byte opcode] [ModR/M] [disp32]:
+ // Opcode
+ // 89: MOV DWORD PTR [rip+disp32],reg
+ // 8B: MOV reg,DWORD PTR [rip+disp32]
+ // 8D: LEA reg,[rip+disp32]
+ // ModR/M : MMRRRMMM
+ // MM = 00 & MMM = 101 => rip+disp32
+ // RRR: selects reg operand from [eax|ecx|...|edi]
+ rel32_ = {cursor + 2, true}; // Only these rip-relative forms pass true.
+ return ConstBufferView::FromRange(cursor, rel32_.location + 4);
+ }
+ }
+ ++cursor; // No match at this byte; try the next one.
+ }
+ return {region.end(), 0}; // Nothing found: empty view at end of |region|.
+}
+
+} // namespace zucchini
diff --git a/rel32_finder.h b/rel32_finder.h
new file mode 100644
index 0000000..798983e
--- /dev/null
+++ b/rel32_finder.h
@@ -0,0 +1,189 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_REL32_FINDER_H_
+#define COMPONENTS_ZUCCHINI_REL32_FINDER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/optional.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// See README.md for definitions on abs32 and rel32 references. We assume the
+// following:
+// - Abs32 locations have fixed lengths, and never overlap.
+// - Rel32 locations can be reasonably identified by heuristically disassembling
+// machine code.
+// - Rel32 locations never overlap with each other, and never with abs32
+// locations.
+
+// Abs32GapFinder is a class that iterates over all contiguous gaps in |region|
+// that lie outside of |abs32_locations| elements, each spanning |abs32_width|
+// bytes. For example, given
+// region = [base_ + 8, base_ + 25),
+// abs32_locations = {2, 6, 15, 20, 27},
+// abs32_width_ = 4,
+// we obtain the following:
+// 111111111122222222223 -> offsets
+// 0123456789012345678901234567890
+// ........*****************...... -> region = *
+// ^ ^ ^ ^ ^ -> abs32 locations
+// aaaaaaaa aaaa aaaa aaaa -> abs32 locations with width
+// ........--*****----*----*...... -> region excluding abs32 -> 3 gaps
+// The resulting gaps (must be non-empty) are:
+// [10, 15), [19, 20), [24, 25).
+// These gaps can then be passed to Rel32Finder (below) to find rel32 references
+// that are guaranteed to not overlap with any abs32 references.
+class Abs32GapFinder {
+ public:
+ // |abs32_locations| is a sorted list of non-overlapping abs32 reference
+ // locations in |image|, each spanning |abs32_width| bytes. Gaps are searched
+ // in |region|, which must be part of |image|.
+ Abs32GapFinder(ConstBufferView image,
+ ConstBufferView region,
+ const std::vector<offset_t>& abs32_locations,
+ size_t abs32_width);
+ ~Abs32GapFinder();
+
+ // Returns the next available gap, or nullopt if exhausted.
+ base::Optional<ConstBufferView> GetNext();
+
+ private:
+ const ConstBufferView::const_iterator base_;
+ const ConstBufferView::const_iterator region_end_;
+ ConstBufferView::const_iterator current_lo_;
+ std::vector<offset_t>::const_iterator abs32_current_;
+ std::vector<offset_t>::const_iterator abs32_end_;
+ size_t abs32_width_;
+
+ DISALLOW_COPY_AND_ASSIGN(Abs32GapFinder);
+};
+
+// A class to parse executable bytes of an image to find rel32 locations.
+// Architecture-specific parse details are delegated to inherited classes.
+// This is typically used along with Abs32GapFinder to find search regions.
+// The caller may filter rel32 locations, based on rel32 targets.
+class Rel32Finder {
+ public:
+ Rel32Finder();
+ // |region| is the region being scanned for rel32 references.
+ explicit Rel32Finder(ConstBufferView region);
+ virtual ~Rel32Finder();
+
+ // Reset object to start scanning for rel32 references in |region|.
+ void Reset(ConstBufferView region) {
+ next_cursor_ = region.begin();
+ region_ = region;
+ }
+
+ // Accept the last reference found. Next call to FindNext() will scan starting
+ // beyond that reference, instead of the current search position.
+ void Accept() { region_.seek(next_cursor_); }
+
+ // Accessors for unittest.
+ ConstBufferView::const_iterator next_cursor() const { return next_cursor_; }
+ ConstBufferView region() const { return region_; }
+
+ protected:
+ // Scans for the next rel32 reference. If a reference is found, advances the
+ // search position beyond it and returns true. Otherwise, moves the search
+ // position to the end of the region and returns false.
+ bool FindNext() {
+ ConstBufferView result = Scan(region_);
+ region_.seek(result.begin());
+ next_cursor_ = result.end();
+ if (region_.empty())
+ return false;
+ region_.remove_prefix(1);
+ DCHECK_GE(next_cursor_, region_.begin());
+ DCHECK_LE(next_cursor_, region_.end());
+ return true;
+ }
+
+ // Architecture-specific rel32 reference detection, which scans executable
+ // bytes given by |region|. For each rel32 reference found, the implementation
+ // should cache the necessary data to be retrieved via accessors and return a
+ // region starting at the current search position, and ending beyond the
+ // reference that was just found, or an empty region starting at the end of
+ // the search region if no more reference is found. By default, the next time
+ // FindNext() is called, |region| will start at the current search position,
+ // unless Accept() was called, in which case |region| will start beyond the
+ // last reference.
+ virtual ConstBufferView Scan(ConstBufferView region) = 0;
+
+ private:
+ ConstBufferView region_;
+ ConstBufferView::const_iterator next_cursor_ = nullptr;
+
+ DISALLOW_COPY_AND_ASSIGN(Rel32Finder);
+};
+
+// Parsing for X86 or X64: we perform naive scan for opcodes that have rel32 as
+// an argument, and disregard instruction alignment.
+class Rel32FinderIntel : public Rel32Finder {
+ public:
+ // Struct to store GetNext() results.
+ struct Result {
+ ConstBufferView::const_iterator location;
+
+ // Some references must have their target in the same section as location,
+ // which we use to heuristically reject rel32 reference candidates.
+ // When true, this constraint is relaxed.
+ bool can_point_outside_section;
+ };
+
+ using Rel32Finder::Rel32Finder;
+
+ // Returns the next available Result, or nullopt if exhausted.
+ base::Optional<Result> GetNext() {
+ if (FindNext())
+ return rel32_;
+ return base::nullopt;
+ }
+
+ protected:
+ // Cached results.
+ Result rel32_;
+
+ // Rel32Finder:
+ ConstBufferView Scan(ConstBufferView region) override = 0;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Rel32FinderIntel);
+};
+
+// X86 instructions.
+class Rel32FinderX86 : public Rel32FinderIntel {
+ public:
+ using Rel32FinderIntel::Rel32FinderIntel;
+
+ private:
+ // Rel32Finder:
+ ConstBufferView Scan(ConstBufferView region) override;
+
+ DISALLOW_COPY_AND_ASSIGN(Rel32FinderX86);
+};
+
+// X64 instructions.
+class Rel32FinderX64 : public Rel32FinderIntel {
+ public:
+ using Rel32FinderIntel::Rel32FinderIntel;
+
+ private:
+ // Rel32Finder:
+ ConstBufferView Scan(ConstBufferView region) override;
+
+ DISALLOW_COPY_AND_ASSIGN(Rel32FinderX64);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_REL32_FINDER_H_
diff --git a/rel32_finder_unittest.cc b/rel32_finder_unittest.cc
new file mode 100644
index 0000000..2da76ad
--- /dev/null
+++ b/rel32_finder_unittest.cc
@@ -0,0 +1,353 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/rel32_finder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/format_macros.h"
+#include "base/logging.h"
+#include "base/strings/stringprintf.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+TEST(Abs32GapFinderTest, All) {
+ const size_t kRegionTotal = 99;
+ std::vector<uint8_t> buffer(kRegionTotal);
+ ConstBufferView image(buffer.data(), buffer.size());
+
+ // Common test code that returns the resulting segments as a string.
+ auto run_test = [&](size_t rlo, size_t rhi,
+ std::vector<offset_t> abs32_locations,
+ std::ptrdiff_t abs32_width) -> std::string {
+ CHECK_LE(rlo, kRegionTotal);
+ CHECK_LE(rhi, kRegionTotal);
+ CHECK(std::is_sorted(abs32_locations.begin(), abs32_locations.end()));
+ CHECK_GT(abs32_width, 0);
+ ConstBufferView region =
+ ConstBufferView::FromRange(image.begin() + rlo, image.begin() + rhi);
+ Abs32GapFinder gap_finder(image, region, abs32_locations, abs32_width);
+
+ std::string out_str;
+ for (auto gap = gap_finder.GetNext(); gap; gap = gap_finder.GetNext()) {
+ size_t lo = static_cast<size_t>(gap->begin() - image.begin());
+ size_t hi = static_cast<size_t>(gap->end() - image.begin());
+ out_str.append(base::StringPrintf("[%" PRIuS ",%" PRIuS ")", lo, hi));
+ }
+ return out_str;
+ };
+
+ // Empty regions yield empty segments.
+ EXPECT_EQ("", run_test(0, 0, std::vector<offset_t>(), 4));
+ EXPECT_EQ("", run_test(9, 9, std::vector<offset_t>(), 4));
+ EXPECT_EQ("", run_test(8, 8, {8}, 4));
+ EXPECT_EQ("", run_test(8, 8, {0, 12}, 4));
+
+ // If no abs32 locations exist then the segment is the main range.
+ EXPECT_EQ("[0,99)", run_test(0, 99, std::vector<offset_t>(), 4));
+ EXPECT_EQ("[20,21)", run_test(20, 21, std::vector<offset_t>(), 4));
+ EXPECT_EQ("[51,55)", run_test(51, 55, std::vector<offset_t>(), 4));
+
+ // abs32 locations found near start of main range.
+ EXPECT_EQ("[10,20)", run_test(10, 20, {5}, 4));
+ EXPECT_EQ("[10,20)", run_test(10, 20, {6}, 4));
+ EXPECT_EQ("[11,20)", run_test(10, 20, {7}, 4));
+ EXPECT_EQ("[12,20)", run_test(10, 20, {8}, 4));
+ EXPECT_EQ("[13,20)", run_test(10, 20, {9}, 4));
+ EXPECT_EQ("[14,20)", run_test(10, 20, {10}, 4));
+ EXPECT_EQ("[10,11)[15,20)", run_test(10, 20, {11}, 4));
+
+ // abs32 locations found near end of main range.
+ EXPECT_EQ("[10,15)[19,20)", run_test(10, 20, {15}, 4));
+ EXPECT_EQ("[10,16)", run_test(10, 20, {16}, 4));
+ EXPECT_EQ("[10,17)", run_test(10, 20, {17}, 4));
+ EXPECT_EQ("[10,18)", run_test(10, 20, {18}, 4));
+ EXPECT_EQ("[10,19)", run_test(10, 20, {19}, 4));
+ EXPECT_EQ("[10,20)", run_test(10, 20, {20}, 4));
+ EXPECT_EQ("[10,20)", run_test(10, 20, {21}, 4));
+
+ // Main range completely eclipsed by abs32 location.
+ EXPECT_EQ("", run_test(10, 11, {7}, 4));
+ EXPECT_EQ("", run_test(10, 11, {8}, 4));
+ EXPECT_EQ("", run_test(10, 11, {9}, 4));
+ EXPECT_EQ("", run_test(10, 11, {10}, 4));
+ EXPECT_EQ("", run_test(10, 12, {8}, 4));
+ EXPECT_EQ("", run_test(10, 12, {9}, 4));
+ EXPECT_EQ("", run_test(10, 12, {10}, 4));
+ EXPECT_EQ("", run_test(10, 13, {9}, 4));
+ EXPECT_EQ("", run_test(10, 13, {10}, 4));
+ EXPECT_EQ("", run_test(10, 14, {10}, 4));
+ EXPECT_EQ("", run_test(10, 14, {8, 12}, 4));
+
+ // Partial eclipses.
+ EXPECT_EQ("[24,25)", run_test(20, 25, {20}, 4));
+ EXPECT_EQ("[20,21)", run_test(20, 25, {21}, 4));
+ EXPECT_EQ("[20,21)[25,26)", run_test(20, 26, {21}, 4));
+
+ // abs32 location outside main range.
+ EXPECT_EQ("[40,60)", run_test(40, 60, {36, 60}, 4));
+ EXPECT_EQ("[41,61)", run_test(41, 61, {0, 10, 20, 30, 34, 62, 68, 80}, 4));
+
+ // Change abs32 width.
+ EXPECT_EQ("[10,11)[12,14)[16,19)", run_test(10, 20, {9, 11, 14, 15, 19}, 1));
+ EXPECT_EQ("", run_test(10, 11, {10}, 1));
+ EXPECT_EQ("[18,23)[29,31)", run_test(17, 31, {15, 23, 26, 31}, 3));
+ EXPECT_EQ("[17,22)[25,26)[29,30)", run_test(17, 31, {14, 22, 26, 30}, 3));
+ EXPECT_EQ("[10,11)[19,20)", run_test(10, 20, {11}, 8));
+
+ // Mixed cases with abs32 width = 4.
+ EXPECT_EQ("[10,15)[19,20)[24,25)", run_test(8, 25, {2, 6, 15, 20, 27}, 4));
+ EXPECT_EQ("[0,25)[29,45)[49,50)", run_test(0, 50, {25, 45}, 4));
+ EXPECT_EQ("[10,20)[28,50)", run_test(10, 50, {20, 24}, 4));
+ EXPECT_EQ("[49,50)[54,60)[64,70)[74,80)[84,87)",
+ run_test(49, 87, {10, 20, 30, 40, 50, 60, 70, 80, 90}, 4));
+ EXPECT_EQ("[0,10)[14,20)[24,25)[29,50)", run_test(0, 50, {10, 20, 25}, 4));
+}
+
+namespace {
+
+// A mock Rel32Finder to inject next search result on Scan().
+class TestRel32Finder : public Rel32Finder {
+ public:
+ using Rel32Finder::Rel32Finder;
+
+ bool GetNext() { return Rel32Finder::FindNext(); }
+
+ // Rel32Finder:
+ ConstBufferView Scan(ConstBufferView region) override { return next_result; }
+
+ ConstBufferView next_result;
+};
+
+} // namespace
+
+TEST(Rel32FinderTest, Scan) {
+ const size_t kRegionTotal = 99;
+ std::vector<uint8_t> buffer(kRegionTotal);
+ ConstBufferView image(buffer.data(), buffer.size());
+
+ TestRel32Finder finder(image);
+
+ auto check_finder_state = [&](const TestRel32Finder& finder,
+ size_t expected_cursor,
+ size_t expected_next_cursor) {
+ CHECK_LE(expected_cursor, kRegionTotal);
+ CHECK_LE(expected_next_cursor, kRegionTotal);
+
+ EXPECT_EQ(image.begin() + expected_cursor, finder.region().begin());
+ EXPECT_EQ(image.begin() + expected_next_cursor, finder.next_cursor());
+ };
+
+ check_finder_state(finder, 0, 0);
+
+ finder.next_result = ConstBufferView(image.begin() + 0, 1);
+ EXPECT_TRUE(finder.GetNext());
+ check_finder_state(finder, 1, 1);
+
+ finder.next_result = ConstBufferView(image.begin() + 1, 1);
+ EXPECT_TRUE(finder.GetNext());
+ check_finder_state(finder, 2, 2);
+
+ finder.next_result = ConstBufferView(image.begin() + 4, 2);
+ EXPECT_TRUE(finder.GetNext());
+ check_finder_state(finder, 5, 6);
+ finder.Accept();
+ check_finder_state(finder, 6, 6);
+
+ finder.next_result = ConstBufferView(image.begin() + 6, 1);
+ EXPECT_TRUE(finder.GetNext());
+ check_finder_state(finder, 7, 7);
+
+ finder.next_result = ConstBufferView(image.begin() + 7, 1);
+ EXPECT_TRUE(finder.GetNext());
+ check_finder_state(finder, 8, 8);
+
+ finder.next_result = ConstBufferView(image.begin() + 98, 1);
+ EXPECT_TRUE(finder.GetNext());
+ check_finder_state(finder, 99, 99);
+
+ finder.next_result = ConstBufferView(image.end(), 0);
+ EXPECT_FALSE(finder.GetNext());
+ check_finder_state(finder, 99, 99);
+}
+
+TEST(Rel32FinderX86Test, FindNext) {
+ constexpr uint8_t data[] = {
+ 0x55, // 00: push ebp
+ 0x8B, 0xEC, // 01: mov ebp,esp
+ 0xE8, 0x00, 0x00, 0x00, 0x00, // 03: call 08
+ 0xE9, 0x00, 0x00, 0x00, 0x00, // 08: jmp 0D
+ 0x0F, 0x80, 0x00, 0x00, 0x00, 0x00, // 0D: jo 13
+ 0x0F, 0x81, 0x00, 0x00, 0x00, 0x00, // 13: jno 19
+ 0x0F, 0x82, 0x00, 0x00, 0x00, 0x00, // 19: jb 1F
+ 0x0F, 0x83, 0x00, 0x00, 0x00, 0x00, // 1F: jae 25
+ 0x0F, 0x84, 0x00, 0x00, 0x00, 0x00, // 25: je 2B
+ 0x0F, 0x85, 0x00, 0x00, 0x00, 0x00, // 2B: jne 31
+ 0x0F, 0x86, 0x00, 0x00, 0x00, 0x00, // 31: jbe 37
+ 0x0F, 0x87, 0x00, 0x00, 0x00, 0x00, // 37: ja 3D
+ 0x0F, 0x88, 0x00, 0x00, 0x00, 0x00, // 3D: js 43
+ 0x0F, 0x89, 0x00, 0x00, 0x00, 0x00, // 43: jns 49
+ 0x0F, 0x8A, 0x00, 0x00, 0x00, 0x00, // 49: jp 4F
+ 0x0F, 0x8B, 0x00, 0x00, 0x00, 0x00, // 4F: jnp 55
+ 0x0F, 0x8C, 0x00, 0x00, 0x00, 0x00, // 55: jl 5B
+ 0x0F, 0x8D, 0x00, 0x00, 0x00, 0x00, // 5B: jge 61
+ 0x0F, 0x8E, 0x00, 0x00, 0x00, 0x00, // 61: jle 67
+ 0x0F, 0x8F, 0x00, 0x00, 0x00, 0x00, // 67: jg 6D
+ 0x5D, // 6D: pop ebp
+ 0xC3, // 6E: ret
+ };
+
+ ConstBufferView image =
+ ConstBufferView::FromRange(std::begin(data), std::end(data));
+
+ Rel32FinderX86 rel_finder(image);
+
+ // List of expected locations as pairs of (cursor position, rel32 position).
+ std::vector<std::pair<size_t, size_t>> expected_locations = {
+ {0x04, 0x04}, {0x09, 0x09}, {0x0E, 0x0F}, {0x14, 0x15}, {0x1A, 0x1B},
+ {0x20, 0x21}, {0x26, 0x27}, {0x2C, 0x2D}, {0x32, 0x33}, {0x38, 0x39},
+ {0x3E, 0x3F}, {0x44, 0x45}, {0x4A, 0x4B}, {0x50, 0x51}, {0x56, 0x57},
+ {0x5C, 0x5D}, {0x62, 0x63}, {0x68, 0x69},
+ };
+
+ for (auto location : expected_locations) {
+ auto result = rel_finder.GetNext();
+ EXPECT_TRUE(result.has_value());
+
+ EXPECT_EQ(location.first,
+ size_t(rel_finder.region().begin() - image.begin()));
+ EXPECT_EQ(location.second, size_t(result->location - image.begin()));
+ EXPECT_EQ(result->location + 4, rel_finder.next_cursor());
+ EXPECT_FALSE(result->can_point_outside_section);
+ rel_finder.Accept();
+ }
+ EXPECT_EQ(base::nullopt, rel_finder.GetNext());
+}
+
+TEST(Rel32FinderX86Test, Accept) {
+ constexpr uint8_t data[] = {
+ 0xB9, 0x00, 0x00, 0x00, 0xE9, // 00: mov E9000000
+ 0xE8, 0x00, 0x00, 0x00, 0xE9, // 05: call E900000A
+ 0xE8, 0x00, 0x00, 0x00, 0xE9, // 0A: call E900000F
+ };
+
+ ConstBufferView image =
+ ConstBufferView::FromRange(std::begin(data), std::end(data));
+
+ auto next_location = [&](Rel32FinderX86& rel_finder) {
+ auto result = rel_finder.GetNext();
+ EXPECT_TRUE(result.has_value());
+ return result->location - image.begin();
+ };
+
+ Rel32FinderX86 rel_finder(image);
+
+ EXPECT_EQ(0x05, next_location(rel_finder)); // False positive.
+ rel_finder.Accept();
+ // False negative: shadowed by 0x05
+ // EXPECT_EQ(0x06, next_location(rel_finder));
+ EXPECT_EQ(0x0A, next_location(rel_finder)); // False positive.
+ EXPECT_EQ(0x0B, next_location(rel_finder)); // Found if 0x0A is discarded.
+}
+
+TEST(Rel32FinderX64Test, FindNext) {
+ constexpr uint8_t data[] = {
+ 0x55, // 00: push ebp
+ 0x8B, 0xEC, // 01: mov ebp,esp
+ 0xE8, 0x00, 0x00, 0x00, 0x00, // 03: call 08
+ 0xE9, 0x00, 0x00, 0x00, 0x00, // 08: jmp 0D
+ 0x0F, 0x80, 0x00, 0x00, 0x00, 0x00, // 0D: jo 13
+ 0x0F, 0x81, 0x00, 0x00, 0x00, 0x00, // 13: jno 19
+ 0x0F, 0x82, 0x00, 0x00, 0x00, 0x00, // 19: jb 1F
+ 0x0F, 0x83, 0x00, 0x00, 0x00, 0x00, // 1F: jae 25
+ 0x0F, 0x84, 0x00, 0x00, 0x00, 0x00, // 25: je 2B
+ 0x0F, 0x85, 0x00, 0x00, 0x00, 0x00, // 2B: jne 31
+ 0x0F, 0x86, 0x00, 0x00, 0x00, 0x00, // 31: jbe 37
+ 0x0F, 0x87, 0x00, 0x00, 0x00, 0x00, // 37: ja 3D
+ 0x0F, 0x88, 0x00, 0x00, 0x00, 0x00, // 3D: js 43
+ 0x0F, 0x89, 0x00, 0x00, 0x00, 0x00, // 43: jns 49
+ 0x0F, 0x8A, 0x00, 0x00, 0x00, 0x00, // 49: jp 4F
+ 0x0F, 0x8B, 0x00, 0x00, 0x00, 0x00, // 4F: jnp 55
+ 0x0F, 0x8C, 0x00, 0x00, 0x00, 0x00, // 55: jl 5B
+ 0x0F, 0x8D, 0x00, 0x00, 0x00, 0x00, // 5B: jge 61
+ 0x0F, 0x8E, 0x00, 0x00, 0x00, 0x00, // 61: jle 67
+ 0x0F, 0x8F, 0x00, 0x00, 0x00, 0x00, // 67: jg 6D
+ 0xFF, 0x15, 0x00, 0x00, 0x00, 0x00, // 6D: call [rip+00]
+ 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // 73: jmp [rip+00]
+ 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // 79: mov eax,[rip+00]
+ 0x8B, 0x3D, 0x00, 0x00, 0x00, 0x00, // 7F: mov edi,[rip+00]
+ 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00, // 85: lea eax,[rip+00]
+ 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, // 8B: lea edi,[rip+00]
+ 0x48, 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // 91: mov rax,[rip+00]
+ 0x48, 0x8B, 0x3D, 0x00, 0x00, 0x00, 0x00, // 98: mov rdi,[rip+00]
+ 0x48, 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00, // 9F: lea rax,[rip+00]
+ 0x48, 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, // A6: lea rdi,[rip+00]
+ 0x4C, 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // AD: mov r8,[rip+00]
+ 0x4C, 0x8B, 0x3D, 0x00, 0x00, 0x00, 0x00, // B4: mov r15,[rip+00]
+ 0x4C, 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00, // BB: lea r8,[rip+00]
+ 0x4C, 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, // C2: lea r15,[rip+00]
+ 0x66, 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // C9: mov ax,[rip+00]
+ 0x66, 0x8B, 0x3D, 0x00, 0x00, 0x00, 0x00, // D0: mov di,[rip+00]
+ 0x66, 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00, // D7: lea ax,[rip+00]
+ 0x66, 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, // DE: lea di,[rip+00]
+ 0x5D, // E5: pop ebp
+ 0xC3, // E6: ret
+ };
+
+ ConstBufferView image =
+ ConstBufferView::FromRange(std::begin(data), std::end(data));
+
+ Rel32FinderX64 rel_finder(image);
+
+ // Lists of expected locations as pairs of (cursor position, rel32 position).
+ std::vector<std::pair<size_t, size_t>> expected_locations = {
+ {0x04, 0x04}, {0x09, 0x09}, {0x0E, 0x0F}, {0x14, 0x15}, {0x1A, 0x1B},
+ {0x20, 0x21}, {0x26, 0x27}, {0x2C, 0x2D}, {0x32, 0x33}, {0x38, 0x39},
+ {0x3E, 0x3F}, {0x44, 0x45}, {0x4A, 0x4B}, {0x50, 0x51}, {0x56, 0x57},
+ {0x5C, 0x5D}, {0x62, 0x63}, {0x68, 0x69},
+ };
+ std::vector<std::pair<size_t, size_t>> expected_locations_rip = {
+ {0x6E, 0x6F}, {0x74, 0x75}, {0x7A, 0x7B}, {0x80, 0x81}, {0x86, 0x87},
+ {0x8C, 0x8D}, {0x93, 0x94}, {0x9A, 0x9B}, {0xA1, 0xA2}, {0xA8, 0xA9},
+ {0xAF, 0xB0}, {0xB6, 0xB7}, {0xBD, 0xBE}, {0xC4, 0xC5}, {0xCB, 0xCC},
+ {0xD2, 0xD3}, {0xD9, 0xDA}, {0xE0, 0xE1},
+ };
+ for (auto location : expected_locations) {
+ auto result = rel_finder.GetNext();
+ EXPECT_TRUE(result.has_value());
+
+ EXPECT_EQ(location.first,
+ size_t(rel_finder.region().begin() - image.begin()));
+ EXPECT_EQ(location.second, size_t(result->location - image.begin()));
+ EXPECT_EQ(result->location + 4, rel_finder.next_cursor());
+ EXPECT_FALSE(result->can_point_outside_section);
+ rel_finder.Accept();
+ }
+ for (auto location : expected_locations_rip) {
+ auto result = rel_finder.GetNext();
+ EXPECT_TRUE(result.has_value());
+
+ EXPECT_EQ(location.first,
+ size_t(rel_finder.region().begin() - image.begin()));
+ EXPECT_EQ(location.second, size_t(result->location - image.begin()));
+ EXPECT_EQ(result->location + 4, rel_finder.next_cursor());
+ EXPECT_TRUE(result->can_point_outside_section);
+ rel_finder.Accept();
+ }
+ EXPECT_EQ(base::nullopt, rel_finder.GetNext());
+}
+
+// TODO(huangs): Test that integrates Abs32GapFinder and Rel32Finder.
+
+} // namespace zucchini
diff --git a/rel32_utils.cc b/rel32_utils.cc
new file mode 100644
index 0000000..fa59386
--- /dev/null
+++ b/rel32_utils.cc
@@ -0,0 +1,69 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/rel32_utils.h"
+
+#include <algorithm>
+
+#include "base/logging.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+/******** Rel32ReaderX86 ********/
+
+Rel32ReaderX86::Rel32ReaderX86(ConstBufferView image,
+ offset_t lo,
+ offset_t hi,
+ const std::vector<offset_t>* locations,
+ const AddressTranslator& translator)
+ : image_(image),
+ target_rva_to_offset_(translator),
+ location_offset_to_rva_(translator),
+ hi_(hi),
+ last_(locations->end()) {
+ DCHECK_LE(lo, image.size());
+ DCHECK_LE(hi, image.size());
+ current_ = std::lower_bound(locations->begin(), locations->end(), lo);
+}
+
+Rel32ReaderX86::~Rel32ReaderX86() = default;
+
+base::Optional<Reference> Rel32ReaderX86::GetNext() {
+ while (current_ < last_ && *current_ < hi_) {
+ offset_t loc_offset = *(current_++);
+ DCHECK_LE(loc_offset + 4, image_.size()); // Sanity check.
+ rva_t loc_rva = location_offset_to_rva_.Convert(loc_offset);
+ rva_t target_rva = loc_rva + 4 + image_.read<int32_t>(loc_offset);
+ offset_t target_offset = target_rva_to_offset_.Convert(target_rva);
+ // In rare cases, the most significant bit of |target| is set. This
+ // interferes with label marking. We expect these to already be filtered out
+ // from |locations|.
+ DCHECK(!IsMarked(target_offset));
+ return Reference{loc_offset, target_offset};
+ }
+ return base::nullopt;
+}
+
+/******** Rel32WriterX86 ********/
+
+Rel32WriterX86::Rel32WriterX86(MutableBufferView image,
+ const AddressTranslator& translator)
+ : image_(image),
+ target_offset_to_rva_(translator),
+ location_offset_to_rva_(translator) {}
+
+Rel32WriterX86::~Rel32WriterX86() = default;
+
+void Rel32WriterX86::PutNext(Reference ref) {
+ rva_t target_rva = target_offset_to_rva_.Convert(ref.target);
+ rva_t loc_rva = location_offset_to_rva_.Convert(ref.location);
+
+ // Subtraction underflow is okay
+ uint32_t code =
+ static_cast<uint32_t>(target_rva) - (static_cast<uint32_t>(loc_rva) + 4);
+ image_.write<uint32_t>(ref.location, code);
+}
+
+} // namespace zucchini
diff --git a/rel32_utils.h b/rel32_utils.h
new file mode 100644
index 0000000..7a01230
--- /dev/null
+++ b/rel32_utils.h
@@ -0,0 +1,70 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_REL32_UTILS_H_
+#define COMPONENTS_ZUCCHINI_REL32_UTILS_H_
+
+#include <vector>
+
+#include "base/macros.h"
+#include "base/optional.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A visitor that emits References (locations and target) from a specified
+// portion of an x86 / x64 image, given a list of valid locations.
+class Rel32ReaderX86 : public ReferenceReader {
+ public:
+ // |image| is an image containing x86 / x64 code in [|lo|, |hi|).
+ // |locations| is a sorted list of offsets of rel32 reference locations.
+ // |translator| (for |image|) is embedded into |target_rva_to_offset_| and
+ // |location_offset_to_rva_| for address translation, and therefore must
+ // outlive |*this|.
+ Rel32ReaderX86(ConstBufferView image,
+ offset_t lo,
+ offset_t hi,
+ const std::vector<offset_t>* locations,
+ const AddressTranslator& translator);
+ ~Rel32ReaderX86() override;
+
+ // Returns the next reference, or base::nullopt if exhausted.
+ base::Optional<Reference> GetNext() override;
+
+ private:
+ ConstBufferView image_;
+ AddressTranslator::RvaToOffsetCache target_rva_to_offset_;
+ AddressTranslator::OffsetToRvaCache location_offset_to_rva_;
+ const offset_t hi_;
+ const std::vector<offset_t>::const_iterator last_;
+ std::vector<offset_t>::const_iterator current_;
+
+ DISALLOW_COPY_AND_ASSIGN(Rel32ReaderX86);
+};
+
+// Writer for x86 / x64 rel32 references.
+class Rel32WriterX86 : public ReferenceWriter {
+ public:
+ // |image| wraps the raw bytes of a binary in which rel32 references will be
+ // written. |translator| (for |image|) is embedded into
+ // |target_offset_to_rva_| and |location_offset_to_rva_| for address
+ // translation, and therefore must outlive |*this|.
+ Rel32WriterX86(MutableBufferView image, const AddressTranslator& translator);
+ ~Rel32WriterX86() override;
+
+ void PutNext(Reference ref) override;
+
+ private:
+ MutableBufferView image_;
+ AddressTranslator::OffsetToRvaCache target_offset_to_rva_;
+ AddressTranslator::OffsetToRvaCache location_offset_to_rva_;
+
+ DISALLOW_COPY_AND_ASSIGN(Rel32WriterX86);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_REL32_UTILS_H_
diff --git a/rel32_utils_unittest.cc b/rel32_utils_unittest.cc
new file mode 100644
index 0000000..80928de
--- /dev/null
+++ b/rel32_utils_unittest.cc
@@ -0,0 +1,128 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/rel32_utils.h"
+
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "base/memory/ptr_util.h"
+#include "base/optional.h"
+#include "base/test/gtest_util.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// A trivial AddressTranslator that applies constant shift.
+class TestAddressTranslator : public AddressTranslator {
+ public:
+ TestAddressTranslator(offset_t image_size, rva_t rva_begin) {
+ DCHECK_GE(rva_begin, 0U);
+ CHECK_EQ(AddressTranslator::kSuccess,
+ Initialize({{0, image_size, rva_begin, image_size}}));
+ }
+};
+
+// Checks that |reader| emits and only emits |expected_refs|, in order.
+void CheckReader(const std::vector<Reference>& expected_refs,
+ ReferenceReader* reader) {
+ for (Reference expected_ref : expected_refs) {
+ auto ref = reader->GetNext();
+ EXPECT_TRUE(ref.has_value());
+ EXPECT_EQ(expected_ref, ref.value());
+ }
+ EXPECT_EQ(base::nullopt, reader->GetNext()); // Nothing should be left.
+}
+
+} // namespace
+
+TEST(Rel32UtilsTest, Rel32ReaderX86) {
+ constexpr offset_t kTestImageSize = 0x00100000U;
+ constexpr rva_t kRvaBegin = 0x00030000U;
+ TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+ // For simplicity, test data is not real X86 machine code. We are only
+ // including rel32 targets, without the full instructions.
+ std::vector<uint8_t> bytes = {
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler)
+ 0x00, 0x00, 0x00, 0x80, // 00030004: 80030008 Marked, so invalid.
+ 0x04, 0x00, 0x00, 0x00, // 00030008: 00030010
+ 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler)
+ 0x00, 0x00, 0x00, 0x00, // 00030010: 00030014
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler)
+ 0xF4, 0xFF, 0xFF, 0xFF, // 00030018: 00030010
+ 0xE4, 0xFF, 0xFF, 0xFF, // 0003001C: 00030004
+ };
+ ConstBufferView buffer(bytes.data(), bytes.size());
+ // Specify rel32 locations directly, instead of parsing.
+ std::vector<offset_t> rel32_locations = {0x0008U, 0x0010U, 0x0018U, 0x001CU};
+
+ // Generate everything.
+ Rel32ReaderX86 reader1(buffer, 0x0000U, 0x0020U, &rel32_locations,
+ translator);
+ CheckReader({{0x0008U, 0x0010U},
+ {0x0010U, 0x0014U},
+ {0x0018U, 0x0010U},
+ {0x001CU, 0x0004U}},
+ &reader1);
+
+ // Exclude last.
+ Rel32ReaderX86 reader2(buffer, 0x0000U, 0x001CU, &rel32_locations,
+ translator);
+ CheckReader({{0x0008U, 0x0010U}, {0x0010U, 0x0014U}, {0x0018U, 0x0010U}},
+ &reader2);
+
+ // Only find one.
+ Rel32ReaderX86 reader3(buffer, 0x000CU, 0x0018U, &rel32_locations,
+ translator);
+ CheckReader({{0x0010U, 0x0014U}}, &reader3);
+
+ // Marked target encountered (error).
+ std::vector<offset_t> rel32_marked_locations = {0x00004U};
+ Rel32ReaderX86 reader4(buffer, 0x0000U, 0x0020U, &rel32_marked_locations,
+ translator);
+ EXPECT_DCHECK_DEATH(reader4.GetNext());
+}
+
+TEST(Rel32UtilsTest, Rel32WriterX86) {
+ constexpr offset_t kTestImageSize = 0x00100000U;
+ constexpr rva_t kRvaBegin = 0x00030000U;
+ TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+ std::vector<uint8_t> bytes(32, 0xFF);
+ MutableBufferView buffer(bytes.data(), bytes.size());
+
+ Rel32WriterX86 writer(buffer, translator);
+ writer.PutNext({0x0008U, 0x0010U});
+ EXPECT_EQ(0x00000004U, buffer.read<uint32_t>(0x08)); // 00030008: 00030010
+
+ writer.PutNext({0x0010U, 0x0014U});
+ EXPECT_EQ(0x00000000U, buffer.read<uint32_t>(0x10)); // 00030010: 00030014
+
+ writer.PutNext({0x0018U, 0x0010U});
+ EXPECT_EQ(0xFFFFFFF4U, buffer.read<uint32_t>(0x18)); // 00030018: 00030010
+
+ writer.PutNext({0x001CU, 0x0004U});
+ EXPECT_EQ(0xFFFFFFE4U, buffer.read<uint32_t>(0x1C)); // 0003001C: 00030004
+
+ EXPECT_EQ(std::vector<uint8_t>({
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler)
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler)
+ 0x04, 0x00, 0x00, 0x00, // 00030008: 00030010
+ 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler)
+ 0x00, 0x00, 0x00, 0x00, // 00030010: 00030014
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler)
+ 0xF4, 0xFF, 0xFF, 0xFF, // 00030018: 00030010
+ 0xE4, 0xFF, 0xFF, 0xFF, // 0003001C: 00030004
+ }),
+ bytes);
+}
+
+} // namespace zucchini
diff --git a/reloc_utils.cc b/reloc_utils.cc
new file mode 100644
index 0000000..d21a0d3
--- /dev/null
+++ b/reloc_utils.cc
@@ -0,0 +1,193 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reloc_utils.h"
+
+#include <algorithm>
+#include <tuple>
+#include <utility>
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/io_utils.h"
+#include "components/zucchini/type_win_pe.h"
+
+namespace zucchini {
+
+/******** RelocUnitWin32 ********/
+
+RelocUnitWin32::RelocUnitWin32() = default;
+
+// Builds a unit from its reloc type, its location offset, and its target RVA.
+RelocUnitWin32::RelocUnitWin32(uint8_t type_in,
+                               offset_t location_in,
+                               rva_t target_rva_in)
+    : type{type_in}, location{location_in}, target_rva{target_rva_in} {}
+
+// Two units are equal when type, location and target RVA all match.
+bool operator==(const RelocUnitWin32& a, const RelocUnitWin32& b) {
+  if (a.type != b.type)
+    return false;
+  return a.location == b.location && a.target_rva == b.target_rva;
+}
+
+/******** RelocRvaReaderWin32 ********/
+
+// Walks |reloc_region| of |image| one reloc block at a time, recording the
+// image offset of every block header in |reloc_block_offsets|. Returns false
+// on a malformed block size, or if bytes remain that are too few to hold a
+// block header.
+// static
+bool RelocRvaReaderWin32::FindRelocBlocks(
+    ConstBufferView image,
+    BufferRegion reloc_region,
+    std::vector<offset_t>* reloc_block_offsets) {
+  CHECK_LT(reloc_region.size, kOffsetBound);
+  ConstBufferView reloc_data = image[reloc_region];
+  reloc_block_offsets->clear();
+  while (reloc_data.size() >= sizeof(pe::RelocHeader)) {
+    // The iterator difference is signed and may be wider than offset_t;
+    // convert with a range check instead of relying on implicit narrowing.
+    reloc_block_offsets->push_back(
+        base::checked_cast<offset_t>(reloc_data.begin() - image.begin()));
+    auto size = reloc_data.read<pe::RelocHeader>(0).size;
+    // |size| must cover the header, be aligned to 4-bytes, and fit in what is
+    // left of the region.
+    if (size < sizeof(pe::RelocHeader) || size % 4 || size > reloc_data.size())
+      return false;
+    reloc_data.remove_prefix(size);
+  }
+  return reloc_data.empty();  // Fail if trailing data exist.
+}
+
+// Prepares iteration over reloc units located in [lo, hi), after clamping both
+// bounds to |reloc_region|. |reloc_block_offsets| holds the image offsets of
+// the reloc block headers.
+RelocRvaReaderWin32::RelocRvaReaderWin32(
+    ConstBufferView image,
+    BufferRegion reloc_region,
+    const std::vector<offset_t>& reloc_block_offsets,
+    offset_t lo,
+    offset_t hi)
+    // |rva_hi_bits_| is zeroed so that the early-return paths below do not
+    // leave an indeterminate value for the defaulted move constructor to copy.
+    : image_(image), rva_hi_bits_(0) {
+  CHECK_LE(lo, hi);
+  lo = base::checked_cast<offset_t>(reloc_region.InclusiveClamp(lo));
+  hi = base::checked_cast<offset_t>(reloc_region.InclusiveClamp(hi));
+  end_it_ = image_.begin() + hi;
+
+  // By default, get GetNext() to produce empty output.
+  cur_reloc_units_ = BufferSource(end_it_, 0);
+  if (reloc_block_offsets.empty())
+    return;
+
+  // Find the block that contains |lo|: the last block whose offset is <= |lo|.
+  auto block_it = std::upper_bound(reloc_block_offsets.begin(),
+                                   reloc_block_offsets.end(), lo);
+  DCHECK(block_it != reloc_block_offsets.begin());
+  --block_it;
+
+  // Initialize |cur_reloc_units_| and |rva_hi_bits_|.
+  if (!LoadRelocBlock(image_.begin() + *block_it))
+    return;  // Nothing left.
+
+  // Skip |cur_reloc_units_| to |lo|, rounding the distance up to a multiple of
+  // the unit size so that reading resumes on a unit boundary.
+  offset_t cur_reloc_units_offset = cur_reloc_units_.begin() - image_.begin();
+  if (lo > cur_reloc_units_offset) {
+    offset_t delta =
+        ceil<offset_t>(lo - cur_reloc_units_offset, kRelocUnitSize);
+    cur_reloc_units_.Skip(delta);
+  }
+}
+
+RelocRvaReaderWin32::RelocRvaReaderWin32(RelocRvaReaderWin32&&) = default;  // Member-wise move.
+
+RelocRvaReaderWin32::~RelocRvaReaderWin32() = default;  // Compiler-generated.
+
+// Unrolls a nested loop: outer = reloc blocks and inner = reloc entries. Each call yields at most one unit.
+base::Optional<RelocUnitWin32> RelocRvaReaderWin32::GetNext() {
+ // "Outer loop" to find non-empty reloc block.
+ while (cur_reloc_units_.Remaining() < kRelocUnitSize) {
+ if (!LoadRelocBlock(cur_reloc_units_.end()))  // Next block starts where this one ends.
+ return base::nullopt;
+ }
+ if (end_it_ - cur_reloc_units_.begin() < kRelocUnitSize)  // A full unit would cross |end_it_|.
+ return base::nullopt;
+ // "Inner loop" to extract single reloc unit.
+ offset_t location = cur_reloc_units_.begin() - image_.begin();
+ uint16_t entry = cur_reloc_units_.read<uint16_t>(0);
+ uint8_t type = static_cast<uint8_t>(entry >> 12);  // Bits 12-15 hold the type.
+ rva_t rva = rva_hi_bits_ + (entry & 0xFFF);  // Bits 0-11 hold the low RVA bits.
+ cur_reloc_units_.Skip(kRelocUnitSize);
+ return RelocUnitWin32{type, location, rva};
+}
+
+bool RelocRvaReaderWin32::LoadRelocBlock(
+ ConstBufferView::const_iterator block_begin) {
+ ConstBufferView header_buf(block_begin, sizeof(pe::RelocHeader));  // Only read after the bounds check below.
+ if (header_buf.end() >= end_it_ ||  // Header reaches or passes |end_it_|,
+ end_it_ - header_buf.end() < kRelocUnitSize) {  // or no room for even one unit.
+ return false;
+ }
+ const auto& header = header_buf.read<pe::RelocHeader>(0);
+ rva_hi_bits_ = header.rva_hi;
+ uint32_t block_size = header.size;  // Size includes the header itself.
+ DCHECK_GE(block_size, sizeof(pe::RelocHeader));
+ cur_reloc_units_ = BufferSource(block_begin, block_size);
+ cur_reloc_units_.Skip(sizeof(pe::RelocHeader));  // Position at the first unit.
+ return true;
+}
+
+/******** RelocReaderWin32 ********/
+
+RelocReaderWin32::RelocReaderWin32(RelocRvaReaderWin32&& reloc_rva_reader,
+ uint16_t reloc_type,
+ offset_t offset_bound,
+ const AddressTranslator& translator)
+ : reloc_rva_reader_(std::move(reloc_rva_reader)),  // Takes over the reader's state.
+ reloc_type_(reloc_type),  // GetNext() only emits units of this type.
+ offset_bound_(offset_bound),  // GetNext() drops targets at or above this.
+ entry_rva_to_offset_(translator) {}
+
+RelocReaderWin32::~RelocReaderWin32() = default;
+
+// ReferenceReader:
+// Pulls units from |reloc_rva_reader_| until one passes every filter, and
+// returns it as a Reference. Returns base::nullopt once the units run out.
+base::Optional<Reference> RelocReaderWin32::GetNext() {
+  for (;;) {
+    base::Optional<RelocUnitWin32> unit = reloc_rva_reader_.GetNext();
+    if (!unit.has_value())
+      break;
+    // Filter 1: wrong reloc type.
+    if (unit->type != reloc_type_)
+      continue;
+    // Filter 2: the target RVA has no corresponding offset.
+    const offset_t target = entry_rva_to_offset_.Convert(unit->target_rva);
+    if (target == kInvalidOffset)
+      continue;
+    const offset_t location = unit->location;
+    // Filter 3: marked targets are reported, then skipped.
+    if (IsMarked(target)) {
+      LOG(WARNING) << "Warning: Skipping mark-aliased reloc target: "
+                   << AsHex<8>(location) << " -> " << AsHex<8>(target) << ".";
+      continue;
+    }
+    // Filter 4: the target (abs32 reference) must lie entirely within the
+    // image, which is what |offset_bound_| encodes.
+    if (target < offset_bound_)
+      return Reference{location, target};
+  }
+  return base::nullopt;
+}
+
+/******** RelocWriterWin32 ********/
+
+RelocWriterWin32::RelocWriterWin32(
+ uint16_t reloc_type,
+ MutableBufferView image,
+ BufferRegion reloc_region,
+ const std::vector<offset_t>& reloc_block_offsets,
+ const AddressTranslator& translator)
+ : reloc_type_(reloc_type),  // Written into bits 12-15 of each unit by PutNext().
+ image_(image),
+ reloc_region_(reloc_region),  // PutNext() DCHECKs that locations fall inside it.
+ reloc_block_offsets_(reloc_block_offsets),  // Member is a reference: the vector is not copied.
+ target_offset_to_rva_(translator) {}
+
+RelocWriterWin32::~RelocWriterWin32() = default;
+
+// Rewrites the reloc unit at |ref.location| so that its low 12 bits encode
+// |ref.target| and its top 4 bits hold |reloc_type_|. The high RVA bits are
+// read from the enclosing block's header and are not modified.
+void RelocWriterWin32::PutNext(Reference ref) {
+  DCHECK_GE(ref.location, reloc_region_.lo());
+  DCHECK_LT(ref.location, reloc_region_.hi());
+  // Find the last block whose offset is <= |ref.location|.
+  auto block_it = std::upper_bound(reloc_block_offsets_.begin(),
+                                   reloc_block_offsets_.end(), ref.location);
+  // Decrementing begin() is undefined. That would happen if no block offsets
+  // were supplied, or if |ref.location| precedes the first block.
+  DCHECK(block_it != reloc_block_offsets_.begin());
+  --block_it;
+  rva_t rva_hi_bits = image_.read<pe::RelocHeader>(*block_it).rva_hi;
+  rva_t target_rva = target_offset_to_rva_.Convert(ref.target);
+  rva_t rva_lo_bits = target_rva - rva_hi_bits;
+  // The low bits must fit in 12 bits, i.e., the new target has to stay within
+  // the RVA range covered by this block.
+  DCHECK_EQ(rva_lo_bits & 0xFFF, rva_lo_bits);
+  image_.write<uint16_t>(ref.location,
+                         (rva_lo_bits & 0xFFF) | (reloc_type_ << 12));
+}
+
+} // namespace zucchini
diff --git a/reloc_utils.h b/reloc_utils.h
new file mode 100644
index 0000000..aac1efb
--- /dev/null
+++ b/reloc_utils.h
@@ -0,0 +1,140 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_RELOC_UTILS_H_
+#define COMPONENTS_ZUCCHINI_RELOC_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "base/optional.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_source.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// Win32 PE relocation table stores a list of (type, RVA) pairs. The table is
+// organized into "blocks" for RVAs with common high-order bits (12-31). Each
+// block consists of a list (even length) of 2-byte "units". Each unit stores
+// type (in bits 12-15) and low-order bits (0-11) of an RVA (in bits 0-11). In
+// pseudo-struct:
+// struct Block {
+// uint32_t rva_hi;
+// uint32_t block_size_in_bytes; // 8 + multiple of 4.
+// struct {
+// uint16_t rva_lo:12, type:4; // Little-endian.
+// } units[(block_size_in_bytes - 8) / 2]; // Size must be even.
+// } reloc_table[num_blocks]; // May have padding (type = 0).
+
+// Extracted Win32 reloc Unit data.
+struct RelocUnitWin32 {
+  RelocUnitWin32();
+  RelocUnitWin32(uint8_t type_in, offset_t location_in, rva_t target_rva_in);
+  friend bool operator==(const RelocUnitWin32& a, const RelocUnitWin32& b);
+
+  // Members carry default initializers so that a default-constructed unit has
+  // well-defined contents (e.g., when compared with operator==).
+  uint8_t type = 0;       // Reloc type (bits 12-15 of the unit).
+  offset_t location = 0;  // Offset of the unit within the image.
+  rva_t target_rva = 0;   // Block's high RVA bits plus the unit's low 12 bits.
+};
+
+// A reader that parses Win32 PE relocation data and emits RelocUnitWin32 for
+// each reloc unit that lies strictly inside |[lo, hi)|.
+class RelocRvaReaderWin32 {
+ public:
+ enum : ptrdiff_t { kRelocUnitSize = sizeof(uint16_t) };  // Bytes per reloc unit.
+
+ // Parses |image| at |reloc_region| to find beginning offsets of each reloc
+ // block. On success, writes the result to |reloc_block_offsets| and returns
+ // true. Otherwise leaves |reloc_block_offsets| in an undetermined state, and
+ // returns false.
+ static bool FindRelocBlocks(ConstBufferView image,
+ BufferRegion reloc_region,
+ std::vector<offset_t>* reloc_block_offsets);
+
+ // |reloc_block_offsets| should be precomputed from FindRelocBlocks().
+ RelocRvaReaderWin32(ConstBufferView image,
+ BufferRegion reloc_region,
+ const std::vector<offset_t>& reloc_block_offsets,
+ offset_t lo,
+ offset_t hi);
+ RelocRvaReaderWin32(RelocRvaReaderWin32&&);
+ ~RelocRvaReaderWin32();
+
+ // Successively visits and returns data for each reloc unit, or base::nullopt
+ // when all reloc units are found. Encapsulates block transition details.
+ base::Optional<RelocUnitWin32> GetNext();
+
+ private:
+ // Assuming that |block_begin| points to the beginning of a reloc block, loads
+ // |rva_hi_bits_| and assigns |cur_reloc_units_| as the region containing the
+ // associated units, potentially truncated by |end_it_|. Returns true if reloc
+ // data are available for read, and false otherwise.
+ bool LoadRelocBlock(ConstBufferView::const_iterator block_begin);
+
+ const ConstBufferView image_;
+
+ // End iterator: position in |image_| of |hi| after clamping to the reloc region.
+ ConstBufferView::const_iterator end_it_;
+
+ // Unit data of the current reloc block.
+ BufferSource cur_reloc_units_;
+
+ // High-order bits (12-31) for all relocs of the current reloc block.
+ rva_t rva_hi_bits_;
+};
+
+// A reader for Win32 reloc References, implemented as a filtering and
+// translation adaptor of RelocRvaReaderWin32.
+class RelocReaderWin32 : public ReferenceReader {
+ public:
+ // Takes ownership of |reloc_rva_reader|. |offset_bound| specifies the
+ // exclusive upper bound of reloc target offsets, taking account of widths of
+ // targets (which are abs32 References).
+ RelocReaderWin32(RelocRvaReaderWin32&& reloc_rva_reader,
+ uint16_t reloc_type,
+ offset_t offset_bound,
+ const AddressTranslator& translator);
+ ~RelocReaderWin32() override;
+
+ // ReferenceReader:
+ base::Optional<Reference> GetNext() override;
+
+ private:
+ RelocRvaReaderWin32 reloc_rva_reader_;  // Source of raw reloc units.
+ const uint16_t reloc_type_; // uint16_t to simplify shifting (<< 12).
+ const offset_t offset_bound_;  // Units whose target offset is >= this are skipped.
+ AddressTranslator::RvaToOffsetCache entry_rva_to_offset_;  // Converts unit target RVAs to offsets.
+};
+
+// A writer for Win32 reloc References. This is simpler than the reader since:
+// - No iteration is required.
+// - High-order bits of reloc target RVAs are assumed to be handled elsewhere,
+// so only low-order bits need to be written.
+class RelocWriterWin32 : public ReferenceWriter {
+ public:
+ RelocWriterWin32(uint16_t reloc_type,
+ MutableBufferView image,
+ BufferRegion reloc_region,
+ const std::vector<offset_t>& reloc_block_offsets,  // Kept by reference; see member below.
+ const AddressTranslator& translator);
+ ~RelocWriterWin32() override;
+
+ // ReferenceWriter:
+ void PutNext(Reference ref) override;
+
+ private:
+ const uint16_t reloc_type_;  // Stored in bits 12-15 of every unit written.
+ MutableBufferView image_;
+ BufferRegion reloc_region_;
+ const std::vector<offset_t>& reloc_block_offsets_;  // Not copied: the caller's vector must outlive this writer.
+ AddressTranslator::OffsetToRvaCache target_offset_to_rva_;  // Converts target offsets to RVAs.
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_RELOC_UTILS_H_
diff --git a/reloc_utils_unittest.cc b/reloc_utils_unittest.cc
new file mode 100644
index 0000000..e75264c
--- /dev/null
+++ b/reloc_utils_unittest.cc
@@ -0,0 +1,273 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reloc_utils.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/test/gtest_util.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Returns a vector that's the concatenation of two vectors of the same type:
+// all elements of |a| followed by all elements of |b|. Elements are copied by
+// value.
+template <class T>
+std::vector<T> Cat(const std::vector<T>& a, const std::vector<T>& b) {
+  std::vector<T> joined(a.begin(), a.end());
+  for (const T& item : b)
+    joined.push_back(item);
+  return joined;
+}
+
+// Returns the elements of |a| at indices [lo, hi) as a new vector. Elements
+// are copied by value.
+template <class T>
+std::vector<T> Sub(const std::vector<T>& a, size_t lo, size_t hi) {
+  std::vector<T> slice;
+  slice.assign(a.begin() + lo, a.begin() + hi);
+  return slice;
+}
+
+} // namespace
+
+class RelocUtilsWin32Test : public testing::Test {
+ protected:
+ using Units = std::vector<RelocUnitWin32>;
+
+ RelocUtilsWin32Test() {}
+
+ // Resets all tester data, calls RelocRvaReaderWin32::FindRelocBlocks(), and
+ // returns its result. |image_| is built from |image_raw|'s data pointer, so |image_raw| must stay alive.
+ bool Initialize(const std::vector<uint8_t>& image_raw,
+ BufferRegion reloc_region) {
+ image_ = BufferSource(image_raw.data(), image_raw.size());
+ reloc_region_ = reloc_region;
+ return RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_,
+ &reloc_block_offsets_);
+ }
+
+ // Uses RelocRvaReaderWin32 to get all relocs in [lo, hi), returned as Units.
+ Units EmitAll(offset_t lo, offset_t hi) {
+ RelocRvaReaderWin32 reader(image_, reloc_region_, reloc_block_offsets_, lo,
+ hi);
+ Units units;
+ for (auto unit = reader.GetNext(); unit.has_value();
+ unit = reader.GetNext()) {
+ units.push_back(unit.value());
+ }
+ return units;
+ }
+
+ ConstBufferView image_;  // Image under test.
+ BufferRegion reloc_region_;  // Reloc region within |image_|.
+ std::vector<uint32_t> reloc_block_offsets_;  // Filled by FindRelocBlocks().
+};
+
+TEST_F(RelocUtilsWin32Test, RvaReaderEmpty) {
+ {  // Empty image with an empty reloc region.
+ std::vector<uint8_t> image_raw = ParseHexString("");
+ EXPECT_TRUE(Initialize(image_raw, {0U, 0U}));
+ EXPECT_EQ(std::vector<uint32_t>(), reloc_block_offsets_); // Nothing.
+ EXPECT_EQ(Units(), EmitAll(0U, 0U));
+ }
+ {  // Non-empty image, but the reloc region (at offset 2) has size 0.
+ std::vector<uint8_t> image_raw = ParseHexString("AA BB CC DD EE FF");
+ EXPECT_TRUE(Initialize(image_raw, {2U, 0U}));
+ EXPECT_EQ(std::vector<uint32_t>(), reloc_block_offsets_); // Nothing.
+ EXPECT_EQ(Units(), EmitAll(2U, 2U));
+ }
+ {  // A single block whose size field (8) covers the header only.
+ std::vector<uint8_t> image_raw = ParseHexString("00 C0 00 00 08 00 00 00");
+ EXPECT_TRUE(Initialize(image_raw, {0U, image_raw.size()}));
+ EXPECT_EQ(std::vector<uint32_t>({0U}),
+ reloc_block_offsets_); // Empty block.
+ EXPECT_EQ(Units(), EmitAll(0U, 8U));
+ }
+}
+
+TEST_F(RelocUtilsWin32Test, RvaReaderBad) {
+ std::string test_cases[] = {
+ "00 C0 00 00 07 00 00", // Header too small (only 7 bytes given).
+ "00 C0 00 00 08 00 00", // Header too small, lies about size.
+ "00 C0 00 00 0A 00 00 00 66 31", // Odd number of units (size not a multiple of 4).
+ "00 C0 00 00 0C 00 00 00 66 31 88 31 FF", // Trailing data.
+ };
+ for (const std::string& test_case : test_cases) {
+ std::vector<uint8_t> image_raw = ParseHexString(test_case);
+ EXPECT_FALSE(Initialize(image_raw, {0U, image_raw.size()}));  // Region spans the whole image.
+ }
+}
+
+TEST_F(RelocUtilsWin32Test, RvaReaderSingle) {
+ // Block 0: All type 0x3: {0xC166, 0xC288, 0xC342, (padding) 0xCFFF}.
+ std::vector<uint8_t> image_raw = ParseHexString(
+ "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF "
+ "00 C0 00 00 10 00 00 00 66 31 88 32 42 33 FF 0F "
+ "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF");
+ constexpr offset_t kBlock0 = 16U;
+ Units exp0 = {{3, kBlock0 + 8U, 0xC166U},
+ {3, kBlock0 + 10U, 0xC288U},
+ {3, kBlock0 + 12U, 0xC342U},
+ {0, kBlock0 + 14U, 0xCFFFU}};
+
+ EXPECT_TRUE(Initialize(image_raw, {16U, 16U}));  // Region: offset 16, size 16.
+ EXPECT_EQ(exp0, EmitAll(kBlock0, kBlock0 + 16U));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 8U));  // |hi| ends right after the header.
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 9U));  // |hi| splits the first unit.
+ EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0, kBlock0 + 10U));
+ EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0 + 8U, kBlock0 + 10U));
+ EXPECT_EQ(Units(), EmitAll(kBlock0 + 9U, kBlock0 + 10U));  // |lo| falls inside the first unit.
+ EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 15U));  // |hi| splits the last unit.
+ EXPECT_EQ(Sub(exp0, 2, 3), EmitAll(kBlock0 + 11U, kBlock0 + 15U));
+}
+
+TEST_F(RelocUtilsWin32Test, RvaReaderMulti) {
+ // The sample image encodes 3 reloc blocks:
+ // Block 0: All type 0x3: {0xC166, 0xC288, 0xC342, (padding) 0xCFFF}.
+ // Block 1: All type 0x3: {0x12166, 0x12288}.
+ // Block 2: All type 0xA: {0x24000, 0x24010, 0x24020, 0x24028, 0x2403C,
+ // 0x24170}.
+ std::vector<uint8_t> image_raw = ParseHexString(
+ "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF "
+ "00 C0 00 00 10 00 00 00 66 31 88 32 42 33 FF 0F "
+ "00 20 01 00 0C 00 00 00 66 31 88 32 "
+ "00 40 02 00 14 00 00 00 00 A0 10 A0 20 A0 28 A0 3C A0 70 A1 "
+ "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF");
+ offset_t image_size = base::checked_cast<offset_t>(image_raw.size());
+ constexpr offset_t kBlock0 = 16U;
+ constexpr offset_t kBlock1 = kBlock0 + 16U;
+ constexpr offset_t kBlock2 = kBlock1 + 12U;
+ constexpr offset_t kBlockEnd = kBlock2 + 20U;
+ Units exp0 = {{3, kBlock0 + 8U, 0xC166U},
+ {3, kBlock0 + 10U, 0xC288U},
+ {3, kBlock0 + 12U, 0xC342U},
+ {0, kBlock0 + 14U, 0xCFFFU}};
+ Units exp1 = {{3, kBlock0 + 24U, 0x12166U}, {3, kBlock0 + 26U, 0x12288U}};
+ Units exp2 = {{10, kBlock0 + 36U, 0x24000U}, {10, kBlock0 + 38U, 0x24010U},
+ {10, kBlock0 + 40U, 0x24020U}, {10, kBlock0 + 42U, 0x24028U},
+ {10, kBlock0 + 44U, 0x2403CU}, {10, kBlock0 + 46U, 0x24170U}};
+
+ EXPECT_TRUE(Initialize(image_raw, {kBlock0, kBlockEnd - kBlock0}));
+ EXPECT_EQ(std::vector<uint32_t>({kBlock0, kBlock1, kBlock2}),
+ reloc_block_offsets_);
+
+ // Everything.
+ EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0, kBlockEnd));
+ EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(0, image_size));
+ // Entire blocks.
+ EXPECT_EQ(exp0, EmitAll(kBlock0, kBlock1));
+ EXPECT_EQ(exp1, EmitAll(kBlock1, kBlock2));
+ EXPECT_EQ(exp2, EmitAll(kBlock2, kBlockEnd));
+ EXPECT_EQ(Units(), EmitAll(0, kBlock0));
+ EXPECT_EQ(Units(), EmitAll(kBlockEnd, image_size));
+ // Within blocks, clipped at boundaries.
+ EXPECT_EQ(exp0, EmitAll(kBlock0 + 5U, kBlock1));
+ EXPECT_EQ(exp0, EmitAll(kBlock0 + 8U, kBlock1));
+ EXPECT_EQ(Sub(exp0, 1, 4), EmitAll(kBlock0 + 9U, kBlock1));
+ EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 15U));
+ EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 14U));
+ EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0 + 8U, kBlock0 + 10U));
+ EXPECT_EQ(Sub(exp1, 1, 2), EmitAll(kBlock1 + 10U, kBlock1 + 12U));
+ EXPECT_EQ(Sub(exp2, 2, 4), EmitAll(kBlock2 + 12U, kBlock2 + 16U));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 8U));
+ EXPECT_EQ(Units(), EmitAll(kBlock2 + 10U, kBlock2 + 11U));
+ EXPECT_EQ(Units(), EmitAll(kBlock2 + 11U, kBlock2 + 12U));
+ // Across blocks.
+ EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0 - 5U, kBlockEnd));
+ EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0 + 6U, kBlockEnd));
+ EXPECT_EQ(Cat(Cat(exp0, exp1), Sub(exp2, 0, 5)),
+ EmitAll(kBlock0 + 6U, kBlock2 + 18U));
+ EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)),
+ EmitAll(kBlock0 + 12U, kBlock1 + 10U));
+ EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)),
+ EmitAll(kBlock0 + 11U, kBlock1 + 10U));
+ EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)),
+ EmitAll(kBlock0 + 12U, kBlock1 + 11U));
+ EXPECT_EQ(Sub(exp1, 1, 2), EmitAll(kBlock1 + 10U, kBlock2 + 5U));
+ EXPECT_EQ(Cat(Sub(exp1, 1, 2), exp2), EmitAll(kBlock1 + 10U, kBlockEnd + 5));
+ EXPECT_EQ(Units(), EmitAll(kBlock0 + 15, kBlock1 + 9));
+}
+
+TEST_F(RelocUtilsWin32Test, ReadWrite) {
+ // Set up mock image: Size = 0x3000, .reloc at 0x600. RVA is 0x40000 + offset.
+ constexpr rva_t kBaseRva = 0x40000;
+ std::vector<uint8_t> image_data(0x3000, 0xFF);
+ // 4 x86 relocs (xx 3x), 3 x64 relocs (xx Ax), 1 padding (xx 0x).
+ std::vector<uint8_t> reloc_data = ParseHexString(
+ "00 10 04 00 10 00 00 00 C0 32 18 A3 F8 A7 FF 0F "
+ "00 20 04 00 10 00 00 00 80 A0 65 31 F8 37 BC 3A");
+ reloc_region_ = {0x600, reloc_data.size()};
+ std::copy(reloc_data.begin(), reloc_data.end(),
+ image_data.begin() + reloc_region_.lo());
+ image_ = {image_data.data(), image_data.size()};
+ offset_t image_size = base::checked_cast<offset_t>(image_.size());
+
+ AddressTranslator translator;
+ translator.Initialize({{0, image_size, kBaseRva, image_size}});
+
+ // Precompute |reloc_block_offsets_|.
+ EXPECT_TRUE(RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_,
+ &reloc_block_offsets_));
+ EXPECT_EQ(std::vector<uint32_t>({0x600U, 0x610U}), reloc_block_offsets_);
+
+ // Focus on x86.
+ constexpr uint16_t kRelocTypeX86 = 3;
+ constexpr offset_t kVAWidthX86 = 4;
+
+ // Make RelocRvaReaderWin32.
+ RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_,
+ reloc_block_offsets_, 0, image_size);
+ offset_t offset_bound = image_size - kVAWidthX86 + 1;  // First offset at which a 4-byte target no longer fits.
+
+ // Make RelocReaderWin32 that wraps |reloc_rva_reader|.
+ auto reader = std::make_unique<RelocReaderWin32>(
+ std::move(reloc_rva_reader), kRelocTypeX86, offset_bound, translator);
+
+ // Read all references and check. Only the type-3 units are expected.
+ std::vector<Reference> refs;
+ for (base::Optional<Reference> ref = reader->GetNext(); ref.has_value();
+ ref = reader->GetNext()) {
+ refs.push_back(ref.value());
+ }
+ std::vector<Reference> exp_refs{
+ {0x608, 0x12C0}, {0x61A, 0x2165}, {0x61C, 0x27F8}, {0x61E, 0x2ABC}};
+ EXPECT_EQ(exp_refs, refs);
+
+ // Write reference, extract bytes and check.
+ MutableBufferView mutable_image(&image_data[0], image_data.size());
+ auto writer = std::make_unique<RelocWriterWin32>(
+ kRelocTypeX86, mutable_image, reloc_region_, reloc_block_offsets_,
+ translator);
+
+ writer->PutNext({0x608, 0x1F83});
+ std::vector<uint8_t> exp_reloc_data1 = ParseHexString(
+ "00 10 04 00 10 00 00 00 83 3F 18 A3 F8 A7 FF 0F "
+ "00 20 04 00 10 00 00 00 80 A0 65 31 F8 37 BC 3A");
+ EXPECT_EQ(exp_reloc_data1,
+ Sub(image_data, reloc_region_.lo(), reloc_region_.hi()));
+
+ EXPECT_DCHECK_DEATH(writer->PutNext({0x608, 0x2000}));  // RVA 0x42000 needs more than 12 low bits relative to the block at 0x41000.
+
+ writer->PutNext({0x61C, 0x2950});
+ std::vector<uint8_t> exp_reloc_data2 = ParseHexString(
+ "00 10 04 00 10 00 00 00 83 3F 18 A3 F8 A7 FF 0F "
+ "00 20 04 00 10 00 00 00 80 A0 65 31 50 39 BC 3A");
+ EXPECT_EQ(exp_reloc_data2,
+ Sub(image_data, reloc_region_.lo(), reloc_region_.hi()));
+}
+
+} // namespace zucchini
diff --git a/suffix_array.h b/suffix_array.h
new file mode 100644
index 0000000..ea49a43
--- /dev/null
+++ b/suffix_array.h
@@ -0,0 +1,475 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_
+#define COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_
+
+#include <algorithm>
+#include <iterator>
+#include <numeric>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/macros.h"
+
+namespace zucchini {
+
+// A functor class that implements the naive suffix sorting algorithm that uses
+// std::sort with lexicographical compare. This is only meant as reference of
+// the interface.
+class NaiveSuffixSort {
+ public:
+  // Type requirements:
+  // |InputRng| is an input random access range.
+  // |KeyType| is an unsigned integer type.
+  // |SAIt| is a random access iterator with mutable references; raw pointers
+  // are accepted as well.
+  //
+  // |str| is the input string on which suffix sort is applied.
+  // Characters found in |str| must be in the range [0, |key_bound|).
+  // |key_bound| is not used by this implementation.
+  // |suffix_array| is the beginning of the destination range, which is at least
+  // as large as |str|.
+  template <class InputRng, class KeyType, class SAIt>
+  void operator()(const InputRng& str,
+                  KeyType key_bound,
+                  SAIt suffix_array) const {
+    // std::iterator_traits (rather than SAIt::value_type) also works when
+    // |SAIt| is a raw pointer.
+    using size_type = typename std::iterator_traits<SAIt>::value_type;
+
+    const auto str_begin = std::begin(str);
+    const auto str_end = std::end(str);
+    const size_type n = static_cast<size_type>(str_end - str_begin);
+
+    // |suffix_array| is first filled with ordered indices of |str|. The seed
+    // is a size_type so that counting is done in the destination's own type.
+    std::iota(suffix_array, suffix_array + n, size_type(0));
+    // Those indices are then sorted with lexicographical comparisons in |str|.
+    std::sort(suffix_array, suffix_array + n,
+              [str_begin, str_end](size_type i, size_type j) {
+                return std::lexicographical_compare(str_begin + i, str_end,
+                                                    str_begin + j, str_end);
+              });
+  }
+};
+
+// A functor class that implements suffix array induced sorting (SA-IS)
+// algorithm with linear time and memory complexity,
+// see http://ieeexplore.ieee.org/abstract/document/5582081/
+class InducedSuffixSort {
+ public:
+  // Type requirements:
+  // |InputRng| is an input random access range.
+  // |KeyType| is an unsigned integer type.
+  // |SAIt| is a random access iterator with mutable values.
+  //
+  // Arguments:
+  // |str| is the input string on which suffix sort is applied.
+  // Characters found in |str| must be in the range [0, |key_bound|)
+  // |suffix_array| is the beginning of the destination range, which is at least
+  // as large as |str|.
+  template <class InputRng, class KeyType, class SAIt>
+  void operator()(const InputRng& str,
+                  KeyType key_bound,
+                  SAIt suffix_array) const {
+    using char_type = typename InputRng::value_type;
+    using index_type = typename SAIt::value_type;
+
+    static_assert(std::is_unsigned<char_type>::value,
+                  "SA-IS only supports input string with unsigned values");
+    static_assert(std::is_unsigned<KeyType>::value, "KeyType must be unsigned");
+
+    // The string length, expressed in the suffix array's element type.
+    const auto distance = std::end(str) - std::begin(str);
+    index_type length = static_cast<index_type>(distance);
+
+    // All the work is delegated to the implementation selected by the index
+    // and key types.
+    Implementation<index_type, KeyType>::SuffixSort(std::begin(str), length,
+                                                    key_bound, suffix_array);
+  }
+
+ // Given string S of length n. We assume S is terminated by a unique sentinel
+ // $, which is considered as the smallest character. This sentinel does not
+ // exist in memory and is only treated implicitly, hence |n| does not count
+ // the sentinel in this implementation. We denote suf(S,i) the suffix formed
+ // by S[i..n).
+
+ // A suffix suf(S,i) is said to be S-type or L-type, if suf(S,i) < suf(S,i+1)
+ // or suf(S,i) > suf(S,i+1), respectively.
+ enum SLType : bool { SType, LType };  // SType == false, LType == true.
+
+ // A character S[i] is said to be S-type or L-type if the suffix suf(S,i) is
+ // S-type or L-type, respectively.
+
+ // A character S[i] is called LMS (leftmost S-type), if S[i] is S-type and
+ // S[i-1] is L-type. A suffix suf(S,i) is called LMS, if S[i] is an LMS
+ // character.
+
+ // A substring S[i..j) is an LMS-substring if
+ // (1) S[i] is LMS, S[j] is LMS or the sentinel $, and S[i..j) has no other
+ // LMS characters, or
+ // (2) S[i..j) is the sentinel $.
+
+ template <class SizeType, class KeyType>
+ struct Implementation {
+ static_assert(std::is_unsigned<SizeType>::value,
+ "SizeType must be unsigned");
+ static_assert(std::is_unsigned<KeyType>::value, "KeyType must be unsigned");
+ using size_type = SizeType;
+ using key_type = KeyType;
+
+ using iterator = typename std::vector<size_type>::iterator;
+ using const_iterator = typename std::vector<size_type>::const_iterator;
+
+ // Partition every suffix based on SL-type. Returns the number of LMS
+ // suffixes. Types are written through |sl_partition_it|, one per character of |str|.
+ template <class StrIt>
+ static size_type BuildSLPartition(
+ StrIt str,
+ size_type length,
+ key_type key_bound,
+ std::vector<SLType>::reverse_iterator sl_partition_it) {  // Advanced in step with the backward scan of |str|.
+ // We will count LMS suffixes (S to L-type or last S-type).
+ size_type lms_count = 0;
+
+ // |previous_type| is initialized to L-type to avoid counting an extra
+ // LMS suffix at the end.
+ SLType previous_type = LType;
+
+ // Initialized to dummy, impossible key; it marks "no character seen yet".
+ key_type previous_key = key_bound;
+
+ // We're travelling backward to determine the partition,
+ // as if we prepend one character at a time to the string, ex:
+ // b$ is L-type because b > $.
+ // ab$ is S-type because a < b, implying ab$ < b$.
+ // bab$ is L-type because b > a, implying bab$ > ab$.
+ // bbab$ is L-type, because bab$ was also L-type, implying bbab$ > bab$.
+ for (auto str_it = std::reverse_iterator<StrIt>(str + length);
+ str_it != std::reverse_iterator<StrIt>(str);
+ ++str_it, ++sl_partition_it) {
+ key_type current_key = *str_it;
+
+ if (current_key > previous_key || previous_key == key_bound) {
+ // S[i] > S[i + 1] or S[i] is last character.
+ if (previous_type == SType)
+ // suf(S,i) is L-type and suf(S,i + 1) is S-type, therefore,
+ // suf(S,i+1) was a LMS suffix.
+ ++lms_count;
+
+ previous_type = LType; // For next round.
+ } else if (current_key < previous_key) {
+ // S[i] < S[i + 1]
+ previous_type = SType; // For next round.
+ }
+ // Else, S[i] == S[i + 1]:
+ // The next character that differs determines the SL-type,
+ // so we reuse the last seen type.
+
+ *sl_partition_it = previous_type;  // By now this is the type of the current suffix.
+ previous_key = current_key; // For next round.
+ }
+
+ return lms_count;
+ }
+
+    // Scans |sl_partition| from left to right and writes to |lms_indices| the
+    // index of every S-type entry that directly follows an L-type entry, i.e.,
+    // the indices of LMS suffixes.
+    static void FindLmsSuffixes(const std::vector<SLType>& sl_partition,
+                                iterator lms_indices) {
+      // Seeding |prev| with S-type guarantees that index 0 is never reported
+      // as an LMS suffix.
+      SLType prev = SType;
+      size_type index = 0;
+      for (SLType current : sl_partition) {
+        if (prev == LType && current == SType) {
+          *lms_indices = index;
+          ++lms_indices;
+        }
+        prev = current;
+        ++index;
+      }
+    }
+
+    // Returns a histogram with |key_bound| entries, where entry k is the number
+    // of times character k occurs in the |length| characters starting at |str|.
+    template <class StrIt>
+    static std::vector<size_type> MakeBucketCount(StrIt str,
+                                                  size_type length,
+                                                  key_type key_bound) {
+      std::vector<size_type> histogram(static_cast<size_type>(key_bound));
+
+      for (size_type pos = 0; pos != length; ++pos)
+        ++histogram[str[pos]];
+      return histogram;
+    }
+
+ // Apply induced sort from |lms_indices| to |suffix_array| associated with
+ // the string |str|.
+ template <class StrIt, class SAIt>
+ static void InducedSort(StrIt str,
+ size_type length,
+ const std::vector<SLType>& sl_partition,
+ const std::vector<size_type>& lms_indices,
+ const std::vector<size_type>& buckets,
+ SAIt suffix_array) {
+ // All indices are first marked as unset with the illegal value |length|.
+ std::fill(suffix_array, suffix_array + length, length);
+
+ // Used to mark bucket boundaries (head or end) as indices in str.
+ DCHECK(!buckets.empty());
+ std::vector<size_type> bucket_bounds(buckets.size());
+
+ // Step 1: Assign indices for LMS suffixes, populating the end of
+ // respective buckets but keeping relative order.
+
+ // Find the end of each bucket and write it to |bucket_bounds|.
+ std::partial_sum(buckets.begin(), buckets.end(), bucket_bounds.begin());
+
+ // Process each |lms_indices| backward, and assign them to the end of
+ // their respective buckets, so relative order is preserved.
+ for (auto it = lms_indices.crbegin(); it != lms_indices.crend(); ++it) {
+ key_type key = str[*it];
+ suffix_array[--bucket_bounds[key]] = *it;
+ }
+
+ // Step 2
+ // Scan forward |suffix_array|; for each modified suf(S,i) for which
+ // suf(S,SA(i) - 1) is L-type, place suf(S,SA(i) - 1) to the current
+ // head of the corresponding bucket and forward the bucket head to the
+ // right.
+
+ // Find the head of each bucket and write it to |bucket_bounds|. Since
+ // only LMS suffixes were inserted in |suffix_array| during Step 1,
+ // |bucket_bounds| does not contain the head of each bucket and needs to
+ // be updated.
+ bucket_bounds[0] = 0;
+ std::partial_sum(buckets.begin(), buckets.end() - 1,
+ bucket_bounds.begin() + 1);
+
+ // From Step 1, the sentinel $, which we treat implicitly, would have
+ // been placed at the beginning of |suffix_array|, since $ is always
+ // considered as the smallest character. We then have to deal with the
+ // previous (last) suffix. NOTE(review): indexing |length - 1| assumes |length| > 0; confirm callers guarantee it.
+ if (sl_partition[length - 1] == LType) {
+ key_type key = str[length - 1];
+ suffix_array[bucket_bounds[key]++] = length - 1;
+ }
+ for (auto it = suffix_array; it != suffix_array + length; ++it) {
+ size_type suffix_index = *it;
+
+ // While the original algorithm marks unset suffixes with -1,
+ // we found that marking them with |length| is also possible and more
+ // convenient because we are working with unsigned integers.
+ if (suffix_index != length && suffix_index > 0 &&
+ sl_partition[--suffix_index] == LType) {
+ key_type key = str[suffix_index];
+ suffix_array[bucket_bounds[key]++] = suffix_index;
+ }
+ }
+
+ // Step 3
+ // Scan backward |suffix_array|; for each modified suf(S, i) for which
+ // suf(S,SA(i) - 1) is S-type, place suf(S,SA(i) - 1) to the current
+ // end of the corresponding bucket and move the bucket end to the
+ // left.
+
+ // Find the end of each bucket and write it to |bucket_bounds|. Since
+ // only L-type suffixes were inserted in |suffix_array| during Step 2,
+ // |bucket_bounds| does not contain the end of each bucket and needs to
+ // be updated.
+ std::partial_sum(buckets.begin(), buckets.end(), bucket_bounds.begin());
+
+ for (auto it = std::reverse_iterator<SAIt>(suffix_array + length);
+ it != std::reverse_iterator<SAIt>(suffix_array); ++it) {
+ size_type suffix_index = *it;
+ if (suffix_index != length && suffix_index > 0 &&
+ sl_partition[--suffix_index] == SType) {
+ key_type key = str[suffix_index];
+ suffix_array[--bucket_bounds[key]] = suffix_index;
+ }
+ }
+ // Deals with the last suffix, because of the sentinel.
+ if (sl_partition[length - 1] == SType) {
+ key_type key = str[length - 1];
+ suffix_array[--bucket_bounds[key]] = length - 1;
+ }
+ }
+
+ // Given a string S starting at |str| with length |length|, an array
+ // starting at |suffix_array| containing lexicographically ordered LMS
+ // terminated substring indices of S, and an SL-type partition |sl_partition|
+ // of S, assigns a label to every LMS substring so that equal substrings
+ // share a label. Labels are written to |lms_str| and the corresponding LMS
+ // suffix indices to |lms_indices|, both in the order they are visited in
+ // |suffix_array|. Returns the number of distinct labels (last label + 1).
+ template <class StrIt, class SAIt>
+ static size_type LabelLmsSubstrings(StrIt str,
+ size_type length,
+ const std::vector<SLType>& sl_partition,
+ SAIt suffix_array,
+ iterator lms_indices,
+ iterator lms_str) {
+ // Labelling starts at 0.
+ size_type label = 0;
+
+ // |previous_lms| is initialized to 0 to indicate it is unset.
+ // Note that suf(S,0) is never a LMS suffix. Substrings will be visited in
+ // lexicographical order.
+ size_type previous_lms = 0;
+ for (auto it = suffix_array; it != suffix_array + length; ++it) {
+ if (*it > 0 && sl_partition[*it] == SType &&
+ sl_partition[*it - 1] == LType) {
+ // suf(S, *it) is a LMS suffix: S-type preceded by an L-type.
+
+ size_type current_lms = *it;
+ if (previous_lms != 0) {
+ // There was a previous LMS suffix. Check if the current LMS
+ // substring is equal to the previous one.
+ SLType current_lms_type = SType;  // Type at offset k - 1.
+ SLType previous_lms_type = SType;  // Type at offset k - 1.
+ for (size_type k = 0;; ++k) {  // Compares one character per step.
+ // |current_lms_end| and |previous_lms_end| denote whether we have
+ // reached the end of the current and previous LMS substring,
+ // respectively.
+ bool current_lms_end = false;
+ bool previous_lms_end = false;
+
+ // Check for both previous and current substring ends.
+ // Note that it is more convenient to check if
+ // suf(S,current_lms + k) is an LMS suffix than to retrieve it
+ // from |lms_indices|. Running past |length| also ends a substring.
+ if (current_lms + k >= length ||
+ (current_lms_type == LType &&
+ sl_partition[current_lms + k] == SType)) {
+ current_lms_end = true;
+ }
+ if (previous_lms + k >= length ||
+ (previous_lms_type == LType &&
+ sl_partition[previous_lms + k] == SType)) {
+ previous_lms_end = true;
+ }
+
+ if (current_lms_end && previous_lms_end) {
+ break; // Previous and current substrings are identical.
+ } else if (current_lms_end != previous_lms_end ||
+ str[current_lms + k] != str[previous_lms + k]) {
+ // Previous and current substrings differ, a new label is used.
+ ++label;
+ break;
+ }
+
+ current_lms_type = sl_partition[current_lms + k];
+ previous_lms_type = sl_partition[previous_lms + k];
+ }
+ }
+ *lms_indices++ = *it;
+ *lms_str++ = label;  // Same label as before if the substrings matched.
+ previous_lms = current_lms;
+ }
+ }
+
+ return label + 1;  // Labels are 0-based, so the count is last label + 1.
+ }
+
+ // Implementation of the SA-IS algorithm. |str| must be a random access
+ // iterator pointing at the beginning of S with length |length|. The result
+ // is written to |suffix_array|, a random access iterator.
+ template <class StrIt, class SAIt>
+ static void SuffixSort(StrIt str,
+ size_type length,
+ key_type key_bound,  // Forwarded to the partition and bucket helpers.
+ SAIt suffix_array) {
+ if (length == 1)
+ *suffix_array = 0;  // A single suffix is trivially sorted.
+ if (length < 2)  // Covers both the empty and the single-character case.
+ return;
+
+ std::vector<SLType> sl_partition(length);
+ size_type lms_count =
+ BuildSLPartition(str, length, key_bound, sl_partition.rbegin());
+ std::vector<size_type> lms_indices(lms_count);
+ FindLmsSuffixes(sl_partition, lms_indices.begin());
+ std::vector<size_type> buckets = MakeBucketCount(str, length, key_bound);
+
+ if (lms_indices.size() > 1) {  // With 0 or 1 LMS suffix nothing to order.
+ // Given |lms_indices| in the same order they appear in |str|, induce
+ // LMS substrings relative order and write result to |suffix_array|.
+ InducedSort(str, length, sl_partition, lms_indices, buckets,
+ suffix_array);
+ std::vector<size_type> lms_str(lms_indices.size());
+
+ // Given LMS substrings in relative order found in |suffix_array|,
+ // map LMS substrings to unique labels to form a new string, |lms_str|.
+ // This call also rewrites |lms_indices| in that same relative order.
+ size_type label_count =
+ LabelLmsSubstrings(str, length, sl_partition, suffix_array,
+ lms_indices.begin(), lms_str.begin());
+
+ // Fewer labels than LMS substrings means some substrings are equal.
+ if (label_count < lms_str.size()) {
+ // Reorder |lms_str| to have LMS suffixes in the same order they
+ // appear in |str|. |suffix_array| is used as scratch space here.
+ for (size_type i = 0; i < lms_indices.size(); ++i)
+ suffix_array[lms_indices[i]] = lms_str[i];
+
+ SLType previous_type = SType;
+ for (size_type i = 0, j = 0; i < sl_partition.size(); ++i) {
+ if (sl_partition[i] == SType && previous_type == LType) {
+ lms_str[j] = suffix_array[i];
+ lms_indices[j++] = i;
+ }
+ previous_type = sl_partition[i];
+ }
+
+ // Recursively apply SuffixSort on |lms_str|, which is formed from
+ // labeled LMS suffixes in the same order they appear in |str|.
+ // Note that |KeyType| will be size_type because |lms_str| contains
+ // indices. |lms_str| is at most half the length of |str|.
+ Implementation<size_type, size_type>::SuffixSort(
+ lms_str.begin(), static_cast<size_type>(lms_str.size()),
+ label_count, suffix_array);
+
+ // Map LMS labels back to indices in |str| and write result to
+ // |lms_indices|. We're using |suffix_array| as a temporary buffer.
+ for (size_type i = 0; i < lms_indices.size(); ++i)
+ suffix_array[i] = lms_indices[suffix_array[i]];
+ std::copy_n(suffix_array, lms_indices.size(), lms_indices.begin());
+
+ // At this point, |lms_indices| contains sorted LMS suffixes of |str|.
+ }
+ }
+ // Given |lms_indices| where LMS suffixes are sorted, induce the full
+ // order of suffixes in |str|.
+ InducedSort(str, length, sl_partition, lms_indices, buckets,
+ suffix_array);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Implementation);
+ };
+};
+
+// Generates a sorted suffix array for the input string |str| using the functor
+// |Algorithm| which provides an interface equivalent to NaiveSuffixSort.
+// Characters found in |str| are assumed to be in range [0, |key_bound|).
+// Returns the suffix array as a vector with one entry per element of |str|.
+// |StrRng| is an input random access range.
+// |KeyType| is an unsigned integer type.
+template <class Algorithm, class StrRng, class KeyType>
+std::vector<typename StrRng::size_type> MakeSuffixArray(const StrRng& str,
+ KeyType key_bound) {
+ Algorithm sort;  // Default-constructed functor that performs the sort.
+ std::vector<typename StrRng::size_type> suffix_array(str.end() - str.begin());
+ sort(str, key_bound, suffix_array.begin());  // Fills |suffix_array|.
+ return suffix_array;
+}
+
+// Type requirements:
+// |SARng| is an input random access range.
+// |StrIt1| is a random access iterator.
+// |StrIt2| is a forward iterator.
+template <class SARng, class StrIt1, class StrIt2>
+// Returns an iterator to the lexicographical lower bound, found by binary
+// search, of [|str2_first|, |str2_last|) in the suffix array |suffix_array|
+// of a string starting at |str1_first|. This does not necessarily point to
+// the longest matching substring.
+auto SuffixLowerBound(const SARng& suffix_array,
+ StrIt1 str1_first,
+ StrIt2 str2_first,
+ StrIt2 str2_last) -> decltype(std::begin(suffix_array)) {
+ using size_type = typename SARng::value_type;
+
+ // |n| marks the end of the first string; this assumes |suffix_array| has
+ // exactly one entry per character of that string.
+ size_t n = std::end(suffix_array) - std::begin(suffix_array);
+ auto it = std::lower_bound(
+ std::begin(suffix_array), std::end(suffix_array), str2_first,
+ [str1_first, str2_last, n](size_type a, StrIt2 b) {  // suffix a < query?
+ return std::lexicographical_compare(str1_first + a, str1_first + n, b,
+ str2_last);
+ });
+ return it;
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_
diff --git a/suffix_array_unittest.cc b/suffix_array_unittest.cc
new file mode 100644
index 0000000..c6f8b02
--- /dev/null
+++ b/suffix_array_unittest.cc
@@ -0,0 +1,331 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/suffix_array.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <initializer_list>
+#include <string>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using SLType = InducedSuffixSort::SLType;
+
+} // namespace
+
+using ustring = std::basic_string<unsigned char>;  // Byte string for tests.
+
+constexpr uint16_t kNumChar = 256;  // Passed as the key bound in these tests.
+
+ustring MakeUnsignedString(const std::string& str) {
+ return {str.begin(), str.end()};  // Copies |str| element by element.
+}
+
+template <class T>
+std::vector<T> MakeVector(const std::initializer_list<T>& ilist) {
+ return {ilist.begin(), ilist.end()};  // Copy so it compares with a vector.
+}
+
+void TestSlPartition(std::initializer_list<SLType> expected_sl_partition,
+ std::initializer_list<size_t> expected_lms_indices,
+ std::string str) {
+ using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
+
+ std::vector<SLType> sl_partition(str.size());
+ EXPECT_EQ(expected_lms_indices.size(),  // Return value is the LMS count.
+ SaisImpl::BuildSLPartition(str.begin(), str.size(), kNumChar,
+ sl_partition.rbegin()));
+ EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition);
+
+ std::vector<size_t> lms_indices(expected_lms_indices.size());
+ // Fed with the expected partition, so this check is independent of above.
+ SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin());
+ EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices);
+}
+
+TEST(InducedSuffixSortTest, BuildSLPartition) {
+ TestSlPartition({}, {}, "");  // Empty input: no types, no LMS suffixes.
+ TestSlPartition(
+ {
+ SLType::LType,
+ },
+ {}, "a");
+ TestSlPartition(
+ {
+ SLType::LType, SLType::LType,
+ },
+ {}, "ba");
+ TestSlPartition(
+ {
+ SLType::SType, SLType::LType,
+ },
+ {}, "ab");
+ TestSlPartition(
+ {
+ SLType::SType, SLType::SType, SLType::LType,
+ },
+ {}, "aab");
+ TestSlPartition(
+ {
+ SLType::LType, SLType::LType, SLType::LType,
+ },
+ {}, "bba");
+ TestSlPartition(
+ {
+ SLType::LType, SLType::SType, SLType::LType,
+ },
+ {1}, "bab");  // Index 1 is S-type preceded by L-type, hence LMS.
+ TestSlPartition(
+ {
+ SLType::LType, SLType::SType, SLType::SType, SLType::LType,
+ },
+ {1}, "baab");  // Only the first S-type of a run is LMS.
+
+ TestSlPartition(
+ {
+ SLType::LType, // zucchini
+ SLType::LType, // ucchini
+ SLType::SType, // cchini
+ SLType::SType, // chini
+ SLType::SType, // hini
+ SLType::SType, // ini
+ SLType::LType, // ni
+ SLType::LType, // i
+ },
+ {2}, "zucchini");
+}
+
+std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str,
+ uint16_t max_key) {
+ using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
+ return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key);  // Thin wrapper.
+}
+
+TEST(InducedSuffixSortTest, BucketCount) {
+ using vec = std::vector<size_t>;
+
+ EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4));  // One slot per key.
+ EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4));
+ EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4));  // Per-key counts.
+}
+
+std::vector<size_t> InducedSortSubstring(ustring str) {
+ using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
+ std::vector<SLType> sl_partition(str.size());
+ size_t lms_count = SaisImpl::BuildSLPartition(
+ str.begin(), str.size(), kNumChar, sl_partition.rbegin());
+ std::vector<size_t> lms_indices(lms_count);
+ SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin());
+ auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), kNumChar);
+
+ std::vector<size_t> suffix_array(str.size());
+ // Single induced-sort pass with LMS suffixes in order of appearance.
+ SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets,
+ suffix_array.begin());
+
+ return suffix_array;
+}
+
+TEST(InducedSuffixSortTest, InducedSortSubstring) {
+ using vec = std::vector<size_t>;
+
+ auto us = MakeUnsignedString;
+
+ // Each comment gives the SL types, then the suffixes in expected order.
+ // L; a$
+ EXPECT_EQ(vec({0}), InducedSortSubstring(us("a")));
+
+ // SL; ab$, b$
+ EXPECT_EQ(vec({0, 1}), InducedSortSubstring(us("ab")));
+
+ // LL; a$, ba$
+ EXPECT_EQ(vec({1, 0}), InducedSortSubstring(us("ba")));
+
+ // SLL; a$, aba$, ba$
+ EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring(us("aba")));
+
+ // LSL; ab$, b$, bab$
+ EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring(us("bab")));
+
+ // SSL; aab$, ab$, b$
+ EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring(us("aab")));
+
+ // LSSL; aab$, ab$, b$, baab$
+ EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring(us("baab")));
+}
+
+template <class Algorithm>
+void TestSuffixSort(ustring test_str) {
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<Algorithm>(test_str, kNumChar);
+ EXPECT_EQ(test_str.size(), suffix_array.size());
+
+ // Expect that |suffix_array| is a permutation of [0, len).
+ std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end());
+ std::sort(sorted_suffix.begin(), sorted_suffix.end());
+ for (size_t i = 0; i < test_str.size(); ++i)
+ EXPECT_EQ(i, sorted_suffix[i]);
+
+ // Expect that all suffixes are strictly ordered.
+ auto end = test_str.end();
+ for (size_t i = 1; i < test_str.size(); ++i) {
+ auto suf1 = test_str.begin() + suffix_array[i - 1];
+ auto suf2 = test_str.begin() + suffix_array[i];
+ bool is_less = std::lexicographical_compare(suf1, end, suf2, end);
+ EXPECT_TRUE(is_less);  // Each suffix must precede its successor.
+ }
+}
+
+constexpr const char* test_strs[] = {  // Shared inputs for the tests below.
+ "",
+ "a",
+ "aa",
+ "za",
+ "CACAO",
+ "aaaaa",
+ "banana",
+ "tobeornottobe",
+ "The quick brown fox jumps over the lazy dog.",
+ "elephantelephantelephantelephantelephant",
+ "walawalawashington",
+ "-------------------------",
+ "011010011001011010010110011010010",
+ "3141592653589793238462643383279502884197169399375105",
+ "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
+ "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba",
+ "0123456789876543210",
+ "9876543210123456789",
+ "aababcabcdabcdeabcdefabcdefg",
+ "asdhklgalksdjghalksdjghalksdjgh",
+};
+
+TEST(SuffixSortTest, NaiveSuffixSort) {
+ for (const std::string& test_str : test_strs) {  // Each entry converts to a temporary std::string.
+ TestSuffixSort<NaiveSuffixSort>(MakeUnsignedString(test_str));
+ }
+}
+
+TEST(SuffixSortTest, InducedSuffixSortSort) {
+ for (const std::string& test_str : test_strs) {  // Same inputs as the naive test.
+ TestSuffixSort<InducedSuffixSort>(MakeUnsignedString(test_str));
+ }
+}
+
+// Test with sequences that contain every character value exactly once.
+TEST(SuffixSortTest, AllChar) {
+ std::vector<unsigned char> all_char(kNumChar);
+ // NOTE(review): std::iota lives in <numeric>, not included directly here.
+ std::iota(all_char.begin(), all_char.end(), 0);
+
+ {
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<InducedSuffixSort>(all_char, kNumChar);
+ for (size_t i = 0; i < kNumChar; ++i)
+ EXPECT_EQ(i, suffix_array[i]);  // Increasing input: identity order.
+ }
+
+ std::vector<unsigned char> all_char_reverse(all_char.rbegin(),
+ all_char.rend());
+ {
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<InducedSuffixSort>(all_char_reverse, kNumChar);
+ for (size_t i = 0; i < kNumChar; ++i)
+ EXPECT_EQ(kNumChar - i - 1, suffix_array[i]);  // Decreasing: reversed.
+ }
+}
+
+void TestSuffixLowerBound(ustring base_str, ustring search_str) {
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<NaiveSuffixSort>(base_str, kNumChar);
+
+ auto pos = SuffixLowerBound(suffix_array, base_str.begin(),
+ search_str.begin(), search_str.end());
+
+ auto end = base_str.end();
+ if (pos != suffix_array.begin()) {
+ // Previous suffix is less than |search_str|.
+ auto suf = base_str.begin() + pos[-1];
+ bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
+ search_str.end());
+ EXPECT_TRUE(is_less);
+ }
+ if (pos != suffix_array.end()) {
+ // Current suffix is greater than or equal to |search_str|.
+ auto suf = base_str.begin() + *pos;
+ bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
+ search_str.end());
+ EXPECT_FALSE(is_less);
+ }
+}
+
+TEST(SuffixArrayTest, LowerBound) {
+ auto us = MakeUnsignedString;
+
+ TestSuffixLowerBound(us(""), us(""));  // Degenerate and tiny inputs first.
+ TestSuffixLowerBound(us(""), us("a"));
+ TestSuffixLowerBound(us("b"), us(""));
+ TestSuffixLowerBound(us("b"), us("a"));
+ TestSuffixLowerBound(us("b"), us("c"));
+ TestSuffixLowerBound(us("b"), us("bc"));
+ TestSuffixLowerBound(us("aa"), us("a"));
+ TestSuffixLowerBound(us("aa"), us("aa"));
+
+ ustring sentence = us("the quick brown fox jumps over the lazy dog.");
+ // Entire string: exact and unique.
+ TestSuffixLowerBound(sentence, sentence);
+ // Empty string: exact and non-unique.
+ TestSuffixLowerBound(sentence, us(""));
+ // Exact and unique suffix matches.
+ TestSuffixLowerBound(sentence, us("."));
+ TestSuffixLowerBound(sentence, us("the lazy dog."));
+ // Exact and unique non-suffix matches.
+ TestSuffixLowerBound(sentence, us("quick"));
+ TestSuffixLowerBound(sentence, us("the quick"));
+ // Partial and unique matches.
+ TestSuffixLowerBound(sentence, us("fox jumps with the hosps"));
+ TestSuffixLowerBound(sentence, us("xyz"));
+ // Exact and non-unique match: take lexicographical first.
+ TestSuffixLowerBound(sentence, us("the"));
+ TestSuffixLowerBound(sentence, us(" "));
+ // Partial and non-unique match.
+ // query < "the l"... < "the q"...
+ TestSuffixLowerBound(sentence, us("the apple"));
+ // "the l"... < query < "the q"...
+ TestSuffixLowerBound(sentence, us("the opera"));
+ // "the l"... < "the q"... < query
+ TestSuffixLowerBound(sentence, us("the zebra"));
+ // Prefix match dominates suffix match (unique).
+ TestSuffixLowerBound(sentence, us("over quick brown fox"));
+ // Empty matches.
+ TestSuffixLowerBound(sentence, us(","));
+ TestSuffixLowerBound(sentence, us("1234"));
+ TestSuffixLowerBound(sentence, us("THE QUICK BROWN FOX"));
+ TestSuffixLowerBound(sentence, us("(the"));
+}
+
+TEST(SuffixArrayTest, LowerBoundExact) {
+ for (const std::string& test_str : test_strs) {
+ ustring test_ustr = MakeUnsignedString(test_str);
+
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<InducedSuffixSort>(test_ustr, kNumChar);
+
+ for (size_t lo = 0; lo < test_str.size(); ++lo) {  // Every non-empty substring.
+ for (size_t hi = lo + 1; hi <= test_str.size(); ++hi) {
+ ustring query(test_ustr.begin() + lo, test_ustr.begin() + hi);
+ ASSERT_EQ(query.size(), hi - lo);
+ auto pos = SuffixLowerBound(suffix_array, test_ustr.begin(),
+ query.begin(), query.end());
+ EXPECT_TRUE(  // The suffix found must start with |query|.
+ std::equal(query.begin(), query.end(), test_ustr.begin() + *pos));
+ }
+ }
+ }
+}
+
+} // namespace zucchini
diff --git a/target_pool.cc b/target_pool.cc
new file mode 100644
index 0000000..0c1e0a5
--- /dev/null
+++ b/target_pool.cc
@@ -0,0 +1,84 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/target_pool.h"
+
+#include <algorithm>
+#include <iterator>
+#include <utility>
+
+#include "base/logging.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/equivalence_map.h"
+
+namespace zucchini {
+
+TargetPool::TargetPool() = default;
+
+TargetPool::TargetPool(std::vector<offset_t>&& targets) {
+ DCHECK(targets_.empty());
+ DCHECK(std::is_sorted(targets.begin(), targets.end()));  // Uniqueness is not checked here.
+ targets_ = std::move(targets);  // Takes over the caller's storage.
+}
+
+TargetPool::TargetPool(TargetPool&&) = default;
+TargetPool::TargetPool(const TargetPool&) = default;
+TargetPool::~TargetPool() = default;
+
+void TargetPool::InsertTargets(const std::vector<offset_t>& targets) {
+ std::copy(targets.begin(), targets.end(), std::back_inserter(targets_));  // Append, then normalize.
+ SortAndUniquify(&targets_);
+}
+
+void TargetPool::InsertTargets(TargetSource* targets) {
+ for (auto target = targets->GetNext(); target.has_value();  // Drain until GetNext() yields no value.
+ target = targets->GetNext()) {
+ targets_.push_back(*target);
+ }
+ // InsertTargets() can be called many times (number of reference types for the
+ // pool) in succession. Calling SortAndUniquify() every time enables deduping
+ // to occur more often. This prioritizes peak memory reduction over running
+ // time.
+ SortAndUniquify(&targets_);
+}
+
+void TargetPool::InsertTargets(const std::vector<Reference>& references) {
+ // This can be called many times, so it's better to let std::back_inserter()
+ // manage |targets_| resize, instead of manually reserving space.
+ std::transform(references.begin(), references.end(),
+ std::back_inserter(targets_),
+ [](const Reference& ref) { return ref.target; });  // Keep only the target of each reference.
+ SortAndUniquify(&targets_);
+}
+
+void TargetPool::InsertTargets(ReferenceReader&& references) {
+ for (auto ref = references.GetNext(); ref.has_value();  // Drain until GetNext() yields no value.
+ ref = references.GetNext()) {
+ targets_.push_back(ref->target);
+ }
+ SortAndUniquify(&targets_);  // Restore the ordering after appending.
+}
+
+key_t TargetPool::KeyForOffset(offset_t offset) const {
+ auto pos = std::lower_bound(targets_.begin(), targets_.end(), offset);  // Binary search.
+ DCHECK(pos != targets_.end() && *pos == offset);  // |offset| must be an existing target.
+ return static_cast<offset_t>(pos - targets_.begin());  // The key is the index into |targets_|.
+}
+
+key_t TargetPool::KeyForNearestOffset(offset_t offset) const {
+ auto pos = std::lower_bound(targets_.begin(), targets_.end(), offset);  // First target >= |offset|.
+ if (pos != targets_.begin()) {
+ // If distances are equal, prefer lower key.
+ if (pos == targets_.end() || *pos - offset >= offset - pos[-1])
+ --pos;  // The preceding target is at least as close.
+ }
+ return static_cast<offset_t>(pos - targets_.begin());  // Index 0 if |targets_| is empty.
+}
+
+void TargetPool::FilterAndProject(const OffsetMapper& offset_mapper) {
+ offset_mapper.ForwardProjectAll(&targets_);  // Rewrites |targets_| in place.
+ std::sort(targets_.begin(), targets_.end());  // NOTE(review): sorts only; dedup presumably left to ForwardProjectAll() - confirm.
+}
+
+} // namespace zucchini
diff --git a/target_pool.h b/target_pool.h
new file mode 100644
index 0000000..b881b1e
--- /dev/null
+++ b/target_pool.h
@@ -0,0 +1,77 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TARGET_POOL_H_
+#define COMPONENTS_ZUCCHINI_TARGET_POOL_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/patch_reader.h"
+
+namespace zucchini {
+
+class OffsetMapper;
+class TargetSource;
+
+// Ordered container of distinct targets that have the same semantics, along
+// with a list of associated reference types, only used during patch generation.
+class TargetPool {
+ public:
+ using const_iterator = std::vector<offset_t>::const_iterator;
+
+ TargetPool();
+ // Initializes the object with given sorted and unique |targets|.
+ explicit TargetPool(std::vector<offset_t>&& targets);
+ TargetPool(TargetPool&&);
+ TargetPool(const TargetPool&);
+ ~TargetPool();
+
+ // Insert new targets from various sources. These invalidate all previous key
+ // lookups, since keys are positions in the sorted target list.
+ // - From a list of targets, useful for adding extra targets in Zucchini-gen:
+ void InsertTargets(const std::vector<offset_t>& targets);
+ // - From TargetSource, useful for adding extra targets in Zucchini-apply:
+ void InsertTargets(TargetSource* targets);
+ // - From list of References, useful for listing targets in Zucchini-gen:
+ void InsertTargets(const std::vector<Reference>& references);
+ // - From ReferenceReader, useful for listing targets in Zucchini-apply:
+ void InsertTargets(ReferenceReader&& references);
+
+ // Adds |type| as a reference type associated with the pool of targets.
+ void AddType(TypeTag type) { types_.push_back(type); }
+
+ // Returns a canonical key associated with a valid target at |offset|.
+ key_t KeyForOffset(offset_t offset) const;
+
+ // Returns a canonical key associated with the target nearest to |offset|.
+ key_t KeyForNearestOffset(offset_t offset) const;
+
+ // Returns the target for a |key|, which is assumed to be valid and held by
+ // this class. No bounds check is performed.
+ offset_t OffsetForKey(key_t key) const { return targets_[key]; }
+
+ // Uses |offset_mapper| to transform "old" |targets_| to "new" |targets_|,
+ // resulting in sorted and unique targets.
+ void FilterAndProject(const OffsetMapper& offset_mapper);
+
+ // Accessors for testing.
+ const std::vector<offset_t>& targets() const { return targets_; }
+ const std::vector<TypeTag>& types() const { return types_; }
+
+ // Returns the number of targets.
+ size_t size() const { return targets_.size(); }
+ const_iterator begin() const { return targets_.cbegin(); }  // Iterates over targets.
+ const_iterator end() const { return targets_.cend(); }
+
+ private:
+ std::vector<TypeTag> types_; // Enumerates type_tag for this pool.
+ std::vector<offset_t> targets_; // Targets for pool in ascending order.
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TARGET_POOL_H_
diff --git a/target_pool_unittest.cc b/target_pool_unittest.cc
new file mode 100644
index 0000000..4c3efec
--- /dev/null
+++ b/target_pool_unittest.cc
@@ -0,0 +1,64 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/target_pool.h"
+
+#include <cmath>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using OffsetVector = std::vector<offset_t>;
+
+} // namespace
+
+TEST(TargetPoolTest, InsertTargetsFromReferences) {
+ auto test_insert = [](std::vector<Reference>&& references) -> OffsetVector {
+ TargetPool target_pool;
+ target_pool.InsertTargets(references);
+ // Return copy since |target_pool| goes out of scope.
+ return target_pool.targets();
+ };
+
+ // Each Reference below is presumably {location, target}; only targets are kept.
+ EXPECT_EQ(OffsetVector(), test_insert({}));
+ EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 0}, {10, 1}}));
+ EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 1}, {10, 0}}));  // Output is sorted.
+ EXPECT_EQ(OffsetVector({0, 1, 2}), test_insert({{0, 1}, {10, 0}, {20, 2}}));
+ EXPECT_EQ(OffsetVector({0}), test_insert({{0, 0}, {10, 0}}));  // Duplicates are merged.
+ EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 0}, {10, 0}, {20, 1}}));
+}
+
+TEST(TargetPoolTest, KeyOffset) {
+ auto test_key_offset = [](const std::string& nearest_offsets_key,
+ OffsetVector&& targets) {
+ TargetPool target_pool(std::move(targets));
+ for (offset_t offset : target_pool.targets()) {  // Round trip: offset -> key -> offset.
+ offset_t key = target_pool.KeyForOffset(offset);
+ EXPECT_LT(key, target_pool.size());
+ EXPECT_EQ(offset, target_pool.OffsetForKey(key));
+ }
+ for (offset_t offset = 0; offset < nearest_offsets_key.size(); ++offset) {
+ key_t key = target_pool.KeyForNearestOffset(offset);
+ EXPECT_EQ(key, static_cast<key_t>(nearest_offsets_key[offset] - '0'));  // Digit i is the expected key for offset i.
+ }
+ };
+ test_key_offset("0000000000000000", {});  // Empty pool always yields key 0.
+ test_key_offset("0000000000000000", {0});
+ test_key_offset("0000000000000000", {1});
+ test_key_offset("0111111111111111", {0, 1});
+ test_key_offset("0011111111111111", {0, 2});  // Offset 1 is a tie: lower key wins.
+ test_key_offset("0011111111111111", {1, 2});
+ test_key_offset("0001111111111111", {1, 3});
+ test_key_offset("0001112223334444", {1, 3, 7, 9, 13});
+ test_key_offset("0000011112223333", {1, 7, 9, 13});
+}
+
+} // namespace zucchini
diff --git a/targets_affinity.cc b/targets_affinity.cc
new file mode 100644
index 0000000..11903a9
--- /dev/null
+++ b/targets_affinity.cc
@@ -0,0 +1,108 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/targets_affinity.h"
+
+#include <algorithm>
+
+#include "base/logging.h"
+#include "components/zucchini/equivalence_map.h"
+
+namespace zucchini {
+
+namespace {
+
+constexpr uint32_t kNoLabel = 0;
+}
+
+TargetsAffinity::TargetsAffinity() = default;
+TargetsAffinity::~TargetsAffinity() = default;
+
+void TargetsAffinity::InferFromSimilarities(
+ const EquivalenceMap& equivalences,
+ const std::vector<offset_t>& old_targets,
+ const std::vector<offset_t>& new_targets) {
+ forward_association_.assign(old_targets.size(), {});  // Indexed by old key.
+ backward_association_.assign(new_targets.size(), {});  // Indexed by new key.
+
+ if (old_targets.empty() || new_targets.empty())
+ return;
+
+ key_t new_key = 0;  // Never reset: only moves forward across candidates.
+ for (auto candidate : equivalences) { // Sorted by |dst_offset|.
+ DCHECK_GT(candidate.similarity, 0.0);
+ while (new_key < new_targets.size() &&  // Skip targets before |candidate.eq|.
+ new_targets[new_key] < candidate.eq.dst_offset) {
+ ++new_key;
+ }
+
+ // Visit each new target covered by |candidate.eq| and find / update its
+ // associated old target.
+ for (; new_key < new_targets.size() &&
+ new_targets[new_key] < candidate.eq.dst_end();
+ ++new_key) {
+ if (backward_association_[new_key].affinity >= candidate.similarity)
+ continue;  // The existing association is at least as strong.
+
+ DCHECK_GE(new_targets[new_key], candidate.eq.dst_offset);
+ offset_t old_target = new_targets[new_key] - candidate.eq.dst_offset +  // Same relative position in the source range.
+ candidate.eq.src_offset;
+ auto old_it =
+ std::lower_bound(old_targets.begin(), old_targets.end(), old_target);
+ // If new target can be mapped via |candidate.eq| to an old target, then
+ // attempt to associate them. Multiple new targets can compete for the
+ // same old target. The heuristic here makes selections to maximize
+ // |candidate.similarity|, and if a tie occurs, minimize new target offset
+ // (by first-come, first-served).
+ if (old_it != old_targets.end() && *old_it == old_target) {
+ key_t old_key = static_cast<key_t>(old_it - old_targets.begin());
+ if (candidate.similarity > forward_association_[old_key].affinity) {
+ // Reset other associations, so both tables stay mutually consistent.
+ if (forward_association_[old_key].affinity > 0.0)
+ backward_association_[forward_association_[old_key].other] = {};
+ if (backward_association_[new_key].affinity > 0.0)
+ forward_association_[backward_association_[new_key].other] = {};
+ // Assign new association.
+ forward_association_[old_key] = {new_key, candidate.similarity};
+ backward_association_[new_key] = {old_key, candidate.similarity};
+ }
+ }
+ }
+ }
+}
+
+uint32_t TargetsAffinity::AssignLabels(double min_affinity,
+ std::vector<uint32_t>* old_labels,
+ std::vector<uint32_t>* new_labels) {
+ old_labels->assign(forward_association_.size(), kNoLabel);  // Start with everything unlabeled.
+ new_labels->assign(backward_association_.size(), kNoLabel);
+
+ uint32_t label = kNoLabel + 1;  // First label to hand out.
+ for (key_t old_key = 0; old_key < forward_association_.size(); ++old_key) {
+ Association association = forward_association_[old_key];
+ if (association.affinity >= min_affinity) {
+ (*old_labels)[old_key] = label;
+ DCHECK_EQ(0U, (*new_labels)[association.other]);  // New target must still be unlabeled.
+ (*new_labels)[association.other] = label;  // Both sides share the label.
+ ++label;
+ }
+ }
+ return label;  // One past the last label assigned.
+}
+
+double TargetsAffinity::AffinityBetween(key_t old_key, key_t new_key) const {
+ DCHECK_LT(old_key, forward_association_.size());
+ DCHECK_LT(new_key, backward_association_.size());
+ if (forward_association_[old_key].affinity > 0.0 &&
+ forward_association_[old_key].other == new_key) {  // The two keys are associated with each other.
+ DCHECK_EQ(backward_association_[new_key].other, old_key);
+ DCHECK_EQ(forward_association_[old_key].affinity,
+ backward_association_[new_key].affinity);
+ return forward_association_[old_key].affinity;
+ }
+ return -std::max(forward_association_[old_key].affinity,  // Not associated together: negate the stronger affinity.
+ backward_association_[new_key].affinity);  // Yields zero when neither key has an association.
+}
+
+} // namespace zucchini
diff --git a/targets_affinity.h b/targets_affinity.h
new file mode 100644
index 0000000..3a154e7
--- /dev/null
+++ b/targets_affinity.h
@@ -0,0 +1,74 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_
+#define COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "base/macros.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+class EquivalenceMap;
+
+// Computes and stores affinity between old and new targets for a single target
+// pool. This is only used during patch generation.
+class TargetsAffinity {
+ public:
+ TargetsAffinity();
+ ~TargetsAffinity();
+
+ // Infers affinity between |old_targets| and |new_targets| using similarities
+ // described by |equivalence_map|, and updates internal state for retrieval of
+ // affinity scores. Both |old_targets| and |new_targets| are targets in the
+ // same pool and are sorted in ascending order.
+ void InferFromSimilarities(const EquivalenceMap& equivalence_map,
+ const std::vector<offset_t>& old_targets,
+ const std::vector<offset_t>& new_targets);
+
+ // Assigns labels to targets based on associations previously inferred, using
+ // |min_affinity| to reject associations with weak |affinity|. Label 0 is
+ // assigned to unassociated targets. Labels for old targets are written to
+ // |old_labels| and labels for new targets are written to |new_labels|.
+ // Returns the upper bound on assigned labels (>= 1 since 0 is used).
+ uint32_t AssignLabels(double min_affinity,
+ std::vector<uint32_t>* old_labels,
+ std::vector<uint32_t>* new_labels);
+
+ // Returns the affinity score between targets identified by |old_key| and
+ // |new_key|. Affinity > 0 means an association is likely, < 0 means
+ // incompatible association, and 0 means neither target has been associated.
+ double AffinityBetween(key_t old_key, key_t new_key) const;
+
+ private:
+ struct Association {
+ key_t other = 0;
+ double affinity = 0.0;
+ };
+
+ // Forward and backward associations between old and new targets. For each
+ // Association element, if |affinity == 0.0| then no association is defined
+ // (and |other| is meaningless). Otherwise |affinity > 0.0|, and the
+ // association between |old_labels[old_key]| and |new_labels[new_key]| is
+ // represented by:
+ // forward_association_[old_key].other == new_key;
+ // backward_association_[new_key].other == old_key;
+ // forward_association_[old_key].affinity ==
+ // backward_association_[new_key].affinity;
+ // The two lists contain the same information, but having both enables quick
+ // lookup, given |old_key| or |new_key|.
+ std::vector<Association> forward_association_;
+ std::vector<Association> backward_association_;
+
+ DISALLOW_COPY_AND_ASSIGN(TargetsAffinity);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_
diff --git a/targets_affinity_unittest.cc b/targets_affinity_unittest.cc
new file mode 100644
index 0000000..86182f9
--- /dev/null
+++ b/targets_affinity_unittest.cc
@@ -0,0 +1,131 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/targets_affinity.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "components/zucchini/equivalence_map.h"
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// Checks the full old-by-new matrix of AffinityBetween() results after
+// inference on small hand-written inputs.
+TEST(TargetsAffinityTest, AffinityBetween) {
+ using AffinityVector = std::vector<std::vector<double>>;
+
+ // A common TargetsAffinity is used across independent tests. This is to
+ // reflect actual usage, in which common TargetsAffinity is used so that
+ // internal buffers get reused.
+ TargetsAffinity targets_affinity;
+
+ // Runs inference, then returns every AffinityBetween() result as a matrix
+ // indexed by [old key][new key].
+ auto test_affinity = [&targets_affinity](
+ const EquivalenceMap& equivalence_map,
+ const std::vector<offset_t>& old_targets,
+ const std::vector<offset_t>& new_targets) {
+ targets_affinity.InferFromSimilarities(equivalence_map, old_targets,
+ new_targets);
+ AffinityVector affinities(old_targets.size());
+ for (key_t i = 0; i < old_targets.size(); ++i) {
+ for (key_t j = 0; j < new_targets.size(); ++j) {
+ affinities[i].push_back(targets_affinity.AffinityBetween(i, j));
+ }
+ }
+ return affinities;
+ };
+
+ // NOTE(review): each EquivalenceMap entry below appears to be
+ // {{old offset, new offset, length}, similarity} -- confirm against
+ // equivalence_map.h.
+
+ // Empty target lists give an empty matrix, with or without equivalences.
+ EXPECT_EQ(AffinityVector({}), test_affinity(EquivalenceMap(), {}, {}));
+ EXPECT_EQ(AffinityVector({}),
+ test_affinity(EquivalenceMap({{{0, 0, 8}, 1.0}}), {}, {}));
+
+ // Without equivalences no pair has any affinity.
+ EXPECT_EQ(AffinityVector({{0.0, 0.0}, {0.0, 0.0}}),
+ test_affinity(EquivalenceMap(), {0, 10}, {0, 5}));
+
+ // Single equivalence; the expected matrices show which target pair (if any)
+ // it associates.
+ EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 1}, 1.0}}), {0, 10}, {0, 5}));
+
+ EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 2}, 1.0}}), {1, 10}, {1, 5}));
+
+ EXPECT_EQ(AffinityVector({{0.0, 0.0}, {0.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 1, 2}, 1.0}}), {1, 10}, {1, 5}));
+
+ EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 1, 2}, 1.0}}), {0, 10}, {1, 5}));
+
+ // The similarity value is carried through as the affinity.
+ EXPECT_EQ(AffinityVector({{2.0, -2.0}, {-2.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 1}, 2.0}}), {0, 10}, {0, 5}));
+
+ // One equivalence associating two target pairs.
+ EXPECT_EQ(
+ AffinityVector({{1.0, -1.0}, {-1.0, 1.0}, {-1.0, -1.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 6}, 1.0}}), {0, 5, 10}, {0, 5}));
+
+ // Two equivalences with different similarities.
+ EXPECT_EQ(AffinityVector({{-2.0, 2.0}, {1.0, -2.0}, {-1.0, -2.0}}),
+ test_affinity(EquivalenceMap({{{5, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}),
+ {0, 5, 10}, {0, 5}));
+
+ // Per the expected matrix, only the association with affinity 2.0 remains;
+ // no pair ends up with affinity 1.0.
+ EXPECT_EQ(AffinityVector({{-2.0, 2.0}, {0.0, -2.0}, {0.0, -2.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}),
+ {0, 5, 10}, {0, 5}));
+}
+
+// Checks the label vectors and the returned label bound produced by
+// AssignLabels() for various equivalence maps and affinity thresholds.
+TEST(TargetsAffinityTest, AssignLabels) {
+ // A common TargetsAffinity is used across independent tests. This is to
+ // reflect actual usage, in which common TargetsAffinity is used so that
+ // internal buffers get reused.
+ TargetsAffinity targets_affinity;
+
+ // Runs inference followed by AssignLabels(), compares both label vectors
+ // against the expectations, and returns the label bound for the caller to
+ // check.
+ auto test_labels_assignment =
+ [&targets_affinity](const EquivalenceMap& equivalence_map,
+ const std::vector<offset_t>& old_targets,
+ const std::vector<offset_t>& new_targets,
+ double min_affinity,
+ const std::vector<uint32_t>& expected_old_labels,
+ const std::vector<uint32_t>& expected_new_labels) {
+ targets_affinity.InferFromSimilarities(equivalence_map, old_targets,
+ new_targets);
+ std::vector<uint32_t> old_labels;
+ std::vector<uint32_t> new_labels;
+ size_t bound = targets_affinity.AssignLabels(min_affinity, &old_labels,
+ &new_labels);
+ EXPECT_EQ(expected_old_labels, old_labels);
+ EXPECT_EQ(expected_new_labels, new_labels);
+ return bound;
+ };
+
+ // No targets: no labels are assigned and the bound stays at 1.
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap(), {}, {}, 1.0, {}, {}));
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 8}, 1.0}}), {},
+ {}, 1.0, {}, {}));
+
+ // No equivalences: every target keeps label 0.
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap(), {0, 10}, {0, 5}, 1.0,
+ {0, 0}, {0, 0}));
+
+ // Threshold behavior: a label is assigned only when the affinity is at least
+ // |min_affinity| (equality included).
+ EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}),
+ {0, 10}, {0, 5}, 1.0, {1, 0}, {1, 0}));
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 0.99}}),
+ {0, 10}, {0, 5}, 1.0, {0, 0}, {0, 0}));
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}),
+ {0, 10}, {0, 5}, 1.01, {0, 0}, {0, 0}));
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}),
+ {0, 10}, {0, 5}, 15.0, {0, 0}, {0, 0}));
+ EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 15.0}}),
+ {0, 10}, {0, 5}, 15.0, {1, 0}, {1, 0}));
+
+ // Labels follow old-key order; new targets receive the label of their
+ // associated old target.
+ EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 1, 2}, 1.0}}),
+ {0, 10}, {1, 5}, 1.0, {1, 0}, {1, 0}));
+ EXPECT_EQ(
+ 3U, test_labels_assignment(EquivalenceMap({{{0, 0, 6}, 1.0}}), {0, 5, 10},
+ {0, 5}, 1.0, {1, 2, 0}, {1, 2}));
+ EXPECT_EQ(3U, test_labels_assignment(
+ EquivalenceMap({{{5, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}),
+ {0, 5, 10}, {0, 5}, 1.0, {1, 2, 0}, {2, 1}));
+ EXPECT_EQ(2U, test_labels_assignment(
+ EquivalenceMap({{{0, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}),
+ {0, 5, 10}, {0, 5}, 1.0, {1, 0, 0}, {0, 1}));
+}
+
+} // namespace zucchini
diff --git a/test_disassembler.cc b/test_disassembler.cc
new file mode 100644
index 0000000..8d59a93
--- /dev/null
+++ b/test_disassembler.cc
@@ -0,0 +1,58 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/test_disassembler.h"
+
+#include "components/zucchini/test_reference_reader.h"
+
+namespace zucchini {
+
+// Stores copies of the three traits / reference-list pairs; index i of
+// |traits_| and |refs_| holds the i-th pair (0-based).
+TestDisassembler::TestDisassembler(const ReferenceTypeTraits& traits1,
+ const std::vector<Reference>& refs1,
+ const ReferenceTypeTraits& traits2,
+ const std::vector<Reference>& refs2,
+ const ReferenceTypeTraits& traits3,
+ const std::vector<Reference>& refs3)
+ : traits_{traits1, traits2, traits3}, refs_{refs1, refs2, refs3} {}
+
+TestDisassembler::~TestDisassembler() = default;
+
+// The test disassembler does not correspond to a real executable format.
+ExecutableType TestDisassembler::GetExeType() const {
+ return kExeTypeUnknown;
+}
+
+std::string TestDisassembler::GetExeTypeString() const {
+ return "(Unknown)";
+}
+
+// Returns one ReferenceGroup per injected reference type, each bound to the
+// matching MakeReadRefsN / MakeWriteRefsN member functions.
+std::vector<ReferenceGroup> TestDisassembler::MakeReferenceGroups() const {
+ return {
+ {traits_[0], &TestDisassembler::MakeReadRefs1,
+ &TestDisassembler::MakeWriteRefs1},
+ {traits_[1], &TestDisassembler::MakeReadRefs2,
+ &TestDisassembler::MakeWriteRefs2},
+ {traits_[2], &TestDisassembler::MakeReadRefs3,
+ &TestDisassembler::MakeWriteRefs3},
+ };
+}
+
+// Accepts any image without inspecting it.
+bool TestDisassembler::Parse(ConstBufferView image) {
+ return true;
+}
+
+// Returns a reader over the injected references of type index |type|, which
+// must be 0, 1 or 2 (not checked here).
+std::unique_ptr<ReferenceReader> TestDisassembler::MakeReadRefs(int type) {
+ return std::make_unique<TestReferenceReader>(refs_[type]);
+}
+
+// Returns a writer that discards every reference; |image| is not used.
+std::unique_ptr<ReferenceWriter> TestDisassembler::MakeWriteRefs(
+ MutableBufferView image) {
+ class NoOpWriter : public ReferenceWriter {
+ public:
+ // ReferenceWriter:
+ void PutNext(Reference) override {}
+ };
+ return std::make_unique<NoOpWriter>();
+}
+
+} // namespace zucchini
diff --git a/test_disassembler.h b/test_disassembler.h
new file mode 100644
index 0000000..427ed91
--- /dev/null
+++ b/test_disassembler.h
@@ -0,0 +1,78 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_
+#define COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A trivial Disassembler that reads injected references of 3 different types.
+// This is only meant for testing and is not a full implementation of a
+// disassembler. Reading references ignores bounds, and writing references does
+// nothing.
+class TestDisassembler : public Disassembler {
+ public:
+ // |traitsN| and |refsN| describe the N-th injected reference type and the
+ // references a reader of that type will produce.
+ TestDisassembler(const ReferenceTypeTraits& traits1,
+ const std::vector<Reference>& refs1,
+ const ReferenceTypeTraits& traits2,
+ const std::vector<Reference>& refs2,
+ const ReferenceTypeTraits& traits3,
+ const std::vector<Reference>& refs3);
+ ~TestDisassembler() override;
+
+ // Disassembler:
+ ExecutableType GetExeType() const override;
+ std::string GetExeTypeString() const override;
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ // Disassembler::ReaderFactory:
+ // The requested [lower, upper) bounds are ignored; each reader yields the
+ // whole injected list for its type.
+ std::unique_ptr<ReferenceReader> MakeReadRefs1(offset_t /*lower*/,
+ offset_t /*upper*/) {
+ return MakeReadRefs(0);
+ }
+ std::unique_ptr<ReferenceReader> MakeReadRefs2(offset_t /*lower*/,
+ offset_t /*upper*/) {
+ return MakeReadRefs(1);
+ }
+ std::unique_ptr<ReferenceReader> MakeReadRefs3(offset_t /*lower*/,
+ offset_t /*upper*/) {
+ return MakeReadRefs(2);
+ }
+
+ // Disassembler::WriterFactory:
+ // All three forward to the same MakeWriteRefs() helper.
+ std::unique_ptr<ReferenceWriter> MakeWriteRefs1(MutableBufferView image) {
+ return MakeWriteRefs(image);
+ }
+ std::unique_ptr<ReferenceWriter> MakeWriteRefs2(MutableBufferView image) {
+ return MakeWriteRefs(image);
+ }
+ std::unique_ptr<ReferenceWriter> MakeWriteRefs3(MutableBufferView image) {
+ return MakeWriteRefs(image);
+ }
+
+ private:
+ // Disassembler:
+ bool Parse(ConstBufferView image) override;
+
+ // Shared implementations behind the numbered factory methods above. |type|
+ // is a 0-based index into |refs_|.
+ std::unique_ptr<ReferenceReader> MakeReadRefs(int type);
+ std::unique_ptr<ReferenceWriter> MakeWriteRefs(MutableBufferView image);
+
+ // Injected traits and references, one slot per reference type.
+ ReferenceTypeTraits traits_[3];
+ std::vector<Reference> refs_[3];
+
+ DISALLOW_COPY_AND_ASSIGN(TestDisassembler);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_
diff --git a/test_reference_reader.cc b/test_reference_reader.cc
new file mode 100644
index 0000000..5517fa0
--- /dev/null
+++ b/test_reference_reader.cc
@@ -0,0 +1,20 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/test_reference_reader.h"
+
+namespace zucchini {
+
+// Keeps its own copy of |refs|.
+TestReferenceReader::TestReferenceReader(const std::vector<Reference>& refs)
+ : references_(refs) {}
+
+TestReferenceReader::~TestReferenceReader() = default;
+
+// Returns the stored references one at a time, in order, then base::nullopt
+// once all of them have been returned.
+base::Optional<Reference> TestReferenceReader::GetNext() {
+ if (index_ == references_.size())
+ return base::nullopt;
+ return references_[index_++];
+}
+
+} // namespace zucchini
diff --git a/test_reference_reader.h b/test_reference_reader.h
new file mode 100644
index 0000000..afae188
--- /dev/null
+++ b/test_reference_reader.h
@@ -0,0 +1,32 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_
+#define COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "base/optional.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A trivial ReferenceReader that reads injected references.
+class TestReferenceReader : public ReferenceReader {
+ public:
+ // |refs| is copied, so it need not outlive the reader.
+ explicit TestReferenceReader(const std::vector<Reference>& refs);
+ ~TestReferenceReader() override;
+
+ // ReferenceReader:
+ base::Optional<Reference> GetNext() override;
+
+ private:
+ // References to emit, and the position of the next one to return.
+ std::vector<Reference> references_;
+ size_t index_ = 0;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_
diff --git a/test_utils.cc b/test_utils.cc
new file mode 100644
index 0000000..91c8a39
--- /dev/null
+++ b/test_utils.cc
@@ -0,0 +1,26 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/test_utils.h"
+
+#include <ios>
+#include <sstream>
+
+#include "base/logging.h"
+
+namespace zucchini {
+
+// Reads whitespace-separated hexadecimal values from |hex_string| and returns
+// them as bytes. Reading stops at the first token that cannot be extracted as
+// a hex number; values read before that point are still returned. A value
+// above 0xFF fails the CHECK.
+std::vector<uint8_t> ParseHexString(const std::string& hex_string) {
+ std::vector<uint8_t> ret;
+ std::istringstream iss(hex_string);
+ iss >> std::hex;
+ uint32_t temp = 0; // Cannot be uint8_t: istringstream treats this as char!
+ while (iss >> temp) {
+ CHECK_LE(temp, 0xFFU);
+ ret.push_back(temp);
+ }
+ return ret;
+}
+
+} // namespace zucchini
diff --git a/test_utils.h b/test_utils.h
new file mode 100644
index 0000000..7ed735d
--- /dev/null
+++ b/test_utils.h
@@ -0,0 +1,20 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TEST_UTILS_H_
+#define COMPONENTS_ZUCCHINI_TEST_UTILS_H_
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+namespace zucchini {
+
+// Parses a whitespace-separated list of hex byte values into a byte vector.
+std::vector<uint8_t> ParseHexString(const std::string& hex_string);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TEST_UTILS_H_
diff --git a/testdata/chrome64_1.exe.sha1 b/testdata/chrome64_1.exe.sha1
new file mode 100644
index 0000000..9b4f113
--- /dev/null
+++ b/testdata/chrome64_1.exe.sha1
@@ -0,0 +1 @@
+4970ef6f342f6a0da9ae7a4ed462f93ef68f142c \ No newline at end of file
diff --git a/testdata/chrome64_2.exe.sha1 b/testdata/chrome64_2.exe.sha1
new file mode 100644
index 0000000..e4a96a2
--- /dev/null
+++ b/testdata/chrome64_2.exe.sha1
@@ -0,0 +1 @@
+c3a974589d50956a3c8c17572fee078b9276ad9b \ No newline at end of file
diff --git a/testdata/patch_fuzzer/empty.zuc b/testdata/patch_fuzzer/empty.zuc
new file mode 100644
index 0000000..34b2d66
--- /dev/null
+++ b/testdata/patch_fuzzer/empty.zuc
Binary files differ
diff --git a/testdata/setup1.exe.sha1 b/testdata/setup1.exe.sha1
new file mode 100644
index 0000000..2304621
--- /dev/null
+++ b/testdata/setup1.exe.sha1
@@ -0,0 +1 @@
+5d0e8fed8e9e091e184adb2e2e0e668def9cd2c5 \ No newline at end of file
diff --git a/testdata/setup2.exe.sha1 b/testdata/setup2.exe.sha1
new file mode 100644
index 0000000..9fa4d0c
--- /dev/null
+++ b/testdata/setup2.exe.sha1
@@ -0,0 +1 @@
+12194273e8d509b6e81e4a6b63621081e1426028 \ No newline at end of file
diff --git a/type_win_pe.h b/type_win_pe.h
new file mode 100644
index 0000000..d385ca7
--- /dev/null
+++ b/type_win_pe.h
@@ -0,0 +1,188 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_
+#define COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace zucchini {
+
+// Structures and constants taken from WINNT.h and following identical layout.
+// This is used for parsing of Portable Executable (PE) file format.
+namespace pe {
+// Supported by MSVC, g++, and clang++. Ensures no gaps in packing.
+#pragma pack(push, 1)
+
+// IMAGE_NUMBEROF_DIRECTORY_ENTRIES
+constexpr size_t kImageNumberOfDirectoryEntries = 16;
+
+// IMAGE_FILE_BASE_RELOCATION_TABLE
+constexpr size_t kIndexOfBaseRelocationTable = 5;
+
+constexpr uint32_t kImageScnMemExecute = 0x20000000; // IMAGE_SCN_MEM_EXECUTE
+constexpr uint32_t kImageScnMemRead = 0x40000000; // IMAGE_SCN_MEM_READ
+
+// IMAGE_DOS_HEADER
+struct ImageDOSHeader {
+ uint16_t e_magic; // 0x00
+ uint16_t e_cblp;
+ uint16_t e_cp;
+ uint16_t e_crlc;
+ uint16_t e_cparhdr;
+ uint16_t e_minalloc;
+ uint16_t e_maxalloc;
+ uint16_t e_ss;
+ uint16_t e_sp; // 0x10
+ uint16_t e_csum;
+ uint16_t e_ip;
+ uint16_t e_cs;
+ uint16_t e_lfarlc;
+ uint16_t e_ovno;
+ uint16_t e_res[4];
+ uint16_t e_oemid; // 0x24
+ uint16_t e_oeminfo;
+ uint16_t e_res2[10];
+ uint32_t e_lfanew; // 0x3C
+};
+static_assert(sizeof(ImageDOSHeader) == 0x40,
+ "DOS header size should be 0x40 bytes");
+
+// IMAGE_SECTION_HEADER
+struct ImageSectionHeader {
+ char name[8];
+ uint32_t virtual_size;
+ uint32_t virtual_address;
+ uint32_t size_of_raw_data;
+ uint32_t file_offset_of_raw_data;
+ uint32_t pointer_to_relocations; // Always zero in an image.
+ uint32_t pointer_to_line_numbers; // Always zero in an image.
+ uint16_t number_of_relocations; // Always zero in an image.
+ uint16_t number_of_line_numbers; // Always zero in an image.
+ uint32_t characteristics;
+};
+static_assert(sizeof(ImageSectionHeader) == 0x28,
+ "Section header size should be 0x28 bytes");
+
+// IMAGE_DATA_DIRECTORY
+struct ImageDataDirectory {
+ uint32_t virtual_address;
+ uint32_t size;
+};
+static_assert(sizeof(ImageDataDirectory) == 0x08,
+ "Data directory size should be 0x08 bytes");
+
+// IMAGE_FILE_HEADER
+struct ImageFileHeader {
+ uint16_t machine;
+ uint16_t number_of_sections;
+ uint32_t time_date_stamp;
+ uint32_t pointer_to_symbol_table;
+ uint32_t number_of_symbols;
+ uint16_t size_of_optional_header;
+ uint16_t characteristics;
+};
+static_assert(sizeof(ImageFileHeader) == 0x14,
+ "File header size should be 0x14 bytes");
+
+// IMAGE_OPTIONAL_HEADER
+struct ImageOptionalHeader {
+ uint16_t magic; // 0x00: 0x10B
+ uint8_t major_linker_version;
+ uint8_t minor_linker_version;
+ uint32_t size_of_code;
+ uint32_t size_of_initialized_data;
+ uint32_t size_of_uninitialized_data;
+ uint32_t address_of_entry_point; // 0x10
+ uint32_t base_of_code;
+ uint32_t base_of_data;
+
+ uint32_t image_base;
+ uint32_t section_alignment; // 0x20
+ uint32_t file_alignment;
+ uint16_t major_operating_system_version;
+ uint16_t minor_operating_system_version;
+ uint16_t major_image_version;
+ uint16_t minor_image_version;
+ uint16_t major_subsystem_version; // 0x30
+ uint16_t minor_subsystem_version;
+ uint32_t win32_version_value;
+ uint32_t size_of_image;
+ uint32_t size_of_headers;
+ uint32_t check_sum; // 0x40
+ uint16_t subsystem;
+ uint16_t dll_characteristics;
+ uint32_t size_of_stack_reserve;
+ uint32_t size_of_stack_commit;
+ uint32_t size_of_heap_reserve; // 0x50
+ uint32_t size_of_heap_commit;
+ uint32_t loader_flags;
+ uint32_t number_of_rva_and_sizes;
+ ImageDataDirectory data_directory[kImageNumberOfDirectoryEntries]; // 0x60
+ /* 0xE0 */
+};
+static_assert(sizeof(ImageOptionalHeader) == 0xE0,
+ "Optional header (32) size should be 0xE0 bytes");
+
+// IMAGE_OPTIONAL_HEADER64
+struct ImageOptionalHeader64 {
+ uint16_t magic; // 0x00: 0x20B
+ uint8_t major_linker_version;
+ uint8_t minor_linker_version;
+ uint32_t size_of_code;
+ uint32_t size_of_initialized_data;
+ uint32_t size_of_uninitialized_data;
+ uint32_t address_of_entry_point; // 0x10
+ uint32_t base_of_code;
+
+ uint64_t image_base;
+ uint32_t section_alignment; // 0x20
+ uint32_t file_alignment;
+ uint16_t major_operating_system_version;
+ uint16_t minor_operating_system_version;
+ uint16_t major_image_version;
+ uint16_t minor_image_version;
+ uint16_t major_subsystem_version; // 0x30
+ uint16_t minor_subsystem_version;
+ uint32_t win32_version_value;
+ uint32_t size_of_image;
+ uint32_t size_of_headers;
+ uint32_t check_sum; // 0x40
+ uint16_t subsystem;
+ uint16_t dll_characteristics;
+ uint64_t size_of_stack_reserve;
+ uint64_t size_of_stack_commit; // 0x50
+ uint64_t size_of_heap_reserve;
+ uint64_t size_of_heap_commit; // 0x60
+ uint32_t loader_flags;
+ uint32_t number_of_rva_and_sizes;
+ ImageDataDirectory data_directory[kImageNumberOfDirectoryEntries]; // 0x70
+ /* 0xF0 */
+};
+static_assert(sizeof(ImageOptionalHeader64) == 0xF0,
+ "Optional header (64) size should be 0xF0 bytes");
+
+// NOTE(review): unlike the structs above, this one is not labeled with its
+// WINNT.h counterpart. It presumably mirrors IMAGE_BASE_RELOCATION (the
+// header of a base relocation block: page RVA followed by block size) --
+// confirm.
+struct RelocHeader {
+ uint32_t rva_hi;
+ uint32_t size;
+};
+static_assert(sizeof(RelocHeader) == 8, "RelocHeader size should be 8 bytes");
+
+#pragma pack(pop)
+
+} // namespace pe
+
+// Constants and offsets gleaned from WINNT.h and various articles on the
+// format of Windows PE executables.
+
+constexpr char const* kTextSectionName = ".text";
+
+// Bitfield with characteristics usually associated with code sections.
+const uint32_t kCodeCharacteristics =
+ pe::kImageScnMemExecute | pe::kImageScnMemRead;
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_
diff --git a/typed_value.h b/typed_value.h
new file mode 100644
index 0000000..868397c
--- /dev/null
+++ b/typed_value.h
@@ -0,0 +1,57 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TYPED_VALUE_H_
+#define COMPONENTS_ZUCCHINI_TYPED_VALUE_H_
+
+#include <ostream>
+
+namespace zucchini {
+
+// Strongly typed values, with compare and convert functions for underlying
+// data. Typically one would use strongly typed enums for this. However, for
+// Zucchini, the number of bytes is not fixed, and must be represented as an
+// integer for iteration.
+// |Tag| is a type tag used to uniquely identify TypedValue.
+// |T| is an integral type used to hold values.
+// Example:
+// struct Foo : TypedValue<Foo, int> {
+// using Foo::TypedValue::TypedValue; // inheriting constructor.
+// };
+// Foo will be used to hold values of type |int|, but with a distinct type from
+// any other TypedValue.
+template <class Tag, class T>
+class TypedValue {
+ public:
+ // Default construction value-initializes the held value (see |value_|).
+ constexpr TypedValue() = default;
+ // Explicit, so a raw |T| is never silently turned into a TypedValue.
+ explicit constexpr TypedValue(const T& value) : value_(value) {}
+
+ // Access to the underlying value: either through an explicit cast or through
+ // value().
+ explicit operator T() const { return value_; }
+ const T value() const { return value_; }
+
+ // Comparisons are only defined between TypedValues of the same
+ // instantiation, and compare the underlying values. Only ==, !=, < and > are
+ // provided.
+ friend bool operator==(const TypedValue& a, const TypedValue& b) {
+ return a.value_ == b.value_;
+ }
+ friend bool operator!=(const TypedValue& a, const TypedValue& b) {
+ return !(a == b);
+ }
+ friend bool operator<(const TypedValue& a, const TypedValue& b) {
+ return a.value_ < b.value_;
+ }
+ friend bool operator>(const TypedValue& a, const TypedValue& b) {
+ return b < a;
+ }
+
+ private:
+ T value_ = {};
+};
+
+// Streams the underlying value of |tag|, formatted as |T| would be.
+template <class Tag, class T>
+std::ostream& operator<<(std::ostream& os, const TypedValue<Tag, T>& tag) {
+ return os << tag.value();
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TYPED_VALUE_H_
diff --git a/typed_value_unittest.cc b/typed_value_unittest.cc
new file mode 100644
index 0000000..bc0d4f1
--- /dev/null
+++ b/typed_value_unittest.cc
@@ -0,0 +1,40 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/typed_value.h"
+
+#include <type_traits>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// Two TypedValue types over the same underlying type (int), used to verify
+// that they remain distinct from each other.
+struct ValueA : TypedValue<ValueA, int> {
+ using ValueA::TypedValue::TypedValue;
+};
+
+struct ValueB : TypedValue<ValueB, int> {
+ using ValueB::TypedValue::TypedValue;
+};
+
+// The stored value is retrievable through value() and through a cast.
+TEST(TypedIdTest, Value) {
+ EXPECT_EQ(42, ValueA(42).value());
+ EXPECT_EQ(42, static_cast<int>(ValueA(42))); // explicit cast
+}
+
+// Covers operator== and operator!=.
+// NOTE(review): operator< and operator> are not exercised in this file.
+TEST(TypedIdTest, Comparison) {
+ EXPECT_TRUE(ValueA(0) == ValueA(0));
+ EXPECT_FALSE(ValueA(0) == ValueA(42));
+ EXPECT_FALSE(ValueA(0) != ValueA(0));
+ EXPECT_TRUE(ValueA(0) != ValueA(42));
+}
+
+// Compile-time check that the two types do not implicitly convert to each
+// other.
+TEST(TypedIdTest, StrongType) {
+ static_assert(!std::is_convertible<ValueA, ValueB>::value,
+ "ValueA should not be convertible to ValueB");
+ static_assert(!std::is_convertible<ValueB, ValueA>::value,
+ "ValueB should not be convertible to ValueA");
+}
+
+} // namespace zucchini
diff --git a/zucchini.h b/zucchini.h
new file mode 100644
index 0000000..9100709
--- /dev/null
+++ b/zucchini.h
@@ -0,0 +1,54 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_H_
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/patch_writer.h"
+
+// Definitions, structures, and interfaces for the Zucchini library.
+
+namespace zucchini {
+
+namespace status {
+
+// Zucchini status code, which can also be used as process exit code. Therefore
+// success is explicitly 0.
+enum Code {
+ kStatusSuccess = 0,
+ kStatusInvalidParam = 1,
+ kStatusFileReadError = 2,
+ kStatusFileWriteError = 3,
+ kStatusPatchReadError = 4,
+ kStatusPatchWriteError = 5,
+ kStatusInvalidOldImage = 6,
+ kStatusInvalidNewImage = 7,
+ kStatusFatal = 8,
+};
+
+} // namespace status
+
+// Generates ensemble patch from |old_image| to |new_image|, and writes it to
+// |patch_writer|.
+status::Code GenerateEnsemble(ConstBufferView old_image,
+ ConstBufferView new_image,
+ EnsemblePatchWriter* patch_writer);
+
+// Generates raw patch from |old_image| to |new_image|, and writes it to
+// |patch_writer|.
+status::Code GenerateRaw(ConstBufferView old_image,
+ ConstBufferView new_image,
+ EnsemblePatchWriter* patch_writer);
+
+// Applies |patch_reader| to |old_image| to build |new_image|, which refers to
+// preallocated memory of sufficient size.
+status::Code Apply(ConstBufferView old_image,
+ const EnsemblePatchReader& patch_reader,
+ MutableBufferView new_image);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_H_
diff --git a/zucchini_apply.cc b/zucchini_apply.cc
new file mode 100644
index 0000000..1532874
--- /dev/null
+++ b/zucchini_apply.cc
@@ -0,0 +1,202 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_apply.h"
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <utility>
+
+#include "base/logging.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/equivalence_map.h"
+#include "components/zucchini/image_index.h"
+
+namespace zucchini {
+
+// Fills |new_image| in destination order. For each equivalence, the gap
+// between the current write position and the equivalence's |dst_offset| is
+// filled from the extra-data stream, then |length| bytes are copied from
+// |old_image| starting at |src_offset|. Whatever remains after the last
+// equivalence is also filled from extra data. Returns false if extra data
+// cannot supply a requested gap, or if either stream still has content once
+// |new_image| is full.
+// NOTE(review): offsets and lengths read from the patch are not range-checked
+// in this function (see the TODO below).
+bool ApplyEquivalenceAndExtraData(ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image) {
+ EquivalenceSource equiv_source = patch_reader.GetEquivalenceSource();
+ ExtraDataSource extra_data_source = patch_reader.GetExtraDataSource();
+ MutableBufferView::iterator dst_it = new_image.begin();
+
+ for (auto equivalence = equiv_source.GetNext(); equivalence.has_value();
+ equivalence = equiv_source.GetNext()) {
+ // TODO(etiennep): Guard against out of range errors and return false
+ // instead.
+ MutableBufferView::iterator next_dst_it =
+ new_image.begin() + equivalence->dst_offset;
+ // Equivalences must not start before the current write position.
+ CHECK(next_dst_it >= dst_it);
+ // Fill the bytes preceding this equivalence with extra data.
+ offset_t gap = static_cast<offset_t>(next_dst_it - dst_it);
+ base::Optional<ConstBufferView> extra_data = extra_data_source.GetNext(gap);
+ if (!extra_data) {
+ LOG(ERROR) << "Error reading extra_data";
+ return false;
+ }
+ dst_it = std::copy(extra_data->begin(), extra_data->end(), dst_it);
+ CHECK_EQ(dst_it, next_dst_it);
+ // Copy the equivalent region from the old image.
+ dst_it = std::copy_n(old_image.begin() + equivalence->src_offset,
+ equivalence->length, dst_it);
+ CHECK_EQ(dst_it, next_dst_it + equivalence->length);
+ }
+ // Fill the tail of |new_image| that follows the last equivalence.
+ offset_t gap = static_cast<offset_t>(new_image.end() - dst_it);
+ base::Optional<ConstBufferView> extra_data = extra_data_source.GetNext(gap);
+ if (!extra_data) {
+ LOG(ERROR) << "Error reading extra_data";
+ return false;
+ }
+ std::copy(extra_data->begin(), extra_data->end(), dst_it);
+ if (!equiv_source.Done() || !extra_data_source.Done()) {
+ LOG(ERROR) << "Found trailing equivalence and extra_data";
+ return false;
+ }
+ return true;
+}
+
+// Applies byte-level corrections to |new_image|. Each raw delta carries a
+// |copy_offset| measured across the concatenated lengths of the equivalences
+// (in stream order) rather than in |new_image| directly, so the equivalences
+// are walked alongside the deltas to translate it into an image offset.
+// Returns false if the equivalences run out before the deltas do, or if raw
+// delta data remains once the loop ends.
+bool ApplyRawDelta(const PatchElementReader& patch_reader,
+ MutableBufferView new_image) {
+ EquivalenceSource equiv_source = patch_reader.GetEquivalenceSource();
+ RawDeltaSource raw_delta_source = patch_reader.GetRawDeltaSource();
+ // Traverse |equiv_source| and |raw_delta_source| in lockstep.
+ auto equivalence = equiv_source.GetNext();
+ // Sum of the lengths of all equivalences preceding the current one.
+ offset_t base_copy_offset = 0;
+ for (auto delta = raw_delta_source.GetNext(); delta.has_value();
+ delta = raw_delta_source.GetNext()) {
+ // Advance to the equivalence whose range contains |delta->copy_offset|.
+ while (equivalence.has_value() &&
+ base_copy_offset + equivalence->length <= delta->copy_offset) {
+ base_copy_offset += equivalence->length;
+ equivalence = equiv_source.GetNext();
+ }
+ if (!equivalence.has_value()) {
+ LOG(ERROR) << "Error reading equivalences";
+ return false;
+ }
+ CHECK_GE(delta->copy_offset, base_copy_offset);
+ CHECK_LT(delta->copy_offset, base_copy_offset + equivalence->length);
+
+ // Invert byte diff.
+ // The index is the equivalence's destination start plus the delta's
+ // position inside that equivalence.
+ new_image[equivalence->dst_offset - base_copy_offset +
+ delta->copy_offset] += delta->diff;
+ }
+ if (!raw_delta_source.Done()) {
+ LOG(ERROR) << "Found trailing raw_delta";
+ return false;
+ }
+ return true;
+}
+
+// Rewrites references in |new_image|. References are read from |old_image|
+// within each equivalence's source range; each target is projected through the
+// equivalences, adjusted by the next value from the patch's reference-delta
+// stream, and written at the reference's corresponding location in
+// |new_image|. Targets are handled per pool: all reference groups sharing a
+// pool tag feed one TargetPool, which is then extended with the extra targets
+// stored in the patch. Returns false if a disassembler cannot be created, if
+// reference deltas run out, or if a patch stream has trailing data.
+bool ApplyReferencesCorrection(ExecutableType exe_type,
+ ConstBufferView old_image,
+ const PatchElementReader& patch,
+ MutableBufferView new_image) {
+ auto old_disasm = MakeDisassemblerOfType(old_image, exe_type);
+ auto new_disasm =
+ MakeDisassemblerOfType(ConstBufferView(new_image), exe_type);
+ if (!old_disasm || !new_disasm) {
+ LOG(ERROR) << "Failed to create Disassembler";
+ return false;
+ }
+
+ // A single delta stream is shared by all pools and groups, so deltas are
+ // consumed in exactly the iteration order used below.
+ ReferenceDeltaSource ref_delta_source = patch.GetReferenceDeltaSource();
+ // Bucket the old image's reference groups by the pool they belong to.
+ std::map<PoolTag, std::vector<ReferenceGroup>> pool_groups;
+ for (const auto& ref_group : old_disasm->MakeReferenceGroups())
+ pool_groups[ref_group.pool_tag()].push_back(ref_group);
+
+ OffsetMapper offset_mapper(patch.GetEquivalenceSource());
+
+ std::vector<ReferenceGroup> new_groups = new_disasm->MakeReferenceGroups();
+ for (const auto& pool_and_sub_groups : pool_groups) {
+ PoolTag pool_tag = pool_and_sub_groups.first;
+ const std::vector<ReferenceGroup>& sub_groups = pool_and_sub_groups.second;
+
+ TargetPool targets;
+ // Load "old" targets, then filter and map them to "new" targets.
+ for (ReferenceGroup group : sub_groups)
+ targets.InsertTargets(std::move(*group.GetReader(old_disasm.get())));
+ targets.FilterAndProject(offset_mapper);
+
+ // Load extra targets from patch.
+ TargetSource target_source = patch.GetExtraTargetSource(pool_tag);
+ targets.InsertTargets(&target_source);
+ if (!target_source.Done()) {
+ LOG(ERROR) << "Found trailing extra_targets";
+ return false;
+ }
+
+ // Correct all new references, and write results to |new_disasm|.
+ for (ReferenceGroup group : sub_groups) {
+ // NOTE(review): |new_groups| is indexed by the type tag's value, which
+ // assumes MakeReferenceGroups() returns groups ordered by type tag --
+ // confirm.
+ std::unique_ptr<ReferenceWriter> ref_writer =
+ new_groups[group.type_tag().value()].GetWriter(new_image,
+ new_disasm.get());
+
+ // Restart the equivalence stream for every group.
+ EquivalenceSource equiv_source = patch.GetEquivalenceSource();
+ for (auto equivalence = equiv_source.GetNext(); equivalence.has_value();
+ equivalence = equiv_source.GetNext()) {
+ // Only old references located inside this equivalence are visited.
+ std::unique_ptr<ReferenceReader> ref_gen = group.GetReader(
+ equivalence->src_offset, equivalence->src_end(), old_disasm.get());
+ for (auto ref = ref_gen->GetNext(); ref.has_value();
+ ref = ref_gen->GetNext()) {
+ DCHECK_GE(ref->location, equivalence->src_offset);
+ DCHECK_LT(ref->location, equivalence->src_end());
+
+ // Estimate the new target's key from the old target, then apply the
+ // correction stored in the patch.
+ offset_t projected_target = offset_mapper.ForwardProject(ref->target);
+ offset_t expected_key = targets.KeyForNearestOffset(projected_target);
+ auto delta = ref_delta_source.GetNext();
+ if (!delta.has_value()) {
+ LOG(ERROR) << "Error reading reference_delta";
+ return false;
+ }
+ ref->target = targets.OffsetForKey(expected_key + delta.value());
+ // Translate the location from the equivalence's source range to its
+ // destination range.
+ ref->location =
+ ref->location - equivalence->src_offset + equivalence->dst_offset;
+ ref_writer->PutNext(*ref);
+ }
+ }
+ }
+ }
+ if (!ref_delta_source.Done()) {
+ LOG(ERROR) << "Found trailing ref_delta_source";
+ return false;
+ }
+ return true;
+}
+
+// Runs the three apply stages in order: equivalences plus extra data, raw
+// deltas, then reference corrections. Stops at the first stage that fails
+// (short-circuit &&) and returns false in that case.
+bool ApplyElement(ExecutableType exe_type,
+ ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image) {
+ return ApplyEquivalenceAndExtraData(old_image, patch_reader, new_image) &&
+ ApplyRawDelta(patch_reader, new_image) &&
+ ApplyReferencesCorrection(exe_type, old_image, patch_reader,
+ new_image);
+}
+
+/******** Exported Functions ********/
+
+// Applies every element patch in |patch_reader|, each on the sub-regions of
+// |old_image| and |new_image| named by its element match. |old_image| is
+// checked with CheckOldFile() before patching and |new_image| with
+// CheckNewFile() afterwards. Returns kStatusInvalidOldImage or
+// kStatusInvalidNewImage when the respective check fails, kStatusFatal when an
+// element cannot be applied, and kStatusSuccess otherwise.
+status::Code Apply(ConstBufferView old_image,
+ const EnsemblePatchReader& patch_reader,
+ MutableBufferView new_image) {
+ if (!patch_reader.CheckOldFile(old_image)) {
+ LOG(ERROR) << "Invalid old_image.";
+ return status::kStatusInvalidOldImage;
+ }
+
+ for (const auto& element_patch : patch_reader.elements()) {
+ ElementMatch match = element_patch.element_match();
+ if (!ApplyElement(match.exe_type(), old_image[match.old_element.region()],
+ element_patch, new_image[match.new_element.region()]))
+ return status::kStatusFatal;
+ }
+
+ if (!patch_reader.CheckNewFile(ConstBufferView(new_image))) {
+ LOG(ERROR) << "Invalid new_image.";
+ return status::kStatusInvalidNewImage;
+ }
+ return status::kStatusSuccess;
+}
+
+} // namespace zucchini
diff --git a/zucchini_apply.h b/zucchini_apply.h
new file mode 100644
index 0000000..559812e
--- /dev/null
+++ b/zucchini_apply.h
@@ -0,0 +1,43 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/zucchini.h"
+
+namespace zucchini {
+
+// Reads equivalences from |patch_reader| to form preliminary |new_image|,
+// copying regions from |old_image| and writing extra data from |patch_reader|.
+bool ApplyEquivalenceAndExtraData(ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image);
+
+// Reads raw delta from |patch_reader| and applies corrections to |new_image|.
+bool ApplyRawDelta(const PatchElementReader& patch_reader,
+ MutableBufferView new_image);
+
+// Corrects references in |new_image| by projecting references from |old_image|
+// and applying corrections from |patch_reader|. Both |old_image| and
+// |new_image| are matching elements associated with |exe_type|.
+bool ApplyReferencesCorrection(ExecutableType exe_type,
+ ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image);
+
+// Applies patch element with type |exe_type| from |patch_reader| on |old_image|
+// to produce |new_image|.
+bool ApplyElement(ExecutableType exe_type,
+ ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_
diff --git a/zucchini_apply_unittest.cc b/zucchini_apply_unittest.cc
new file mode 100644
index 0000000..7e26b7b
--- /dev/null
+++ b/zucchini_apply_unittest.cc
@@ -0,0 +1,22 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_apply.h"
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using OffsetVector = std::vector<offset_t>;
+
+} // namespace
+
+// TODO(huangs): Add more tests.
+
+} // namespace zucchini
diff --git a/zucchini_commands.cc b/zucchini_commands.cc
new file mode 100644
index 0000000..60b87cb
--- /dev/null
+++ b/zucchini_commands.cc
@@ -0,0 +1,176 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_commands.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iostream>
+#include <ostream>
+#include <utility>
+
+#include "base/command_line.h"
+#include "base/files/file.h"
+#include "base/files/file_path.h"
+#include "base/files/memory_mapped_file.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/crc32.h"
+#include "components/zucchini/io_utils.h"
+#include "components/zucchini/mapped_file.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/zucchini_integration.h"
+#include "components/zucchini/zucchini_tools.h"
+
+namespace {
+
+/******** Command-line Switches ********/
+
+constexpr char kSwitchDump[] = "dump";
+constexpr char kSwitchRaw[] = "raw";
+
+} // namespace
+
+zucchini::status::Code MainGen(MainParams params) {
+ CHECK_EQ(3U, params.file_paths.size());  // Expects: <old> <new> <patch>.
+
+ // TODO(huangs): Move implementation to zucchini_integration.cc.
+ using base::File;
+ File old_file(params.file_paths[0], File::FLAG_OPEN | File::FLAG_READ);
+ zucchini::MappedFileReader old_image(std::move(old_file));
+ if (old_image.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << old_image.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+ File new_file(params.file_paths[1], File::FLAG_OPEN | File::FLAG_READ);
+ zucchini::MappedFileReader new_image(std::move(new_file));
+ if (new_image.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[1].value() << ": "
+ << new_image.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+ zucchini::EnsemblePatchWriter patch_writer(old_image.region(),
+ new_image.region());
+
+ auto generate = params.command_line.HasSwitch(kSwitchRaw)
+ ? zucchini::GenerateRaw
+ : zucchini::GenerateEnsemble;
+ zucchini::status::Code result =
+ generate(old_image.region(), new_image.region(), &patch_writer);
+ if (result != zucchini::status::kStatusSuccess) {  // Report on |err|.
+ params.err << "Fatal error encountered when generating patch." << std::endl;
+ return result;
+ }
+
+ // By default, delete patch on destruction, to avoid having lingering files in
+ // case of a failure. On Windows deletion can be done by the OS.
+ File patch_file(params.file_paths[2], File::FLAG_CREATE_ALWAYS |
+ File::FLAG_READ | File::FLAG_WRITE |
+ File::FLAG_SHARE_DELETE |
+ File::FLAG_CAN_DELETE_ON_CLOSE);
+ zucchini::MappedFileWriter patch(params.file_paths[2], std::move(patch_file),
+ patch_writer.SerializedSize());
+ if (patch.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[2].value() << ": "
+ << patch.error();
+ return zucchini::status::kStatusFileWriteError;
+ }
+
+ if (!patch_writer.SerializeInto(patch.region()))
+ return zucchini::status::kStatusPatchWriteError;
+
+ // Successfully created patch. Explicitly request file to be kept.
+ if (!patch.Keep())
+ return zucchini::status::kStatusFileWriteError;
+ return zucchini::status::kStatusSuccess;
+}
+
+zucchini::status::Code MainApply(MainParams params) {
+ CHECK_EQ(3U, params.file_paths.size());
+ return zucchini::Apply(params.file_paths[0], params.file_paths[1],
+ params.file_paths[2]);
+}
+
+zucchini::status::Code MainRead(MainParams params) {
+ CHECK_EQ(1U, params.file_paths.size());
+ base::File input_file(params.file_paths[0],
+ base::File::FLAG_OPEN | base::File::FLAG_READ);
+ zucchini::MappedFileReader input(std::move(input_file));
+ if (input.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << input.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+
+ bool do_dump = params.command_line.HasSwitch(kSwitchDump);
+ zucchini::status::Code status = zucchini::ReadReferences(
+ {input.data(), input.length()}, do_dump, params.out);
+ if (status != zucchini::status::kStatusSuccess)
+ params.err << "Fatal error found when dumping references." << std::endl;
+ return status;
+}
+
+zucchini::status::Code MainDetect(MainParams params) {
+ CHECK_EQ(1U, params.file_paths.size());
+ base::File input_file(params.file_paths[0],
+ base::File::FLAG_OPEN | base::File::FLAG_READ);
+ zucchini::MappedFileReader input(std::move(input_file));
+ if (input.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << input.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+
+ std::vector<zucchini::ConstBufferView> sub_image_list;
+ zucchini::status::Code result = zucchini::DetectAll(
+ {input.data(), input.length()}, params.out, &sub_image_list);
+ if (result != zucchini::status::kStatusSuccess)
+ params.err << "Fatal error found when detecting executables." << std::endl;
+ return result;
+}
+
+zucchini::status::Code MainMatch(MainParams params) {
+ CHECK_EQ(2U, params.file_paths.size());  // Expects: <old> <new>.
+ using base::File;
+ File old_file(params.file_paths[0], File::FLAG_OPEN | File::FLAG_READ);
+ zucchini::MappedFileReader old_image(std::move(old_file));
+ if (old_image.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << old_image.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+ File new_file(params.file_paths[1], File::FLAG_OPEN | File::FLAG_READ);
+ zucchini::MappedFileReader new_image(std::move(new_file));
+ if (new_image.HasError()) {  // Validate the "new" mapping before use.
+ LOG(ERROR) << "Error with file " << params.file_paths[1].value() << ": "
+ << new_image.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+ zucchini::status::Code status =
+ zucchini::MatchAll({old_image.data(), old_image.length()},
+ {new_image.data(), new_image.length()}, params.out);
+ if (status != zucchini::status::kStatusSuccess)
+ params.err << "Fatal error found when matching executables." << std::endl;
+ return status;
+}
+
+zucchini::status::Code MainCrc32(MainParams params) {
+ CHECK_EQ(1U, params.file_paths.size());
+ base::File image_file(params.file_paths[0],
+ base::File::FLAG_OPEN | base::File::FLAG_READ);
+ zucchini::MappedFileReader image(std::move(image_file));
+ if (image.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << image.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+
+ uint32_t crc =
+ zucchini::CalculateCrc32(image.data(), image.data() + image.length());
+ params.out << "CRC32: " << zucchini::AsHex<8>(crc) << std::endl;
+ return zucchini::status::kStatusSuccess;
+}
diff --git a/zucchini_commands.h b/zucchini_commands.h
new file mode 100644
index 0000000..cef18dc
--- /dev/null
+++ b/zucchini_commands.h
@@ -0,0 +1,51 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_
+
+#include <iosfwd>
+#include <vector>
+
+#include "base/files/file_path.h"
+#include "components/zucchini/zucchini.h"
+
+// Zucchini commands and tools that can be invoked from command-line.
+
+namespace base {
+
+class CommandLine;
+
+} // namespace base
+
+// Aggregated parameter for Main*() functions, to simplify interface.
+struct MainParams {
+ const base::CommandLine& command_line;
+ const std::vector<base::FilePath>& file_paths;
+ std::ostream& out;
+ std::ostream& err;
+};
+
+// Signature of a Zucchini Command Function.
+using CommandFunction = zucchini::status::Code (*)(MainParams);
+
+// Command Function: Patch generation.
+zucchini::status::Code MainGen(MainParams params);
+
+// Command Function: Patch application.
+zucchini::status::Code MainApply(MainParams params);
+
+// Command Function: Read and dump references from an executable.
+zucchini::status::Code MainRead(MainParams params);
+
+// Command Function: Scan an archive file and detect executables.
+zucchini::status::Code MainDetect(MainParams params);
+
+// Command Function: Scan two archive files and match detected executables.
+zucchini::status::Code MainMatch(MainParams params);
+
+// Command Function: Compute CRC-32 of a file.
+zucchini::status::Code MainCrc32(MainParams params);
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_
diff --git a/zucchini_exe_version.rc.version b/zucchini_exe_version.rc.version
new file mode 100644
index 0000000..9d46a4b
--- /dev/null
+++ b/zucchini_exe_version.rc.version
@@ -0,0 +1,46 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <verrsrc.h>
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION @MAJOR@,@MINOR@,@BUILD@,@PATCH@
+ PRODUCTVERSION @MAJOR@,@MINOR@,@BUILD@,@PATCH@
+ FILEFLAGSMASK 0x17L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x4L
+ FILETYPE 0x1L
+ FILESUBTYPE 0x0L
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0"
+ BEGIN
+ VALUE "CompanyName", "@COMPANY_FULLNAME@"
+ VALUE "FileDescription", "Zucchini"
+ VALUE "FileVersion", "@MAJOR@.@MINOR@.@BUILD@.@PATCH@"
+ VALUE "InternalName", "zucchini"
+ VALUE "LegalCopyright", "@COPYRIGHT@"
+ VALUE "ProductName", "Zucchini"
+ VALUE "ProductVersion", "@MAJOR@.@MINOR@.@BUILD@.@PATCH@"
+ VALUE "CompanyShortName", "@COMPANY_SHORTNAME@"
+ VALUE "ProductShortName", "Zucchini"
+ VALUE "LastChange", "@LASTCHANGE@"
+ VALUE "Official Build", "@OFFICIAL_BUILD@"
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
diff --git a/zucchini_gen.cc b/zucchini_gen.cc
new file mode 100644
index 0000000..4be0b8b
--- /dev/null
+++ b/zucchini_gen.cc
@@ -0,0 +1,430 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_gen.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <utility>
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/encoded_view.h"
+#include "components/zucchini/ensemble_matcher.h"
+#include "components/zucchini/equivalence_map.h"
+#include "components/zucchini/heuristic_ensemble_matcher.h"
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/label_manager.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/suffix_array.h"
+#include "components/zucchini/targets_affinity.h"
+
+namespace zucchini {
+
+namespace {
+
+// Parameters for patch generation.
+constexpr double kMinEquivalenceSimilarity = 12.0;
+constexpr double kMinLabelAffinity = 64.0;
+constexpr size_t kNumIterations = 2;
+
+} // namespace
+
+std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets,
+ const TargetPool& new_targets) {
+ std::vector<offset_t> extra_targets;
+ std::set_difference(
+ new_targets.begin(), new_targets.end(), projected_old_targets.begin(),
+ projected_old_targets.end(), std::back_inserter(extra_targets));
+ return extra_targets;
+}
+
+EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index) {
+ // Label matching (between "old" and "new") can guide EquivalenceMap
+ // construction; but EquivalenceMap induces Label matching. This apparent
+ // "chicken and egg" problem is solved by multiple iterations alternating 2
+ // steps:
+ // - Association of targets based on previous EquivalenceMap. Note that the
+ // EquivalenceMap is empty on first iteration, so this is a no-op.
+ // - Construction of refined EquivalenceMap based on new targets associations.
+ size_t pool_count = old_image_index.PoolCount();
+ // |target_affinities| is outside the loop to reduce allocation.
+ std::vector<TargetsAffinity> target_affinities(pool_count);
+
+ EquivalenceMap equivalence_map;
+ for (size_t i = 0; i < kNumIterations; ++i) {
+ EncodedView old_view(old_image_index);
+ EncodedView new_view(new_image_index);
+
+ // Associate targets from "old" to "new" image based on |equivalence_map|
+ // for each reference pool.
+ for (const auto& old_pool_tag_and_targets :
+ old_image_index.target_pools()) {
+ PoolTag pool_tag = old_pool_tag_and_targets.first;
+ target_affinities[pool_tag.value()].InferFromSimilarities(
+ equivalence_map, old_pool_tag_and_targets.second.targets(),
+ new_image_index.pool(pool_tag).targets());
+
+ // Creates labels for strongly associated targets.
+ std::vector<uint32_t> old_labels;
+ std::vector<uint32_t> new_labels;
+ size_t label_bound = target_affinities[pool_tag.value()].AssignLabels(
+ kMinLabelAffinity, &old_labels, &new_labels);
+ old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
+ new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
+ }
+ // Build equivalence map, where references in "old" and "new" that share
+ // common semantics (i.e., their respective targets were associated earlier
+ // on) are considered equivalent.
+ equivalence_map.Build(
+ MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()),
+ old_view, new_view, target_affinities, kMinEquivalenceSimilarity);
+ }
+
+ return equivalence_map;
+}
+
+bool GenerateEquivalencesAndExtraData(ConstBufferView new_image,
+ const EquivalenceMap& equivalence_map,
+ PatchElementWriter* patch_writer) {
+ // Make 2 passes through |equivalence_map| to reduce write churn.
+ // Pass 1: Write all equivalences.
+ EquivalenceSink equivalences_sink;
+ for (const EquivalenceCandidate& candidate : equivalence_map)
+ equivalences_sink.PutNext(candidate.eq);
+ patch_writer->SetEquivalenceSink(std::move(equivalences_sink));
+
+ // Pass 2: Write data in gaps in |new_image| before / between / after
+ // |equivalence_map| as "extra data".
+ ExtraDataSink extra_data_sink;
+ offset_t dst_offset = 0;
+ for (const EquivalenceCandidate& candidate : equivalence_map) {
+ extra_data_sink.PutNext(
+ new_image[{dst_offset, candidate.eq.dst_offset - dst_offset}]);
+ dst_offset = candidate.eq.dst_end();
+ DCHECK_LE(dst_offset, new_image.size());
+ }
+ extra_data_sink.PutNext(
+ new_image[{dst_offset, new_image.size() - dst_offset}]);
+ patch_writer->SetExtraDataSink(std::move(extra_data_sink));
+ return true;
+}
+
+bool GenerateRawDelta(ConstBufferView old_image,
+ ConstBufferView new_image,
+ const EquivalenceMap& equivalence_map,
+ const ImageIndex& new_image_index,
+ PatchElementWriter* patch_writer) {
+ RawDeltaSink raw_delta_sink;
+
+ // Visit |equivalence_map| blocks in |new_image| order. Find and emit all
+ // bytewise differences.
+ offset_t base_copy_offset = 0;
+ for (const EquivalenceCandidate& candidate : equivalence_map) {
+ Equivalence equivalence = candidate.eq;
+ // For each bytewise delta from |old_image| to |new_image|, compute "copy
+ // offset" and pass it along with delta to the sink.
+ for (offset_t i = 0; i < equivalence.length; ++i) {
+ if (new_image_index.IsReference(equivalence.dst_offset + i))
+ continue; // Skip references since they're handled elsewhere.
+
+ int8_t diff = new_image[equivalence.dst_offset + i] -
+ old_image[equivalence.src_offset + i];
+ if (diff)
+ raw_delta_sink.PutNext({base_copy_offset + i, diff});
+ }
+ base_copy_offset += equivalence.length;
+ }
+ patch_writer->SetRawDeltaSink(std::move(raw_delta_sink));
+ return true;
+}
+
+bool GenerateReferencesDelta(const ReferenceSet& src_refs,
+ const ReferenceSet& dst_refs,
+ const TargetPool& projected_target_pool,
+ const OffsetMapper& offset_mapper,
+ const EquivalenceMap& equivalence_map,
+ ReferenceDeltaSink* reference_delta_sink) {
+ size_t ref_width = src_refs.width();
+ auto dst_ref = dst_refs.begin();
+
+ // For each equivalence, for each covered |dst_ref| and the matching
+ // |src_ref|, emit the delta between the respective target labels. Note: By
+ // construction, each reference location (with |ref_width|) lies either
+ // completely inside an equivalence or completely outside. We perform
+ // "straddle checks" throughout to verify this assertion.
+ for (const auto& candidate : equivalence_map) {
+ const Equivalence equiv = candidate.eq;
+ // Increment |dst_ref| until it catches up to |equiv|.
+ while (dst_ref != dst_refs.end() && dst_ref->location < equiv.dst_offset)
+ ++dst_ref;
+ if (dst_ref == dst_refs.end())
+ break;
+ if (dst_ref->location >= equiv.dst_end())
+ continue;
+ // Straddle check.
+ DCHECK_LE(dst_ref->location + ref_width, equiv.dst_end());
+
+ offset_t src_loc =
+ equiv.src_offset + (dst_ref->location - equiv.dst_offset);
+ auto src_ref = std::lower_bound(
+ src_refs.begin(), src_refs.end(), src_loc,
+ [](const IndirectReference& a, offset_t b) { return a.location < b; });
+ for (; dst_ref != dst_refs.end() &&
+ dst_ref->location + ref_width <= equiv.dst_end();
+ ++dst_ref, ++src_ref) {
+ // Local offset of |src_ref| should match that of |dst_ref|.
+ DCHECK_EQ(src_ref->location - equiv.src_offset,
+ dst_ref->location - equiv.dst_offset);
+ offset_t old_offset =
+ src_refs.target_pool().OffsetForKey(src_ref->target_key);
+ offset_t new_estimated_offset = offset_mapper.ForwardProject(old_offset);
+ offset_t new_estimated_key =
+ projected_target_pool.KeyForNearestOffset(new_estimated_offset);
+ offset_t new_offset =
+ dst_refs.target_pool().OffsetForKey(dst_ref->target_key);
+ offset_t new_key = projected_target_pool.KeyForOffset(new_offset);
+
+ reference_delta_sink->PutNext(
+ static_cast<int32_t>(new_key - new_estimated_key));
+ }
+ if (dst_ref == dst_refs.end())
+ break; // Done.
+ // Straddle check.
+ DCHECK_GE(dst_ref->location, equiv.dst_end());
+ }
+ return true;
+}
+
+bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets,
+ PoolTag pool_tag,
+ PatchElementWriter* patch_writer) {
+ TargetSink target_sink;
+ for (offset_t target : extra_targets)
+ target_sink.PutNext(target);
+ patch_writer->SetTargetSink(pool_tag, std::move(target_sink));
+ return true;
+}
+
+bool GenerateRawElement(const std::vector<offset_t>& old_sa,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ PatchElementWriter* patch_writer) {
+ ImageIndex old_image_index(old_image);
+ ImageIndex new_image_index(new_image);
+
+ EquivalenceMap equivalences;
+ equivalences.Build(old_sa, EncodedView(old_image_index),
+ EncodedView(new_image_index), {},
+ kMinEquivalenceSimilarity);
+
+ patch_writer->SetReferenceDeltaSink({});
+ return GenerateEquivalencesAndExtraData(new_image, equivalences,
+ patch_writer) &&
+ GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
+ patch_writer);
+}
+
+bool GenerateExecutableElement(ExecutableType exe_type,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ PatchElementWriter* patch_writer) {
+ // Initialize Disassemblers.
+ std::unique_ptr<Disassembler> old_disasm =
+ MakeDisassemblerOfType(old_image, exe_type);
+ std::unique_ptr<Disassembler> new_disasm =
+ MakeDisassemblerOfType(new_image, exe_type);
+ if (!old_disasm || !new_disasm) {
+ LOG(ERROR) << "Failed to create Disassembler.";
+ return false;
+ }
+ DCHECK_EQ(old_disasm->GetExeType(), new_disasm->GetExeType());
+
+ // Initialize ImageIndexes.
+ ImageIndex old_image_index(old_image);
+ ImageIndex new_image_index(new_image);
+ if (!old_image_index.Initialize(old_disasm.get()) ||
+ !new_image_index.Initialize(new_disasm.get())) {
+ LOG(ERROR) << "Failed to create ImageIndex: Overlapping references found?";
+ return false;
+ }
+ DCHECK_EQ(old_image_index.PoolCount(), new_image_index.PoolCount());
+
+ EquivalenceMap equivalences =
+ CreateEquivalenceMap(old_image_index, new_image_index);
+ OffsetMapper offset_mapper(equivalences);
+
+ ReferenceDeltaSink reference_delta_sink;
+ for (const auto& old_targets : old_image_index.target_pools()) {
+ PoolTag pool_tag = old_targets.first;
+ TargetPool projected_old_targets = old_targets.second;
+ projected_old_targets.FilterAndProject(offset_mapper);
+ std::vector<offset_t> extra_target =
+ FindExtraTargets(projected_old_targets, new_image_index.pool(pool_tag));
+ projected_old_targets.InsertTargets(extra_target);
+
+ if (!GenerateExtraTargets(extra_target, pool_tag, patch_writer))
+ return false;
+ for (TypeTag type_tag : old_targets.second.types()) {
+ if (!GenerateReferencesDelta(old_image_index.refs(type_tag),
+ new_image_index.refs(type_tag),
+ projected_old_targets, offset_mapper,
+ equivalences, &reference_delta_sink)) {
+ return false;
+ }
+ }
+ }
+ patch_writer->SetReferenceDeltaSink(std::move(reference_delta_sink));
+
+ return GenerateEquivalencesAndExtraData(new_image, equivalences,
+ patch_writer) &&
+ GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
+ patch_writer);
+}
+
+/******** Exported Functions ********/
+
+status::Code GenerateEnsemble(ConstBufferView old_image,
+ ConstBufferView new_image,
+ EnsemblePatchWriter* patch_writer) {
+ std::unique_ptr<EnsembleMatcher> matcher =
+ std::make_unique<HeuristicEnsembleMatcher>(nullptr);
+ if (!matcher->RunMatch(old_image, new_image)) {
+ LOG(INFO) << "RunMatch() failed, generating raw patch.";
+ return GenerateRaw(old_image, new_image, patch_writer);
+ }
+
+ const std::vector<ElementMatch>& matches = matcher->matches();
+ LOG(INFO) << "Matching: Found " << matches.size()
+ << " nontrivial matches and " << matcher->num_identical()
+ << " identical matches.";
+ size_t num_elements = matches.size();
+ if (num_elements == 0) {
+ LOG(INFO) << "No nontrival matches, generating raw patch.";
+ return GenerateRaw(old_image, new_image, patch_writer);
+ }
+
+ PatchType patch_type = PatchType::kRawPatch;
+ if (num_elements == 1 && matches[0].old_element.size == old_image.size() &&
+ matches[0].new_element.size == new_image.size()) {
+ // If |old_image| matches |new_image| entirely then we have single patch.
+ LOG(INFO) << "Old and new files are executables, "
+ << "generating single-file patch.";
+ patch_type = PatchType::kSinglePatch;
+ } else {
+ LOG(INFO) << "Generating ensemble patch.";
+ patch_type = PatchType::kEnsemblePatch;
+ }
+
+ // "Gaps" are |new_image| bytes not covered by new_elements in |matches|.
+ // These are treated as raw data, and patched against the entire |old_image|.
+
+ // |patch_element_map| (keyed by "new" offsets) stores PatchElementWriter
+ // results so elements and "gap" results can be computed separately (to reduce
+ // peak memory usage), and later, properly serialized to |patch_writer|
+ // ordered by "new" offset.
+ std::map<offset_t, PatchElementWriter> patch_element_map;
+
+ // Variables to track element patching successes.
+ std::vector<BufferRegion> covered_new_regions;
+ size_t covered_new_bytes = 0;
+
+ // Process elements first, since non-fatal failures may turn some into gaps.
+ for (const ElementMatch& match : matches) {
+ BufferRegion new_region = match.new_element.region();
+ LOG(INFO) << "--- Match [" << new_region.lo() << "," << new_region.hi()
+ << ")";
+
+ auto it_and_success = patch_element_map.emplace(
+ base::checked_cast<offset_t>(new_region.lo()), match);
+ DCHECK(it_and_success.second);
+ PatchElementWriter& patch_element = it_and_success.first->second;
+
+ ConstBufferView old_sub_image = old_image[match.old_element.region()];
+ ConstBufferView new_sub_image = new_image[new_region];
+ if (GenerateExecutableElement(match.exe_type(), old_sub_image,
+ new_sub_image, &patch_element)) {
+ covered_new_regions.push_back(new_region);
+ covered_new_bytes += new_region.size;
+ } else {
+ LOG(INFO) << "Fall back to raw patching.";
+ patch_element_map.erase(it_and_success.first);
+ }
+ }
+
+ if (covered_new_bytes == 0)
+ patch_type = PatchType::kRawPatch;
+
+ if (covered_new_bytes < new_image.size()) {
+ // Process all "gaps", which are patched against the entire "old" image. To
+ // compute equivalence maps, "gaps" share a common suffix array
+ // |old_sa_raw|, whose lifetime is kept separated from elements' suffix
+ // arrays to reduce peak memory.
+ Element entire_old_element(old_image.local_region(), kExeTypeNoOp);
+ ImageIndex old_image_index(old_image);
+ EncodedView old_view_raw(old_image_index);
+ std::vector<offset_t> old_sa_raw =
+ MakeSuffixArray<InducedSuffixSort>(old_view_raw, size_t(256));
+
+ offset_t gap_lo = 0;
+ // Add sentinel that points to end of "new" file, to simplify gap iteration.
+ covered_new_regions.emplace_back(BufferRegion{new_image.size(), 0});
+
+ for (const BufferRegion& covered : covered_new_regions) {
+ offset_t gap_hi = base::checked_cast<offset_t>(covered.lo());
+ DCHECK_GE(gap_hi, gap_lo);
+ offset_t gap_size = gap_hi - gap_lo;
+ if (gap_size > 0) {
+ LOG(INFO) << "--- Gap [" << gap_lo << "," << gap_hi << ")";
+
+ ElementMatch gap_match{{entire_old_element, kExeTypeNoOp},
+ {{gap_lo, gap_size}, kExeTypeNoOp}};
+ auto it_and_success = patch_element_map.emplace(gap_lo, gap_match);
+ DCHECK(it_and_success.second);
+ PatchElementWriter& patch_element = it_and_success.first->second;
+
+ ConstBufferView new_sub_image = new_image[{gap_lo, gap_size}];
+ if (!GenerateRawElement(old_sa_raw, old_image, new_sub_image,
+ &patch_element)) {
+ return status::kStatusFatal;
+ }
+ }
+ gap_lo = base::checked_cast<offset_t>(covered.hi());
+ }
+ }
+
+ patch_writer->SetPatchType(patch_type);
+ // Write all PatchElementWriter sorted by "new" offset.
+ for (auto& new_lo_and_patch_element : patch_element_map)
+ patch_writer->AddElement(std::move(new_lo_and_patch_element.second));
+
+ return status::kStatusSuccess;
+}
+
+status::Code GenerateRaw(ConstBufferView old_image,
+ ConstBufferView new_image,
+ EnsemblePatchWriter* patch_writer) {
+ patch_writer->SetPatchType(PatchType::kRawPatch);
+
+ ImageIndex old_image_index(old_image);
+ EncodedView old_view(old_image_index);
+ std::vector<offset_t> old_sa =
+ MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());
+
+ PatchElementWriter patch_element(
+ {Element(old_image.local_region()), Element(new_image.local_region())});
+ if (!GenerateRawElement(old_sa, old_image, new_image, &patch_element))
+ return status::kStatusFatal;
+ patch_writer->AddElement(std::move(patch_element));
+ return status::kStatusSuccess;
+}
+
+} // namespace zucchini
diff --git a/zucchini_gen.h b/zucchini_gen.h
new file mode 100644
index 0000000..a0f3630
--- /dev/null
+++ b/zucchini_gen.h
@@ -0,0 +1,84 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_
+
+#include <vector>
+
+#include "base/optional.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/zucchini.h"
+
+namespace zucchini {
+
+class EquivalenceMap;
+class OffsetMapper;
+class ImageIndex;
+class PatchElementWriter;
+class ReferenceDeltaSink;
+class ReferenceSet;
+class TargetPool;
+
+// Extracts all targets in |new_targets| with no associated target in
+// |projected_old_targets| and returns these targets in a new vector.
+std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets,
+ const TargetPool& new_targets);
+
+// Creates an EquivalenceMap from "old" image to "new" image and returns the
+// result. The params |*_image_index|:
+// - Provide "old" and "new" raw image data and references.
+// - Mediate Label matching, which links references between "old" and "new", and
+// guides EquivalenceMap construction.
+EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index);
+
+// Writes equivalences from |equivalence_map|, and extra data from |new_image|
+// found in gaps between equivalences to |patch_writer|.
+bool GenerateEquivalencesAndExtraData(ConstBufferView new_image,
+ const EquivalenceMap& equivalence_map,
+ PatchElementWriter* patch_writer);
+
+// Writes raw delta between |old_image| and |new_image| matched by
+// |equivalence_map| to |patch_writer|, using |new_image_index| to ignore
+// reference bytes.
+bool GenerateRawDelta(ConstBufferView old_image,
+ ConstBufferView new_image,
+ const EquivalenceMap& equivalence_map,
+ const ImageIndex& new_image_index,
+ PatchElementWriter* patch_writer);
+
+// Writes reference delta between references from |src_refs| and |dst_refs| to
+// |reference_delta_sink|. |projected_target_pool| contains projected targets
+// from old to new image for references pool associated with |dst_refs|.
+bool GenerateReferencesDelta(const ReferenceSet& src_refs,
+ const ReferenceSet& dst_refs,
+ const TargetPool& projected_target_pool,
+ const OffsetMapper& offset_mapper,
+ const EquivalenceMap& equivalence_map,
+ ReferenceDeltaSink* reference_delta_sink);
+
+// Writes |extra_targets| associated with |pool_tag| to |patch_writer|.
+bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets,
+ PoolTag pool_tag,
+ PatchElementWriter* patch_writer);
+
+// Generates raw patch element data between |old_image| and |new_image|, and
+// writes them to |patch_writer|. |old_sa| is the suffix array for |old_image|.
+bool GenerateRawElement(const std::vector<offset_t>& old_sa,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ PatchElementWriter* patch_writer);
+
+// Generates patch element of type |exe_type| from |old_image| to |new_image|,
+// and writes it to |patch_writer|.
+bool GenerateExecutableElement(ExecutableType exe_type,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ PatchElementWriter* patch_writer);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_
diff --git a/zucchini_gen_unittest.cc b/zucchini_gen_unittest.cc
new file mode 100644
index 0000000..29e84d6
--- /dev/null
+++ b/zucchini_gen_unittest.cc
@@ -0,0 +1,176 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_gen.h"
+
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/equivalence_map.h"
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/label_manager.h"
+#include "components/zucchini/test_disassembler.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using OffsetVector = std::vector<offset_t>;
+
+// In normal usage, 0.0 is an unrealistic similarity value for an
+// EquivalenceCandidate. Since similarity doesn't affect results for various
+// unit tests in this file, we use this dummy value for simplicity.
+constexpr double kDummySim = 0.0;
+
+// Helper function wrapping GenerateReferencesDelta().
+std::vector<int32_t> GenerateReferencesDeltaTest(
+ std::vector<Reference>&& old_references,
+ std::vector<Reference>&& new_references,
+ std::vector<offset_t>&& exp_old_targets,
+ std::vector<offset_t>&& exp_projected_old_targets,
+ EquivalenceMap&& equivalence_map) {
+ ReferenceDeltaSink reference_delta_sink;
+
+ TargetPool old_targets;
+ old_targets.InsertTargets(old_references);
+ ReferenceSet old_refs({1, TypeTag(0), PoolTag(0)}, old_targets);
+ old_refs.InitReferences(old_references);
+ EXPECT_EQ(exp_old_targets, old_targets.targets());
+
+ TargetPool new_targets;
+ new_targets.InsertTargets(new_references);
+ ReferenceSet new_refs({1, TypeTag(0), PoolTag(0)}, new_targets);
+ new_refs.InitReferences(new_references);
+
+ OffsetMapper offset_mapper(equivalence_map);
+ TargetPool projected_old_targets = old_targets;
+ projected_old_targets.FilterAndProject(offset_mapper);
+
+ std::vector<offset_t> extra_target =
+ FindExtraTargets(projected_old_targets, new_targets);
+ projected_old_targets.InsertTargets(extra_target);
+ EXPECT_EQ(exp_projected_old_targets, projected_old_targets.targets());
+
+ GenerateReferencesDelta(old_refs, new_refs, projected_old_targets,
+ offset_mapper, equivalence_map,
+ &reference_delta_sink);
+
+ // Serialize |reference_delta_sink| to patch format, and read it back as
+ // std::vector<int32_t>.
+ std::vector<uint8_t> buffer(reference_delta_sink.SerializedSize());
+ BufferSink sink(buffer.data(), buffer.size());
+ reference_delta_sink.SerializeInto(&sink);
+
+ BufferSource source(buffer.data(), buffer.size());
+ ReferenceDeltaSource reference_delta_source;
+ EXPECT_TRUE(reference_delta_source.Initialize(&source));
+ std::vector<int32_t> delta_vec;
+ for (auto delta = reference_delta_source.GetNext(); delta.has_value();
+ delta = reference_delta_source.GetNext()) {
+ delta_vec.push_back(*delta);
+ }
+ EXPECT_TRUE(reference_delta_source.Done());
+ return delta_vec;
+}
+
+} // namespace
+
+TEST(ZucchiniGenTest, FindExtraTargets) {
+ EXPECT_EQ(OffsetVector(), FindExtraTargets({}, {}));
+ EXPECT_EQ(OffsetVector(), FindExtraTargets(TargetPool({3}), {}));
+ EXPECT_EQ(OffsetVector(), FindExtraTargets(TargetPool({3}), TargetPool({3})));
+ EXPECT_EQ(OffsetVector({4}),
+ FindExtraTargets(TargetPool({3}), TargetPool({4})));
+ EXPECT_EQ(OffsetVector({4}),
+ FindExtraTargets(TargetPool({3}), TargetPool({3, 4})));
+ EXPECT_EQ(OffsetVector({4}),
+ FindExtraTargets(TargetPool({2, 3}), TargetPool({3, 4})));
+ EXPECT_EQ(OffsetVector({3, 5}),
+ FindExtraTargets(TargetPool({2, 4}), TargetPool({3, 5})));
+}
+
+TEST(ZucchiniGenTest, GenerateReferencesDelta) {
+ // No equivalences.
+ EXPECT_EQ(std::vector<int32_t>(),
+ GenerateReferencesDeltaTest({}, {}, {}, {}, EquivalenceMap()));
+ EXPECT_EQ(std::vector<int32_t>(),
+ GenerateReferencesDeltaTest({{10, 0}}, {{20, 0}}, {0}, {0},
+ EquivalenceMap()));
+
+ // Simple cases with one equivalence.
+ EXPECT_EQ(
+ std::vector<int32_t>({0}), // {0 - 0}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}}, {{20, 3}}, {3}, {3},
+ EquivalenceMap({{{3, 3, 1}, kDummySim}, {{10, 20, 4}, kDummySim}})));
+ EXPECT_EQ(
+ std::vector<int32_t>({-1}), // {0 - 1}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}}, {{20, 3}}, {3}, {3, 4},
+ EquivalenceMap({{{3, 4, 1}, kDummySim}, {{10, 20, 4}, kDummySim}})));
+ EXPECT_EQ(
+ std::vector<int32_t>({1}), // {1 - 0}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}}, {{20, 3}}, {3}, {2, 3},
+ EquivalenceMap({{{3, 2, 1}, kDummySim}, {{10, 20, 4}, kDummySim}})));
+ EXPECT_EQ(std::vector<int32_t>({1, -1}), // {1 - 0, 2 - 3}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}, {11, 4}}, {{20, 3}, {21, 4}}, {3, 4}, {2, 3, 4, 5},
+ EquivalenceMap({{{3, 2, 1}, kDummySim},
+ {{4, 5, 1}, kDummySim},
+ {{10, 20, 4}, kDummySim}})));
+
+ EXPECT_EQ(
+ std::vector<int32_t>({0, 0}), // {1 - 1, 2 - 2}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}, {11, 4}, {12, 5}, {13, 6}},
+ {{20, 3}, {21, 4}, {22, 5}, {23, 6}}, {3, 4, 5, 6}, {3, 4, 5, 6},
+ EquivalenceMap({{{3, 3, 4}, kDummySim}, {{11, 21, 2}, kDummySim}})));
+
+ // Multiple equivalences.
+ EXPECT_EQ(std::vector<int32_t>({-1, 1}), // {0 - 1, 1 - 0}.
+ GenerateReferencesDeltaTest(
+ {{10, 0}, {12, 1}}, {{10, 0}, {12, 1}}, {0, 1}, {0, 1},
+ EquivalenceMap({{{0, 0, 2}, kDummySim},
+ {{12, 10, 2}, kDummySim},
+ {{10, 12, 2}, kDummySim}})));
+ EXPECT_EQ(
+ std::vector<int32_t>({0, 0}), // {0 - 0, 1 - 1}.
+ GenerateReferencesDeltaTest(
+ {{0, 0}, {2, 2}}, {{0, 0}, {2, 2}}, {0, 2}, {0, 2},
+ EquivalenceMap({{{2, 0, 2}, kDummySim}, {{0, 2, 2}, kDummySim}})));
+
+ EXPECT_EQ(std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}.
+ GenerateReferencesDeltaTest(
+ {{10, 0}, {12, 1}, {14, 2}}, {{10, 0}, {12, 1}, {14, 2}},
+ {0, 1, 2}, {0, 1, 2},
+ EquivalenceMap({{{0, 0, 3}, kDummySim},
+ {{14, 10, 2}, kDummySim},
+ {{10, 14, 2}, kDummySim}})));
+
+ EXPECT_EQ(std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}.
+ GenerateReferencesDeltaTest(
+ {{11, 0}, {14, 1}, {17, 2}}, {{11, 0}, {14, 1}, {17, 2}},
+ {0, 1, 2}, {0, 1, 2},
+ EquivalenceMap({{{0, 0, 3}, kDummySim},
+ {{16, 10, 3}, kDummySim},
+ {{10, 16, 3}, kDummySim}})));
+
+ EXPECT_EQ(
+ std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}.
+ GenerateReferencesDeltaTest({{10, 0}, {14, 2}, {16, 1}},
+ {{10, 0}, {14, 2}}, {0, 1, 2}, {0, 1, 2},
+ EquivalenceMap({{{0, 0, 3}, kDummySim},
+ {{14, 10, 2}, kDummySim},
+ {{12, 12, 2}, kDummySim},
+ {{10, 14, 2}, kDummySim}})));
+}
+
+// TODO(huangs): Add more tests.
+
+} // namespace zucchini
diff --git a/zucchini_integration.cc b/zucchini_integration.cc
new file mode 100644
index 0000000..3ca4601
--- /dev/null
+++ b/zucchini_integration.cc
@@ -0,0 +1,122 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_integration.h"
+
+#include <utility>
+
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/mapped_file.h"
+#include "components/zucchini/patch_reader.h"
+
+namespace zucchini {
+
+namespace {
+
+struct FileNames {
+ FileNames() : is_dummy(true) {
+ // Use fake names. If |is_dummy| is true these files are only used for error
+ // output.
+ old_name = old_name.AppendASCII("old_name");
+ patch_name = patch_name.AppendASCII("patch_name");
+ new_name = new_name.AppendASCII("new_name");
+ }
+
+ FileNames(const base::FilePath& old_name,
+ const base::FilePath& patch_name,
+ const base::FilePath& new_name)
+ : old_name(old_name),
+ patch_name(patch_name),
+ new_name(new_name),
+ is_dummy(false) {}
+
+ base::FilePath old_name;
+ base::FilePath patch_name;
+ base::FilePath new_name;
+ const bool is_dummy;
+};
+
+status::Code ApplyCommon(base::File&& old_file_handle,
+ base::File&& patch_file_handle,
+ base::File&& new_file_handle,
+ const FileNames& names) {
+ MappedFileReader patch_file(std::move(patch_file_handle));
+ if (patch_file.HasError()) {
+ LOG(ERROR) << "Error with file " << names.patch_name.value() << ": "
+ << patch_file.error();
+ return status::kStatusFileReadError;
+ }
+
+ auto patch_reader =
+ zucchini::EnsemblePatchReader::Create(patch_file.region());
+ if (!patch_reader.has_value()) {
+ LOG(ERROR) << "Error reading patch header.";
+ return status::kStatusPatchReadError;
+ }
+
+ MappedFileReader old_file(std::move(old_file_handle));
+ if (old_file.HasError()) {
+ LOG(ERROR) << "Error with file " << names.old_name.value() << ": "
+ << old_file.error();
+ return status::kStatusFileReadError;
+ }
+ if (!patch_reader->CheckOldFile(old_file.region())) {
+ LOG(ERROR) << "Invalid old_file.";
+ return status::kStatusInvalidOldImage;
+ }
+
+ zucchini::PatchHeader header = patch_reader->header();
+ // By default, delete output on destruction, to avoid having lingering files
+ // in case of a failure. On Windows deletion can be done by the OS.
+ base::FilePath file_path;
+ if (!names.is_dummy)
+ file_path = base::FilePath(names.new_name);
+
+ MappedFileWriter new_file(file_path, std::move(new_file_handle),
+ header.new_size);
+ if (new_file.HasError()) {
+ LOG(ERROR) << "Error with file " << names.new_name.value() << ": "
+ << new_file.error();
+ return status::kStatusFileWriteError;
+ }
+
+ zucchini::status::Code result =
+ zucchini::Apply(old_file.region(), *patch_reader, new_file.region());
+ if (result != status::kStatusSuccess) {
+ LOG(ERROR) << "Fatal error encountered while applying patch.";
+ return result;
+ }
+
+ // Successfully patched |new_file|. Explicitly request file to be kept.
+ if (!new_file.Keep())
+ return status::kStatusFileWriteError;
+ return status::kStatusSuccess;
+}
+
+} // namespace
+
+status::Code Apply(base::File&& old_file_handle,
+ base::File&& patch_file_handle,
+ base::File&& new_file_handle) {
+ const FileNames file_names = FileNames();
+ return ApplyCommon(std::move(old_file_handle), std::move(patch_file_handle),
+ std::move(new_file_handle), file_names);
+}
+
+status::Code Apply(const base::FilePath& old_path,
+ const base::FilePath& patch_path,
+ const base::FilePath& new_path) {
+ using base::File;
+ File old_file(old_path, File::FLAG_OPEN | File::FLAG_READ);
+ File patch_file(patch_path, File::FLAG_OPEN | File::FLAG_READ);
+ File new_file(new_path, File::FLAG_CREATE_ALWAYS | File::FLAG_READ |
+ File::FLAG_WRITE | File::FLAG_SHARE_DELETE |
+ File::FLAG_CAN_DELETE_ON_CLOSE);
+ const FileNames file_names(old_path, patch_path, new_path);
+ return ApplyCommon(std::move(old_file), std::move(patch_file),
+ std::move(new_file), file_names);
+}
+
+} // namespace zucchini
diff --git a/zucchini_integration.h b/zucchini_integration.h
new file mode 100644
index 0000000..7c3fc40
--- /dev/null
+++ b/zucchini_integration.h
@@ -0,0 +1,34 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_
+
+#include "base/files/file.h"
+#include "base/files/file_path.h"
+#include "components/zucchini/zucchini.h"
+
+namespace zucchini {
+
+// Applies the patch in |patch_file| to the bytes in |old_file| and writes the
+// result to |new_file|. Since this uses memory mapped files, crashes are
+// expected in case of I/O errors. On Windows |new_file| is kept iff returned
+// code is kStatusSuccess, and is deleted otherwise. For UNIX systems the
+// caller needs to do cleanup since it has ownership of the base::File params
+// and Zucchini has no knowledge of which base::FilePath to delete.
+status::Code Apply(base::File&& old_file,
+ base::File&& patch_file,
+ base::File&& new_file);
+
+// Applies the patch in |patch_path| to the bytes in |old_path| and writes the
+// result to |new_path|. Since this uses memory mapped files, crashes are
+// expected in case of I/O errors. |new_path| is kept iff returned code is
+// kStatusSuccess, and is deleted otherwise.
+status::Code Apply(const base::FilePath& old_path,
+ const base::FilePath& patch_path,
+ const base::FilePath& new_path);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_
diff --git a/zucchini_main.cc b/zucchini_main.cc
new file mode 100644
index 0000000..adff154
--- /dev/null
+++ b/zucchini_main.cc
@@ -0,0 +1,54 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <iostream>
+
+#include "base/command_line.h"
+#include "base/logging.h"
+#include "base/process/memory.h"
+#include "build/build_config.h"
+#include "components/zucchini/main_utils.h"
+
+#if defined(OS_WIN)
+#include "base/win/process_startup_helper.h"
+#endif // defined(OS_WIN)
+
+namespace {
+
+void InitLogging() {
+ logging::LoggingSettings settings;
+ settings.logging_dest = logging::LOG_TO_SYSTEM_DEBUG_LOG;
+ settings.log_file = nullptr;
+ settings.lock_log = logging::DONT_LOCK_LOG_FILE;
+ settings.delete_old = logging::APPEND_TO_OLD_LOG_FILE;
+ bool logging_res = logging::InitLogging(settings);
+ CHECK(logging_res);
+}
+
+void InitErrorHandling(const base::CommandLine& command_line) {
+ base::EnableTerminationOnHeapCorruption();
+ base::EnableTerminationOnOutOfMemory();
+#if defined(OS_WIN)
+ base::win::RegisterInvalidParamHandler();
+ base::win::SetupCRT(command_line);
+#endif // defined(OS_WIN)
+}
+
+} // namespace
+
+int main(int argc, const char* argv[]) {
+ // Initialize infrastructure from base.
+ base::CommandLine::Init(argc, argv);
+ const base::CommandLine& command_line =
+ *base::CommandLine::ForCurrentProcess();
+ InitLogging();
+ InitErrorHandling(command_line);
+ zucchini::status::Code status =
+ RunZucchiniCommand(command_line, std::cout, std::cerr);
+ if (!(status == zucchini::status::kStatusSuccess ||
+ status == zucchini::status::kStatusInvalidParam)) {
+ std::cerr << "Failed with code " << static_cast<int>(status) << std::endl;
+ }
+ return static_cast<int>(status);
+}
diff --git a/zucchini_tools.cc b/zucchini_tools.cc
new file mode 100644
index 0000000..784e355
--- /dev/null
+++ b/zucchini_tools.cc
@@ -0,0 +1,126 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_tools.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <memory>
+#include <ostream>
+#include <string>
+
+#include "base/bind.h"
+#include "base/strings/stringprintf.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/ensemble_matcher.h"
+#include "components/zucchini/heuristic_ensemble_matcher.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+status::Code ReadReferences(ConstBufferView image,
+ bool do_dump,
+ std::ostream& out) {
+ std::unique_ptr<Disassembler> disasm = MakeDisassemblerWithoutFallback(image);
+ if (!disasm) {
+ out << "Input file not recognized as executable." << std::endl;
+ return status::kStatusInvalidOldImage;
+ }
+
+ std::vector<offset_t> targets;
+ for (const auto& group : disasm->MakeReferenceGroups()) {
+ targets.clear();
+ auto refs = group.GetReader(disasm.get());
+ for (auto ref = refs->GetNext(); ref.has_value(); ref = refs->GetNext())
+ targets.push_back(ref->target);
+
+ size_t num_locations = targets.size();
+ std::sort(targets.begin(), targets.end());
+ targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
+ size_t num_targets = targets.size();
+
+ out << "Type " << int(group.type_tag().value());
+ out << ": Pool=" << static_cast<uint32_t>(group.pool_tag().value());
+ out << ", width=" << group.width();
+ out << ", #locations=" << num_locations;
+ out << ", #targets=" << num_targets;
+ if (num_targets > 0) {
+ double ratio = static_cast<double>(num_locations) / num_targets;
+ out << " (ratio=" << base::StringPrintf("%.4f", ratio) << ")";
+ }
+ out << std::endl;
+
+ if (do_dump) {
+ auto refs = group.GetReader(disasm.get());
+
+ for (auto ref = refs->GetNext(); ref; ref = refs->GetNext()) {
+ out << " " << AsHex<8>(ref->location);
+ out << " " << AsHex<8>(ref->target) << std::endl;
+ }
+ }
+ }
+
+ return status::kStatusSuccess;
+}
+
+status::Code DetectAll(ConstBufferView image,
+ std::ostream& out,
+ std::vector<ConstBufferView>* sub_image_list) {
+ DCHECK_NE(sub_image_list, nullptr);
+ sub_image_list->clear();
+
+ const size_t size = image.size();
+ size_t last_out_pos = 0;
+ size_t total_bytes_found = 0;
+
+ auto print_range = [&out](size_t pos, size_t size, const std::string& msg) {
+ out << "-- " << AsHex<8, size_t>(pos) << " +" << AsHex<8, size_t>(size)
+ << ": " << msg << std::endl;
+ };
+
+ ElementFinder finder(image,
+ base::BindRepeating(DetectElementFromDisassembler));
+ for (auto element = finder.GetNext(); element.has_value();
+ element = finder.GetNext()) {
+ ConstBufferView sub_image = image[element->region()];
+ sub_image_list->push_back(sub_image);
+ size_t pos = sub_image.begin() - image.begin();
+ size_t prog_size = sub_image.size();
+ if (last_out_pos < pos)
+ print_range(last_out_pos, pos - last_out_pos, "?");
+ auto disasm = MakeDisassemblerOfType(sub_image, element->exe_type);
+ print_range(pos, prog_size, disasm->GetExeTypeString());
+ total_bytes_found += prog_size;
+ last_out_pos = pos + prog_size;
+ }
+ if (last_out_pos < size)
+ print_range(last_out_pos, size - last_out_pos, "?");
+ out << std::endl;
+
+ // Print summary, using decimal instead of hexadecimal.
+ out << "Detected " << total_bytes_found << "/" << size << " bytes => ";
+ double percent = total_bytes_found * 100.0 / size;
+ out << base::StringPrintf("%.2f", percent) << "%." << std::endl;
+
+ return status::kStatusSuccess;
+}
+
+status::Code MatchAll(ConstBufferView old_image,
+ ConstBufferView new_image,
+ std::ostream& out) {
+ HeuristicEnsembleMatcher matcher(&out);
+ if (!matcher.RunMatch(old_image, new_image)) {
+ out << "RunMatch() failed." << std::endl;
+ return status::kStatusFatal;
+ }
+ out << "Found " << matcher.matches().size() << " nontrivial matches and "
+ << matcher.num_identical() << " identical matches." << std::endl;
+
+ return status::kStatusSuccess;
+}
+
+} // namespace zucchini
diff --git a/zucchini_tools.h b/zucchini_tools.h
new file mode 100644
index 0000000..6268745
--- /dev/null
+++ b/zucchini_tools.h
@@ -0,0 +1,38 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_
+
+#include <iosfwd>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/zucchini.h"
+
+namespace zucchini {
+
+// The functions below are called to print diagnosis information, so outputs are
+// printed using std::ostream instead of LOG().
+
+// Prints stats on references found in |image|. If |do_dump| is true, then
+// prints all references (locations and targets).
+status::Code ReadReferences(ConstBufferView image,
+ bool do_dump,
+ std::ostream& out);
+
+// Prints regions and types of all detected executables in |image|. Clears
+// |sub_image_list|, then appends detected subregions to it.
+status::Code DetectAll(ConstBufferView image,
+ std::ostream& out,
+ std::vector<ConstBufferView>* sub_image_list);
+
+// Prints all matched regions from |old_image| to |new_image|.
+status::Code MatchAll(ConstBufferView old_image,
+ ConstBufferView new_image,
+ std::ostream& out);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_