aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTianjie <xunchang@google.com>2021-08-18 01:06:25 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2021-08-18 01:06:25 +0000
commit45f3b91b74fbe1cfefb7576eaa3be167f91c9cd0 (patch)
tree53b4f7bd5a8f457ab0358cd7bc320c4778291445
parentdbee7e9d98c23beed6a03bb38fb007ba586c0e51 (diff)
parent08450315c3a4cc6ed94d0db406b53b98e1501322 (diff)
downloadzucchini-45f3b91b74fbe1cfefb7576eaa3be167f91c9cd0.tar.gz
Merge remote-tracking branch 'remotes/aosp/upstream-main' into clean am: fd73dddb6b am: 08450315c3
Original change: https://android-review.googlesource.com/c/platform/external/zucchini/+/1799387 Change-Id: I5f19b64147d90ca949afeb394d9cd6e456adb1ee
-rw-r--r--BUILD.gn242
-rw-r--r--DIR_METADATA3
-rw-r--r--README.md280
-rw-r--r--abs32_utils.cc211
-rw-r--r--abs32_utils.h142
-rw-r--r--abs32_utils_unittest.cc543
-rw-r--r--address_translator.cc258
-rw-r--r--address_translator.h199
-rw-r--r--address_translator_unittest.cc586
-rw-r--r--algorithm.h146
-rw-r--r--algorithm_unittest.cc347
-rw-r--r--arm_utils.cc597
-rw-r--r--arm_utils.h423
-rw-r--r--arm_utils_unittest.cc862
-rw-r--r--binary_data_histogram.cc91
-rw-r--r--binary_data_histogram.h90
-rw-r--r--binary_data_histogram_unittest.cc132
-rw-r--r--buffer_sink.cc11
-rw-r--r--buffer_sink.h68
-rw-r--r--buffer_sink_unittest.cc71
-rw-r--r--buffer_source.cc105
-rw-r--r--buffer_source.h141
-rw-r--r--buffer_source_unittest.cc347
-rw-r--r--buffer_view.h217
-rw-r--r--buffer_view_unittest.cc298
-rw-r--r--crc32.cc43
-rw-r--r--crc32.h17
-rw-r--r--crc32_unittest.cc47
-rw-r--r--disassembler.cc52
-rw-r--r--disassembler.h154
-rw-r--r--disassembler_dex.cc1670
-rw-r--r--disassembler_dex.h273
-rw-r--r--disassembler_dex_unittest.cc51
-rw-r--r--disassembler_elf.cc855
-rw-r--r--disassembler_elf.h383
-rw-r--r--disassembler_elf_unittest.cc179
-rw-r--r--disassembler_no_op.cc31
-rw-r--r--disassembler_no_op.h39
-rw-r--r--disassembler_win32.cc410
-rw-r--r--disassembler_win32.h131
-rw-r--r--disassembler_ztf.cc653
-rw-r--r--disassembler_ztf.h201
-rw-r--r--disassembler_ztf_unittest.cc402
-rw-r--r--element_detection.cc165
-rw-r--r--element_detection.h59
-rw-r--r--element_detection_unittest.cc102
-rw-r--r--encoded_view.cc78
-rw-r--r--encoded_view.h185
-rw-r--r--encoded_view_unittest.cc202
-rw-r--r--ensemble_matcher.cc37
-rw-r--r--ensemble_matcher.h60
-rw-r--r--equivalence_map.cc548
-rw-r--r--equivalence_map.h207
-rw-r--r--equivalence_map_unittest.cc635
-rw-r--r--fuzzers/BUILD.gn210
-rw-r--r--fuzzers/apply_fuzzer.cc59
-rwxr-xr-xfuzzers/create_seed_file_pair.py81
-rw-r--r--fuzzers/disassembler_dex_fuzzer.cc54
-rw-r--r--fuzzers/disassembler_elf_fuzzer.cc45
-rw-r--r--fuzzers/disassembler_win32_fuzzer.cc52
-rw-r--r--fuzzers/file_pair.proto21
-rw-r--r--fuzzers/fuzz_utils.cc40
-rw-r--r--fuzzers/fuzz_utils.h25
-rwxr-xr-xfuzzers/generate_fuzzer_data.py81
-rw-r--r--fuzzers/imposed_ensemble_matcher_fuzzer.cc67
-rw-r--r--fuzzers/patch_fuzzer.cc19
-rw-r--r--fuzzers/raw_gen_fuzzer.cc71
-rw-r--r--fuzzers/testdata/.gitignore4
-rw-r--r--fuzzers/testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb90
-rw-r--r--fuzzers/testdata/new.ztf20
-rw-r--r--fuzzers/testdata/new_eventlog_provider.dll.sha11
-rw-r--r--fuzzers/testdata/new_imposed_archive.txt43
-rw-r--r--fuzzers/testdata/old.ztf21
-rw-r--r--fuzzers/testdata/old_eventlog_provider.dll.sha11
-rw-r--r--fuzzers/testdata/old_imposed_archive.txt45
-rw-r--r--fuzzers/testdata/patch_fuzzer/empty.zucbin0 -> 76 bytes
-rw-r--r--fuzzers/testdata/raw_or_ztf_gen_fuzzer/seed_proto.bin42
-rw-r--r--fuzzers/ztf_gen_fuzzer.cc67
-rw-r--r--heuristic_ensemble_matcher.cc369
-rw-r--r--heuristic_ensemble_matcher.h39
-rw-r--r--image_index.cc78
-rw-r--r--image_index.h116
-rw-r--r--image_index_unittest.cc131
-rw-r--r--image_utils.h225
-rw-r--r--image_utils_unittest.cc33
-rw-r--r--imposed_ensemble_matcher.cc143
-rw-r--r--imposed_ensemble_matcher.h83
-rw-r--r--imposed_ensemble_matcher_unittest.cc214
-rw-r--r--integration_test.cc103
-rw-r--r--io_utils.cc52
-rw-r--r--io_utils.h144
-rw-r--r--io_utils_unittest.cc160
-rw-r--r--main_utils.cc255
-rw-r--r--main_utils.h34
-rw-r--r--mapped_file.cc69
-rw-r--r--mapped_file.h82
-rw-r--r--mapped_file_unittest.cc61
-rw-r--r--patch_read_write_unittest.cc730
-rw-r--r--patch_reader.cc388
-rw-r--r--patch_reader.h285
-rw-r--r--patch_utils.h135
-rw-r--r--patch_utils_unittest.cc169
-rw-r--r--patch_writer.cc291
-rw-r--r--patch_writer.h272
-rw-r--r--reference_bytes_mixer.cc150
-rw-r--r--reference_bytes_mixer.h118
-rw-r--r--reference_set.cc60
-rw-r--r--reference_set.h64
-rw-r--r--reference_set_unittest.cc49
-rw-r--r--rel32_finder.cc294
-rw-r--r--rel32_finder.h284
-rw-r--r--rel32_finder_unittest.cc743
-rw-r--r--rel32_utils.cc67
-rw-r--r--rel32_utils.h184
-rw-r--r--rel32_utils_unittest.cc541
-rw-r--r--reloc_elf.cc163
-rw-r--r--reloc_elf.h102
-rw-r--r--reloc_elf_unittest.cc242
-rw-r--r--reloc_win32.cc196
-rw-r--r--reloc_win32.h140
-rw-r--r--reloc_win32_unittest.cc251
-rw-r--r--suffix_array.h475
-rw-r--r--suffix_array_unittest.cc342
-rw-r--r--target_pool.cc84
-rw-r--r--target_pool.h80
-rw-r--r--target_pool_unittest.cc64
-rw-r--r--targets_affinity.cc108
-rw-r--r--targets_affinity.h73
-rw-r--r--targets_affinity_unittest.cc131
-rw-r--r--test_disassembler.cc61
-rw-r--r--test_disassembler.h77
-rw-r--r--test_reference_reader.cc20
-rw-r--r--test_reference_reader.h32
-rw-r--r--test_utils.cc26
-rw-r--r--test_utils.h35
-rw-r--r--testdata/chrome64_1.exe.sha11
-rw-r--r--testdata/chrome64_2.exe.sha11
-rw-r--r--testdata/setup1.exe.sha11
-rw-r--r--testdata/setup2.exe.sha11
-rw-r--r--type_dex.h291
-rw-r--r--type_elf.h283
-rw-r--r--type_win_pe.h191
-rw-r--r--type_ztf.h54
-rw-r--r--typed_value.h57
-rw-r--r--typed_value_unittest.cc40
-rw-r--r--zucchini.h72
-rw-r--r--zucchini_apply.cc217
-rw-r--r--zucchini_apply.h41
-rw-r--r--zucchini_apply_unittest.cc14
-rw-r--r--zucchini_commands.cc141
-rw-r--r--zucchini_commands.h51
-rw-r--r--zucchini_exe_version.rc.version46
-rw-r--r--zucchini_gen.cc461
-rw-r--r--zucchini_gen.h85
-rw-r--r--zucchini_gen_unittest.cc180
-rw-r--r--zucchini_integration.cc209
-rw-r--r--zucchini_integration.h68
-rw-r--r--zucchini_main.cc55
-rw-r--r--zucchini_tools.cc140
-rw-r--r--zucchini_tools.h45
160 files changed, 28100 insertions, 0 deletions
diff --git a/BUILD.gn b/BUILD.gn
new file mode 100644
index 0000000..54b06ab
--- /dev/null
+++ b/BUILD.gn
@@ -0,0 +1,242 @@
+# Copyright 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/buildflag_header.gni")
+import("//chrome/process_version_rc_template.gni")
+import("//testing/test.gni")
+
+buildflag_header("buildflags") {
+ header = "buildflags.h"
+
+ # Disable DEX on Windows Official Builds.
+ _enable_dex = !(is_win && is_official_build)
+ _enable_elf = true
+ _enable_win = true
+
+  # Disable ZTF (Zucchini Text Format) on official builds, as it is for testing only.
+ _enable_ztf = !is_official_build
+ flags = [
+ "ENABLE_DEX=$_enable_dex",
+ "ENABLE_ELF=$_enable_elf",
+ "ENABLE_WIN=$_enable_win",
+ "ENABLE_ZTF=$_enable_ztf",
+ ]
+}
+
+static_library("zucchini_lib") {
+ sources = [
+ "abs32_utils.cc",
+ "abs32_utils.h",
+ "address_translator.cc",
+ "address_translator.h",
+ "algorithm.h",
+ "arm_utils.cc",
+ "arm_utils.h",
+ "binary_data_histogram.cc",
+ "binary_data_histogram.h",
+ "buffer_sink.cc",
+ "buffer_sink.h",
+ "buffer_source.cc",
+ "buffer_source.h",
+ "buffer_view.h",
+ "crc32.cc",
+ "crc32.h",
+ "disassembler.cc",
+ "disassembler.h",
+ "disassembler_dex.cc",
+ "disassembler_dex.h",
+ "disassembler_elf.cc",
+ "disassembler_elf.h",
+ "disassembler_no_op.cc",
+ "disassembler_no_op.h",
+ "disassembler_win32.cc",
+ "disassembler_win32.h",
+ "disassembler_ztf.cc",
+ "disassembler_ztf.h",
+ "element_detection.cc",
+ "element_detection.h",
+ "encoded_view.cc",
+ "encoded_view.h",
+ "ensemble_matcher.cc",
+ "ensemble_matcher.h",
+ "equivalence_map.cc",
+ "equivalence_map.h",
+ "heuristic_ensemble_matcher.cc",
+ "heuristic_ensemble_matcher.h",
+ "image_index.cc",
+ "image_index.h",
+ "image_utils.h",
+ "imposed_ensemble_matcher.cc",
+ "imposed_ensemble_matcher.h",
+ "io_utils.cc",
+ "io_utils.h",
+ "patch_reader.cc",
+ "patch_reader.h",
+ "patch_utils.h",
+ "patch_writer.cc",
+ "patch_writer.h",
+ "reference_bytes_mixer.cc",
+ "reference_bytes_mixer.h",
+ "reference_set.cc",
+ "reference_set.h",
+ "rel32_finder.cc",
+ "rel32_finder.h",
+ "rel32_utils.cc",
+ "rel32_utils.h",
+ "reloc_elf.cc",
+ "reloc_elf.h",
+ "reloc_win32.cc",
+ "reloc_win32.h",
+ "suffix_array.h",
+ "target_pool.cc",
+ "target_pool.h",
+ "targets_affinity.cc",
+ "targets_affinity.h",
+ "type_dex.h",
+ "type_elf.h",
+ "type_win_pe.h",
+ "typed_value.h",
+ "zucchini.h",
+ "zucchini_apply.cc",
+ "zucchini_apply.h",
+ "zucchini_gen.cc",
+ "zucchini_gen.h",
+ "zucchini_tools.cc",
+ "zucchini_tools.h",
+ ]
+
+ deps = [
+ ":buildflags",
+ "//base",
+ ]
+}
+
+static_library("zucchini_io") {
+ sources = [
+ "mapped_file.cc",
+ "mapped_file.h",
+ "zucchini_integration.cc",
+ "zucchini_integration.h",
+ ]
+
+ deps = [
+ ":zucchini_lib",
+ "//base",
+ ]
+}
+
+executable("zucchini") {
+ sources = [
+ "main_utils.cc",
+ "main_utils.h",
+ "zucchini_commands.cc",
+ "zucchini_commands.h",
+ "zucchini_main.cc",
+ ]
+
+ deps = [
+ ":zucchini_io",
+ ":zucchini_lib",
+ "//base",
+ ]
+
+ if (is_win) {
+ deps += [ ":zucchini_exe_version" ]
+ }
+}
+
+if (is_win) {
+ process_version_rc_template("zucchini_exe_version") {
+ template_file = "zucchini_exe_version.rc.version"
+ output = "$target_gen_dir/zucchini_exe_version.rc"
+ }
+}
+
+test("zucchini_unittests") {
+ sources = [
+ "abs32_utils_unittest.cc",
+ "address_translator_unittest.cc",
+ "algorithm_unittest.cc",
+ "arm_utils_unittest.cc",
+ "binary_data_histogram_unittest.cc",
+ "buffer_sink_unittest.cc",
+ "buffer_source_unittest.cc",
+ "buffer_view_unittest.cc",
+ "crc32_unittest.cc",
+ "disassembler_dex_unittest.cc",
+ "disassembler_elf_unittest.cc",
+ "disassembler_ztf_unittest.cc",
+ "element_detection_unittest.cc",
+ "encoded_view_unittest.cc",
+ "equivalence_map_unittest.cc",
+ "image_index_unittest.cc",
+ "image_utils_unittest.cc",
+ "imposed_ensemble_matcher_unittest.cc",
+ "io_utils_unittest.cc",
+ "mapped_file_unittest.cc",
+ "patch_read_write_unittest.cc",
+ "patch_utils_unittest.cc",
+ "reference_set_unittest.cc",
+ "rel32_finder_unittest.cc",
+ "rel32_utils_unittest.cc",
+ "reloc_elf_unittest.cc",
+ "reloc_win32_unittest.cc",
+ "suffix_array_unittest.cc",
+ "target_pool_unittest.cc",
+ "targets_affinity_unittest.cc",
+ "test_disassembler.cc",
+ "test_disassembler.h",
+ "test_reference_reader.cc",
+ "test_reference_reader.h",
+ "test_utils.cc",
+ "test_utils.h",
+ "typed_value_unittest.cc",
+ "zucchini_apply_unittest.cc",
+ "zucchini_gen_unittest.cc",
+ ]
+
+ deps = [
+ ":zucchini_io",
+ ":zucchini_lib",
+ "//base",
+ "//base/test:run_all_unittests",
+ "//base/test:test_support",
+ "//testing/gtest",
+ ]
+}
+
+test("zucchini_integration_test") {
+ sources = [ "integration_test.cc" ]
+
+ deps = [
+ ":zucchini_lib",
+ "//base",
+ "//base/test:run_all_unittests",
+ "//base/test:test_support",
+ "//testing/gtest",
+ ]
+
+ data = [ "testdata" ]
+}
+
+# Group to build and depend on all the Zucchini related fuzzers.
+group("zucchini_fuzzers") {
+ testonly = true
+ deps = [
+ "//components/zucchini/fuzzers:zucchini_disassembler_dex_fuzzer",
+ "//components/zucchini/fuzzers:zucchini_disassembler_win32_fuzzer",
+ "//components/zucchini/fuzzers:zucchini_patch_fuzzer",
+ ]
+
+ # Ensure protoc is available.
+ # Disabled on Windows due to crbug/844826.
+ if (current_toolchain == host_toolchain && !is_win) {
+ deps += [
+ "//components/zucchini/fuzzers:zucchini_apply_fuzzer",
+ "//components/zucchini/fuzzers:zucchini_imposed_ensemble_matcher_fuzzer",
+ "//components/zucchini/fuzzers:zucchini_raw_gen_fuzzer",
+ "//components/zucchini/fuzzers:zucchini_ztf_gen_fuzzer",
+ ]
+ }
+}
diff --git a/DIR_METADATA b/DIR_METADATA
new file mode 100644
index 0000000..03fc466
--- /dev/null
+++ b/DIR_METADATA
@@ -0,0 +1,3 @@
+monorail {
+ component: "Internals>Installer>Diff"
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d3fd0a1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,280 @@
+
+## Basic Definitions for Patching
+
+**Binary**: Executable image and data. Binaries may persist in an archive
+(e.g., chrome.7z), and need to be periodically updated. Formats for binaries
+include {PE files EXE / DLL, ELF, DEX}. Architectures binaries include
+{x86, x64, ARM, AArch64, Dalvik}. A binary is also referred to as an executable
+or an image file.
+
+**Patching**: Sending a "new" file to clients who have an "old" file by
+computing and transmitting a "patch" that can be used to transform "old" into
+"new". Patches are compressed for transmission. A key performance metric is
+patch size, which refers to the size of compressed patch file. For our
+experiments we use 7z.
+
+**Patch generation**: Computation of a "patch" from "old" and "new". This can be
+expensive (e.g., ~15-20 min for Chrome, using 1 GB of RAM), but since patch
+generation is a run-once step on the server-side when releasing "new" binaries,
+the expense is not too critical.
+
+**Patch application**: Transformation from "old" binaries to "new", using a
+(downloaded) "patch". This is executed on client side on updates, so resource
+constraints (e.g., time, RAM, disk space) are more stringent. Also, fault-
+tolerance is important. This is usually achieved by an update system by having
+a fallback method of directly downloading "new" in case of patching failure.
+
+**Offset**: Position relative to the start of a file.
+
+**Local offset**: An offset relative to the start of a region of a file.
+
+**Element**: A region in a file with associated executable type, represented by
+the tuple (exe_type, offset, length). Every Element in new file is associated
+with an Element in old file and patched independently.
+
+**Reference**: A directed connection between two offsets in a binary. For
+example, consider jump instructions in x86:
+
+ 00401000: E9 3D 00 00 00 jmp 00401042
+
+Here, the 4 bytes `[3D 00 00 00]` starting at address `00401001` point to
+address `00401042` in memory. This forms a reference from `offset(00401001)`
+(length 4) to `offset(00401042)`, where `offset(addr)` indicates the disk
+offset corresponding to `addr`. A reference has a location, length (implicitly
+determined by reference type), body, and target.
+
+**Location**: The starting offset of bytes that store a reference. In the
+preceding example, `offset(00401001)` is a location. Each location is the
+beginning of a reference body.
+
+**Body**: The span of bytes that encodes reference data, i.e.,
+[location, location + length) =
+[location, location + 1, ..., location + length - 1].
+In the preceding example, `length = 4`, so the reference body is
+`[00401001, 00401001 + 4) = [00401001, 00401002, 00401003, 00401004]`.
+All reference bodies in an image must not overlap, and often region boundaries
+are required to not straddle a reference body.
+
+**Target**: The offset that's the destination of a reference. In the preceding
+example, `offset(00401042)` is the target. Different references can share common
+targets. For example, in
+
+ 00401000: E9 3D 00 00 00 jmp 00401042
+ 00401005: EB 3B jmp 00401042
+
+we have two references with different locations and bodies, but same target
+of `00401042`.
+
+Because the bytes that encode a reference depend on its target, and potentially
+on its location, they are more likely to get modified from an old version of a
+binary to a newer version. This is why "naive" patching does not do well on
+binaries.
+
+**Target Key**: An alternative representation of a Target for a fixed pool, as its
+index in the sorted list of Target offsets. Keys are useful since:
+ * Their numerical index are smaller than offsets, allowing more efficient
+ storage of target correction data in patch.
+ * They simplify association from Targets to Labels.
+
+**Disassembler**: Architecture specific data and operations, used to extract and
+correct references in a binary.
+
+**Type of reference**: The type of a reference determines the binary
+representation used to encode its target. This affects how references are parsed
+and written by a disassembler. There can be many types of references in the same
+binary.
+
+A reference is represented by the tuple (disassembler, location, target, type).
+This tuple contains sufficient information to write the reference in a binary.
+
+**Pool of targets**: Collection of targets that is assumed to have some semantic
+relationship. Each reference type belongs to exactly one reference pool. Targets
+for references in the same pool are shared.
+
+For example, the following describes two pools defined for Dalvik Executable
+format (DEX). Both pools spawn multiple types of references.
+
+1. Index in string table.
+ - From bytecode to string index using 16 bits.
+ - From bytecode to string index using 32 bits.
+ - From field item to string index using 32 bits.
+2. Address in code.
+ - Relative 16 bits pointer.
+ - Relative 32 bits pointer.
+
+Boundaries between different pools can be ambiguous. Having all targets belong
+to the same pool can reduce redundancy, but will use more memory and might
+cause larger corrections to happen, so this is a trade-off that can be resolved
+with benchmarks.
+
+**Abs32 references**: References whose targets are adjusted by the OS during
+program load. In an image, a **relocation table** typically provides locations
+of abs32 references. At each abs32 location, the stored bytes then encode
+semantic information about the target (e.g., as RVA).
+
+**Rel32 references**: References embedded within machine code, in which targets
+are encoded as some delta relative to the reference's location. Typical examples
+of rel32 references are branching instructions and instruction pointer-relative
+memory access.
+
+**Equivalence**: A (src_offset, dst_offset, length) tuple describing a region of
+"old" binary, at an offset of |src_offset|, that is similar to a region of "new"
+binary, at an offset of |dst_offset|.
+
+**Raw delta unit**: Describes a raw modification to apply on the new image, as a
+pair (copy_offset, diff), where copy_offset describes the position in new file
+as an offset in the data that was copied from the old file, and diff is the
+bytewise difference to apply.
+
+**Associated Targets**: A target in "old" binary is associated with a target in
+"new" binary if both targets:
+1. are part of similar regions from the same equivalence, and
+2. have the same local offset (relative to respective start regions), and
+3. are not part of any larger region from a different equivalence.
+Not all targets are necessarily associated with another target.
+
+**Target Affinity**: Level of confidence in the association between two targets.
+The affinity between targets that are potentially associated is measured based
+on surrounding content, as well as reference type.
+
+**Label**: An integer assigned for each Target in "old" and "new" binary as part
+of generating a patch, and used to alias targets when searching for similar
+regions that will form equivalences. Labels are assigned such that
+associated targets in old and new binaries share the same Label. Unmatched
+Targets have a Label of 0. For example, given
+ * "Old" targets = [0x1111, 0x3333, 0x5555, 0x7777],
+ * "New" targets = [0x2222, 0x4444, 0x6666, 0x8888],
+to represent matchings 0x1111 <=> 0x6666, 0x3333 <=> 0x2222, we'd assign
+ * Label 1 to 0x1111 (in "old") and 0x6666 (in "new"),
+ * Label 2 to 0x3333 (in "old") and 0x2222 (in "new").
+ Represented as arrays indexed over Target Keys, we'd have:
+ * "Old" labels = [1, 2, 0 ,0],
+ * "New" labels = [2, 0, 1, 0].
+
+**Encoded Image**: The result of projecting the content of an image to scalar
+values that describe content on a higher level of abstraction, masking away
+undesirable noise in raw content. Notably, the projection encodes references
+based on their associated label.
+
+## Interfaces
+
+zucchini_lib: Core Zucchini library that operates on buffers to generate and
+apply patches.
+
+zucchini_io: Wrapper on zucchini_lib that handles file I/O, using memory-mapped
+I/O to interface with zucchini_lib.
+
+zucchini: Stand-alone executable that parses command-line arguments, and passes
+the results to zucchini_io. Also implements various helper flows.
+
+## Zucchini Ensemble Patch Format
+
+### Types
+
+**int8**: 8-bit unsigned int.
+
+**uint32**: 32-bit unsigned int, little-endian.
+
+**int32**: 32-bit signed int, little-endian.
+
+**Varints**: This is a generic variable-length encoding for integer quantities
+that strips away leading (most-significant) null bytes.
+The Varints format is borrowed from protocol-buffers, see
+[documentation](https://developers.google.com/protocol-buffers/docs/encoding#varints)
+for more info.
+
+**varuint32**: A uint32 encoded using Varints format.
+
+**varint32**: A int32 encoded using Varints format.
+
+### File Layout
+
+Name | Format | Description
+--- | --- | ---
+header | PatchHeader | The header.
+elements_count | uint32 | Number of patch units.
+elements | PatchElement[elements_count] | List of all patch elements.
+
+Position of elements in new file is ascending.
+
+### Structures
+
+**PatchHeader**
+
+Name | Format | Description
+--- | --- | ---
+magic | uint32 = kMagic | Magic value.
+old_size | uint32 | Size of old file in bytes.
+old_crc | uint32 | CRC32 of old file.
+new_size | uint32 | Size of new file in bytes.
+new_crc | uint32 | CRC32 of new file.
+
+**kMagic** == `'Z' | ('u' << 8) | ('c' << 16)`
+
+**PatchElement**
+Contains all the information required to produce a single element in new file.
+
+Name | Format | Description
+--- | --- | ---
+header | PatchElementHeader | The header.
+equivalences | EquivalenceList | List of equivalences.
+raw_deltas | RawDeltaList | List of raw deltas.
+reference_deltas | ReferenceDeltaList | List of reference deltas.
+pool_count | uint32 | Number of pools.
+extra_targets | ExtraTargetList[pool_count] | Lists of extra targets.
+
+**PatchElementHeader**
+Describes a correspondence between an element in old and in new files. Some
+redundancy arises from storing |new_offset|, but it is necessary to make
+PatchElement self contained.
+
+Name | Format | Description
+--- | --- | ---
+old_offset | uint32 | Starting offset of the element in old file.
+old_length | uint32 | Length of the element in old file.
+new_offset | uint32 | Starting offset of the element in new file.
+new_length | uint32 | Length of the element in new file.
+exe_type | uint32 | Executable type for this unit, see `enum ExecutableType`.
+
+**EquivalenceList**
+Encodes a list of equivalences, where dst offsets (in new image) are ascending.
+
+Name | Format | Description
+--- | --- | ---
+src_skip | Buffer<varint32> | Src offset for each equivalence, delta encoded.
+dst_skip | Buffer<varuint32> | Dst offset for each equivalence, delta encoded.
+copy_count | Buffer<varuint32> | Length for each equivalence.
+
+**RawDeltaList**
+Encodes a list of raw delta units, with ascending copy offsets.
+
+Name | Format | Description
+--- | --- | ---
+raw_delta_skip | Buffer<varuint32> | Copy offset for each delta unit, delta encoded and biased by -1.
+raw_delta_diff | Buffer<int8> | Bytewise difference for each delta unit.
+
+**ReferenceDeltaList**
+Encodes a list of reference deltas, in the order they appear in the new
+image file. A reference delta is a signed integer representing a jump through a
+list of targets.
+
+Name | Format | Description
+--- | --- | ---
+reference_delta | Buffer<varuint32> | Vector of reference deltas.
+
+**ExtraTargetList**
+Encodes a list of additional targets in the new image file, in ascending
+order.
+
+Name | Format | Description
+--- | --- | ---
+pool_tag | uint8_t | Unique identifier for this pool of targets.
+extra_targets | Buffer<varuint32> | Additional targets, delta encoded and biased by -1.
+
+**Buffer<T>**
+A generic vector of data.
+
+Name | Format | Description
+--- | --- | ---
+size |uint32 | Size of content in bytes.
+content |T[] | List of integers.
diff --git a/abs32_utils.cc b/abs32_utils.cc
new file mode 100644
index 0000000..ad1c85e
--- /dev/null
+++ b/abs32_utils.cc
@@ -0,0 +1,211 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/abs32_utils.h"
+
+#include <algorithm>
+#include <type_traits>
+#include <utility>
+
+#include "base/check_op.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+namespace {
+
+// Templated helper for AbsoluteAddress::Read().
+template <typename T>
+bool ReadAbs(ConstBufferView image, offset_t offset, uint64_t* value) {
+ static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+ if (!image.can_access<T>(offset))
+ return false;
+ *value = static_cast<uint64_t>(image.read<T>(offset));
+ return true;
+}
+
+// Templated helper for AbsoluteAddress::Write().
+template <typename T>
+bool WriteAbs(offset_t offset, T value, MutableBufferView* image) {
+ static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+ if (!image->can_access<T>(offset))
+ return false;
+ image->write<T>(offset, value);
+ return true;
+}
+
+} // namespace
+
+/******** AbsoluteAddress ********/
+
+AbsoluteAddress::AbsoluteAddress(Bitness bitness, uint64_t image_base)
+ : bitness_(bitness), image_base_(image_base), value_(image_base) {
+ CHECK(bitness_ == kBit64 || image_base_ < 0x100000000ULL);
+}
+
+AbsoluteAddress::AbsoluteAddress(AbsoluteAddress&&) = default;
+
+AbsoluteAddress::~AbsoluteAddress() = default;
+
+bool AbsoluteAddress::FromRva(rva_t rva) {
+ if (rva >= kRvaBound)
+ return false;
+ uint64_t value = image_base_ + rva;
+ // Check overflow, which manifests as |value| "wrapping around", resulting in
+ // |value| less than |image_base_| (preprocessing needed for 32-bit).
+ if (((bitness_ == kBit32) ? (value & 0xFFFFFFFFU) : value) < image_base_)
+ return false;
+ value_ = value;
+ return true;
+}
+
+rva_t AbsoluteAddress::ToRva() const {
+ if (value_ < image_base_)
+ return kInvalidRva;
+ uint64_t raw_rva = value_ - image_base_;
+ if (raw_rva >= kRvaBound)
+ return kInvalidRva;
+ return static_cast<rva_t>(raw_rva);
+}
+
+bool AbsoluteAddress::Read(offset_t offset, const ConstBufferView& image) {
+ // Read raw data; |value_| is not guaranteed to represent a valid RVA.
+ if (bitness_ == kBit32)
+ return ReadAbs<uint32_t>(image, offset, &value_);
+ DCHECK_EQ(kBit64, bitness_);
+ return ReadAbs<uint64_t>(image, offset, &value_);
+}
+
+bool AbsoluteAddress::Write(offset_t offset, MutableBufferView* image) {
+ if (bitness_ == kBit32)
+ return WriteAbs<uint32_t>(offset, static_cast<uint32_t>(value_), image);
+ DCHECK_EQ(kBit64, bitness_);
+ return WriteAbs<uint64_t>(offset, value_, image);
+}
+
+/******** Abs32RvaExtractorWin32 ********/
+
+Abs32RvaExtractorWin32::Abs32RvaExtractorWin32(
+ ConstBufferView image,
+ AbsoluteAddress&& addr,
+ const std::vector<offset_t>& abs32_locations,
+ offset_t lo,
+ offset_t hi)
+ : image_(image), addr_(std::move(addr)) {
+ CHECK_LE(lo, hi);
+ auto find_and_check = [this](const std::vector<offset_t>& locations,
+ offset_t offset) {
+ auto it = std::lower_bound(locations.begin(), locations.end(), offset);
+ // Ensure that |offset| does not straddle a reference body.
+ CHECK(it == locations.begin() || offset - *(it - 1) >= addr_.width());
+ return it;
+ };
+ cur_abs32_ = find_and_check(abs32_locations, lo);
+ end_abs32_ = find_and_check(abs32_locations, hi);
+}
+
+Abs32RvaExtractorWin32::Abs32RvaExtractorWin32(Abs32RvaExtractorWin32&&) =
+ default;
+
+Abs32RvaExtractorWin32::~Abs32RvaExtractorWin32() = default;
+
+absl::optional<Abs32RvaExtractorWin32::Unit> Abs32RvaExtractorWin32::GetNext() {
+ while (cur_abs32_ < end_abs32_) {
+ offset_t location = *(cur_abs32_++);
+ if (!addr_.Read(location, image_))
+ continue;
+ rva_t target_rva = addr_.ToRva();
+ if (target_rva == kInvalidRva)
+ continue;
+ return Unit{location, target_rva};
+ }
+ return absl::nullopt;
+}
+
+/******** Abs32ReaderWin32 ********/
+
+Abs32ReaderWin32::Abs32ReaderWin32(Abs32RvaExtractorWin32&& abs32_rva_extractor,
+ const AddressTranslator& translator)
+ : abs32_rva_extractor_(std::move(abs32_rva_extractor)),
+ target_rva_to_offset_(translator) {}
+
+Abs32ReaderWin32::~Abs32ReaderWin32() = default;
+
+absl::optional<Reference> Abs32ReaderWin32::GetNext() {
+ for (auto unit = abs32_rva_extractor_.GetNext(); unit.has_value();
+ unit = abs32_rva_extractor_.GetNext()) {
+ offset_t location = unit->location;
+ offset_t unsafe_target = target_rva_to_offset_.Convert(unit->target_rva);
+ if (unsafe_target != kInvalidOffset)
+ return Reference{location, unsafe_target};
+ }
+ return absl::nullopt;
+}
+
+/******** Abs32WriterWin32 ********/
+
+Abs32WriterWin32::Abs32WriterWin32(MutableBufferView image,
+ AbsoluteAddress&& addr,
+ const AddressTranslator& translator)
+ : image_(image),
+ addr_(std::move(addr)),
+ target_offset_to_rva_(translator) {}
+
+Abs32WriterWin32::~Abs32WriterWin32() = default;
+
+void Abs32WriterWin32::PutNext(Reference ref) {
+ rva_t target_rva = target_offset_to_rva_.Convert(ref.target);
+ if (target_rva != kInvalidRva) {
+ addr_.FromRva(target_rva);
+ addr_.Write(ref.location, &image_);
+ }
+}
+
+/******** Exported Functions ********/
+
+size_t RemoveUntranslatableAbs32(ConstBufferView image,
+ AbsoluteAddress&& addr,
+ const AddressTranslator& translator,
+ std::vector<offset_t>* locations) {
+ AddressTranslator::RvaToOffsetCache target_rva_checker(translator);
+ Abs32RvaExtractorWin32 extractor(image, std::move(addr), *locations, 0,
+ image.size());
+ Abs32ReaderWin32 reader(std::move(extractor), translator);
+ std::vector<offset_t>::iterator write_it = locations->begin();
+ // |reader| reads |locations| while |write_it| modifies it. However, there's
+ // no conflict since read occurs before write, and can skip ahead.
+ for (auto ref = reader.GetNext(); ref.has_value(); ref = reader.GetNext())
+ *(write_it++) = ref->location;
+ DCHECK(write_it <= locations->end());
+ size_t num_removed = locations->end() - write_it;
+ locations->erase(write_it, locations->end());
+ return num_removed;
+}
+
+size_t RemoveOverlappingAbs32Locations(uint32_t width,
+ std::vector<offset_t>* locations) {
+ if (locations->size() <= 1)
+ return 0;
+
+ auto slow = locations->begin();
+ auto fast = locations->begin() + 1;
+ for (;;) {
+ // Find next good location.
+ while (fast != locations->end() && *fast - *slow < width)
+ ++fast;
+ // Advance |slow|. For the last iteration this becomes the new sentinel.
+ ++slow;
+ if (fast == locations->end())
+ break;
+ // Compactify good locations (potentially overwrite bad locations).
+ if (slow != fast)
+ *slow = *fast;
+ ++fast;
+ }
+ size_t num_removed = locations->end() - slow;
+ locations->erase(slow, locations->end());
+ return num_removed;
+}
+
+} // namespace zucchini
diff --git a/abs32_utils.h b/abs32_utils.h
new file mode 100644
index 0000000..07503b5
--- /dev/null
+++ b/abs32_utils.h
@@ -0,0 +1,142 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ABS32_UTILS_H_
+#define COMPONENTS_ZUCCHINI_ABS32_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+// A class to represent an abs32 address (32-bit or 64-bit). Accessors are
+// provided to translate from / to RVA, and to read / write the represented
+// abs32 address from / to an image.
+class AbsoluteAddress {
+ public:
+ AbsoluteAddress(Bitness bitness, uint64_t image_base);
+ AbsoluteAddress(AbsoluteAddress&&);
+ ~AbsoluteAddress();
+
+ // Attempts to translate |rva| to an abs32 address. On success, assigns
+ // |value_| to the result and returns true. On failure (invalid |rva| or
+ // overflow), returns false.
+ bool FromRva(rva_t rva);
+
+ // Returns the RVA for |value_|, or |kInvalidRva| if the represented value
+ // address does not correspond to a valid RVA.
+ rva_t ToRva() const;
+
+ // Attempts to read the abs32 address at |image[offset]| into |value_|. On
+ // success, updates |value_| and returns true. On failure (invalid |offset|),
+ // returns false.
+ bool Read(offset_t offset, const ConstBufferView& image);
+
+ // Attempts to write |value_| to |(*image)[offset]|. On success, performs
+ // the write and returns true. On failure (invalid |offset|), returns false.
+ bool Write(offset_t offset, MutableBufferView* image);
+
+ uint32_t width() const { return WidthOf(bitness_); }
+
+ // Exposing |value_| for testing.
+ uint64_t* mutable_value() { return &value_; }
+
+ private:
+ const Bitness bitness_;
+ const uint64_t image_base_; // Accommodates 32-bit and 64-bit.
+ uint64_t value_; // Accommodates 32-bit and 64-bit.
+};
+
+// A class to extract Win32 abs32 references from |abs32_locations| within
+// |image_| bounded by |[lo, hi)|. GetNext() is used to successively return
+// data as Units, which are locations and (potentially out-of-bound) RVAs.
+// |addr| determines the bitness of abs32 values stored, and mediates all reads.
+class Abs32RvaExtractorWin32 {
+ public:
+ struct Unit {
+ offset_t location;
+ rva_t target_rva;
+ };
+
+ // Requires |lo| <= |hi|, and they must not straddle a reference body (with
+ // length |addr.width()|) in |abs32_locations|.
+ Abs32RvaExtractorWin32(ConstBufferView image,
+ AbsoluteAddress&& addr,
+ const std::vector<offset_t>& abs32_locations,
+ offset_t lo,
+ offset_t hi);
+ Abs32RvaExtractorWin32(Abs32RvaExtractorWin32&&);
+ ~Abs32RvaExtractorWin32();
+
+ // Visits given abs32 locations, rejects invalid locations and non-existent
+ // RVAs, and returns reference as Unit, or absl::nullopt on completion.
+ absl::optional<Unit> GetNext();
+
+ private:
+ ConstBufferView image_;
+ AbsoluteAddress addr_;
+ std::vector<offset_t>::const_iterator cur_abs32_;
+ std::vector<offset_t>::const_iterator end_abs32_;
+};
+
+// A reader for Win32 abs32 references that filters and translates results from
+// |abs32_rva_extractor_|.
+class Abs32ReaderWin32 : public ReferenceReader {
+ public:
+ Abs32ReaderWin32(Abs32RvaExtractorWin32&& abs32_rva_extractor,
+ const AddressTranslator& translator);
+ Abs32ReaderWin32(const Abs32ReaderWin32&) = delete;
+ const Abs32ReaderWin32& operator=(const Abs32ReaderWin32&) = delete;
+ ~Abs32ReaderWin32() override;
+
+ // ReferenceReader:
+ absl::optional<Reference> GetNext() override;
+
+ private:
+ Abs32RvaExtractorWin32 abs32_rva_extractor_;
+ AddressTranslator::RvaToOffsetCache target_rva_to_offset_;
+};
+
+// A writer for Win32 abs32 references. |addr| determines the bitness of the
+// abs32 values stored, and mediates all writes.
+class Abs32WriterWin32 : public ReferenceWriter {
+ public:
+ Abs32WriterWin32(MutableBufferView image,
+ AbsoluteAddress&& addr,
+ const AddressTranslator& translator);
+ Abs32WriterWin32(const Abs32WriterWin32&) = delete;
+ const Abs32WriterWin32& operator=(const Abs32WriterWin32&) = delete;
+ ~Abs32WriterWin32() override;
+
+ // ReferenceWriter:
+ void PutNext(Reference ref) override;
+
+ private:
+ MutableBufferView image_;
+ AbsoluteAddress addr_;
+ AddressTranslator::OffsetToRvaCache target_offset_to_rva_;
+};
+
+// Given a list of abs32 |locations|, removes all elements whose targets cannot
+// be translated. Returns the number of elements removed.
+size_t RemoveUntranslatableAbs32(ConstBufferView image,
+ AbsoluteAddress&& addr,
+ const AddressTranslator& translator,
+ std::vector<offset_t>* locations);
+
+// Given a sorted list of abs32 |locations|, removes all elements whose body
+// (with |width| given) overlaps with the body of a previous element.
+size_t RemoveOverlappingAbs32Locations(uint32_t width,
+ std::vector<offset_t>* locations);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ABS32_UTILS_H_
diff --git a/abs32_utils_unittest.cc b/abs32_utils_unittest.cc
new file mode 100644
index 0000000..ddbb685
--- /dev/null
+++ b/abs32_utils_unittest.cc
@@ -0,0 +1,543 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/abs32_utils.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// A trivial AddressTranslator that applies constant shift.
+class TestAddressTranslator : public AddressTranslator {
+ public:
+ TestAddressTranslator(size_t image_size, rva_t rva_begin) {
+ DCHECK_GE(rva_begin, 0U);
+ CHECK_EQ(AddressTranslator::kSuccess,
+ Initialize({{0, base::checked_cast<offset_t>(image_size),
+ rva_begin, base::checked_cast<rva_t>(image_size)}}));
+ }
+};
+
+// Helper to translate address |value| to RVA. May return |kInvalidRva|.
+rva_t AddrValueToRva(uint64_t value, AbsoluteAddress* addr) {
+ *addr->mutable_value() = value;
+ return addr->ToRva();
+}
+
+} // namespace
+
+TEST(Abs32UtilsTest, AbsoluteAddress32) {
+ std::vector<uint8_t> data32 = ParseHexString(
+ "00 00 32 00 21 43 65 4A 00 00 00 00 FF FF FF FF FF FF 31 00");
+ ConstBufferView image32(data32.data(), data32.size());
+ MutableBufferView mutable_image32(data32.data(), data32.size());
+
+ AbsoluteAddress addr32(kBit32, 0x00320000U);
+ EXPECT_TRUE(addr32.Read(0x0U, image32));
+ EXPECT_EQ(0x00000000U, addr32.ToRva());
+ EXPECT_TRUE(addr32.Read(0x4U, image32));
+ EXPECT_EQ(0x4A334321U, addr32.ToRva());
+ EXPECT_TRUE(addr32.Read(0x8U, image32));
+ EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Underflow.
+ EXPECT_TRUE(addr32.Read(0xCU, image32));
+ EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Translated RVA would be too large.
+ EXPECT_TRUE(addr32.Read(0x10U, image32));
+ EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Underflow (boundary case).
+
+ EXPECT_FALSE(addr32.Read(0x11U, image32));
+ EXPECT_FALSE(addr32.Read(0x14U, image32));
+ EXPECT_FALSE(addr32.Read(0x100000U, image32));
+ EXPECT_FALSE(addr32.Read(0x80000000U, image32));
+ EXPECT_FALSE(addr32.Read(0xFFFFFFFFU, image32));
+
+ EXPECT_TRUE(addr32.FromRva(0x11223344U));
+ EXPECT_TRUE(addr32.Write(0x2U, &mutable_image32));
+ EXPECT_TRUE(addr32.Write(0x10U, &mutable_image32));
+ std::vector<uint8_t> expected_data32 = ParseHexString(
+ "00 00 44 33 54 11 65 4A 00 00 00 00 FF FF FF FF 44 33 54 11");
+ EXPECT_EQ(expected_data32, data32);
+ EXPECT_FALSE(addr32.Write(0x11U, &mutable_image32));
+ EXPECT_FALSE(addr32.Write(0xFFFFFFFFU, &mutable_image32));
+ EXPECT_EQ(expected_data32, data32);
+}
+
+TEST(Abs32UtilsTest, AbsoluteAddress32Overflow) {
+ AbsoluteAddress addr32(kBit32, 0xC0000000U);
+ EXPECT_TRUE(addr32.FromRva(0x00000000U));
+ EXPECT_TRUE(addr32.FromRva(0x11223344U));
+ EXPECT_TRUE(addr32.FromRva(0x3FFFFFFFU));
+ EXPECT_FALSE(addr32.FromRva(0x40000000U));
+ EXPECT_FALSE(addr32.FromRva(0x40000001U));
+ EXPECT_FALSE(addr32.FromRva(0x80000000U));
+ EXPECT_FALSE(addr32.FromRva(0xFFFFFFFFU));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC0000000U, &addr32));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFU, &addr32));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x00000000U, &addr32));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFU, &addr32));
+}
+
+TEST(Abs32UtilsTest, AbsoluteAddress64) {
+ std::vector<uint8_t> data64 = ParseHexString(
+ "00 00 00 00 64 00 00 00 21 43 65 4A 64 00 00 00 "
+ "00 00 00 00 00 00 00 00 FF FF FF FF FF FF FF FF "
+ "00 00 00 00 64 00 00 80 FF FF FF FF 63 00 00 00");
+ ConstBufferView image64(data64.data(), data64.size());
+ MutableBufferView mutable_image64(data64.data(), data64.size());
+
+ AbsoluteAddress addr64(kBit64, 0x0000006400000000ULL);
+ EXPECT_TRUE(addr64.Read(0x0U, image64));
+ EXPECT_EQ(0x00000000U, addr64.ToRva());
+ EXPECT_TRUE(addr64.Read(0x8U, image64));
+ EXPECT_EQ(0x4A654321U, addr64.ToRva());
+ EXPECT_TRUE(addr64.Read(0x10U, image64)); // Succeeds, in spite of value.
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Underflow.
+ EXPECT_TRUE(addr64.Read(0x18U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Translated RVA too large.
+ EXPECT_TRUE(addr64.Read(0x20U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Translated RVA too large.
+ EXPECT_TRUE(addr64.Read(0x28U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Underflow.
+
+ EXPECT_FALSE(addr64.Read(0x29U, image64)); // Extends outside.
+ EXPECT_FALSE(addr64.Read(0x30U, image64)); // Entirely outside (note: hex).
+ EXPECT_FALSE(addr64.Read(0x100000U, image64));
+ EXPECT_FALSE(addr64.Read(0x80000000U, image64));
+ EXPECT_FALSE(addr64.Read(0xFFFFFFFFU, image64));
+
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.Write(0x13U, &mutable_image64));
+ EXPECT_TRUE(addr64.Write(0x20U, &mutable_image64));
+ std::vector<uint8_t> expected_data64 = ParseHexString(
+ "00 00 00 00 64 00 00 00 21 43 65 4A 64 00 00 00 "
+ "00 00 00 44 33 22 11 64 00 00 00 FF FF FF FF FF "
+ "44 33 22 11 64 00 00 00 FF FF FF FF 63 00 00 00");
+ EXPECT_EQ(expected_data64, data64);
+ EXPECT_FALSE(addr64.Write(0x29U, &mutable_image64));
+ EXPECT_FALSE(addr64.Write(0x30U, &mutable_image64));
+ EXPECT_FALSE(addr64.Write(0xFFFFFFFFU, &mutable_image64));
+ EXPECT_EQ(expected_data64, data64);
+
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+}
+
+TEST(Abs32UtilsTest, AbsoluteAddress64Overflow) {
+ {
+ // Counterpart to the AbsoluteAddress32Overflow test.
+ AbsoluteAddress addr64(kBit64, 0xFFFFFFFFC0000000ULL);
+ EXPECT_TRUE(addr64.FromRva(0x00000000U));
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFU));
+ EXPECT_FALSE(addr64.FromRva(0x40000000U));
+ EXPECT_FALSE(addr64.FromRva(0x40000001U));
+ EXPECT_FALSE(addr64.FromRva(0x80000000U));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xFFFFFFFFC0000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFFBFFFFFFFU, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x0000000000000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFF00000000U, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFFFFFFFFFU, &addr64));
+ }
+ {
+ // Pseudo-counterpart to the AbsoluteAddress32Overflow test: Some now pass.
+ AbsoluteAddress addr64(kBit64, 0xC0000000U);
+ EXPECT_TRUE(addr64.FromRva(0x00000000U));
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFU));
+ EXPECT_TRUE(addr64.FromRva(0x40000000U));
+ EXPECT_TRUE(addr64.FromRva(0x40000001U));
+ EXPECT_FALSE(addr64.FromRva(0x80000000U));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+
+ // ToRva() still fails though.
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC0000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFU, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x00000000U, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFU, &addr64));
+ }
+ {
+ AbsoluteAddress addr64(kBit64, 0xC000000000000000ULL);
+ EXPECT_TRUE(addr64.FromRva(0x00000000ULL));
+ EXPECT_TRUE(addr64.FromRva(0x11223344ULL));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFULL));
+ EXPECT_TRUE(addr64.FromRva(0x40000000ULL));
+ EXPECT_TRUE(addr64.FromRva(0x40000001ULL));
+ EXPECT_FALSE(addr64.FromRva(0x80000000ULL));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFULL));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC000000000000000ULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFFFFFFFFFULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x0000000000000000ULL, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xC00000003FFFFFFFULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFFFFFFFFFFULL, &addr64));
+ }
+}
+
+TEST(Abs32UtilsTest, Win32Read32) {
+ constexpr uint32_t kImageBase = 0xA0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ struct {
+ std::vector<uint8_t> data32;
+ std::vector<offset_t> abs32_locations; // Assumption: Sorted.
+ offset_t lo; // Assumption: In range, does not straddle |abs32_location|.
+ offset_t hi; // Assumption: Also >= |lo|.
+ std::vector<Reference> expected_refs;
+ } test_cases[] = {
+ // Targets at beginning and end.
+ {ParseHexString("FF FF FF FF 0F 00 C0 A0 00 00 C0 A0 FF FF FF FF"),
+ {0x4U, 0x8U},
+ 0x0U,
+ 0x10U,
+ {{0x4U, 0xFU}, {0x8U, 0x0U}}},
+ // Targets at beginning and end are out of bound: Rejected.
+ {ParseHexString("FF FF FF FF 10 00 C0 A0 FF FF BF A0 FF FF FF FF"),
+ {0x4U, 0x8U},
+ 0x0U,
+ 0x10U,
+ std::vector<Reference>()},
+ // Same with more extreme target values: Rejected.
+ {ParseHexString("FF FF FF FF FF FF FF FF 00 00 00 00 FF FF FF FF"),
+ {0x4U, 0x8U},
+ 0x0U,
+ 0x10U,
+ std::vector<Reference>()},
+ // Locations at beginning and end, plus invalid locations.
+ {ParseHexString("08 00 C0 A0 FF FF FF FF FF FF FF FF 04 00 C0 A0"),
+ {0x0U, 0xCU, 0x10U, 0x1000U, 0x80000000U, 0xFFFFFFFFU},
+ 0x0U,
+ 0x10U,
+ {{0x0U, 0x8U}, {0xCU, 0x4U}}},
+ // Odd size, location, target.
+ {ParseHexString("FF FF FF 09 00 C0 A0 FF FF FF FF FF FF FF FF FF "
+ "FF FF FF"),
+ {0x3U},
+ 0x0U,
+ 0x13U,
+ {{0x3U, 0x9U}}},
+ // No location given.
+ {ParseHexString("FF FF FF FF 0C 00 C0 A0 00 00 C0 A0 FF FF FF FF"),
+ std::vector<offset_t>(), 0x0U, 0x10U, std::vector<Reference>()},
+ // Simple alternation.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x0U,
+ 0x20U,
+ {{0x0U, 0x4U}, {0x8U, 0xCU}, {0x10U, 0x14U}, {0x18U, 0x1CU}}},
+ // Same, with locations limited by |lo| and |hi|. By assumption these must
+ // not cut across a Reference body.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x04U,
+ 0x17U,
+ {{0x8U, 0xCU}, {0x10U, 0x14U}}},
+ // Same, with very limiting |lo| and |hi|.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x0CU,
+ 0x10U,
+ std::vector<Reference>()},
+ // Same, |lo| == |hi|.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x14U,
+ 0x14U,
+ std::vector<Reference>()},
+ // Same, |lo| and |hi| at end.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF "
+ "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"),
+ {0x0U, 0x8U, 0x10U, 0x18U},
+ 0x20U,
+ 0x20U,
+ std::vector<Reference>()},
+ // Mix. Note that targets can overlap.
+ {ParseHexString("FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF "
+ "06 00 C0 A0 2C 00 C0 A0 FF FF C0 A0 2B 00 C0 A0 "
+ "FF 06 00 C0 A0 00 00 C0 A0 FF FF FF FF FF FF FF"),
+ {0x10U, 0x14U, 0x18U, 0x1CU, 0x21U, 0x25U, 0xAAAAU},
+ 0x07U,
+ 0x25U,
+ {{0x10U, 0x6U}, {0x14U, 0x2CU}, {0x1CU, 0x2BU}, {0x21, 0x6U}}},
+ };
+
+ for (const auto& test_case : test_cases) {
+ ConstBufferView image32(test_case.data32.data(), test_case.data32.size());
+ Abs32RvaExtractorWin32 extractor(image32, {kBit32, kImageBase},
+ test_case.abs32_locations, test_case.lo,
+ test_case.hi);
+
+ TestAddressTranslator translator(test_case.data32.size(), kRvaBegin);
+ Abs32ReaderWin32 reader(std::move(extractor), translator);
+
+ // Loop over |expected_ref| to check element-by-element.
+ absl::optional<Reference> ref;
+ for (const auto& expected_ref : test_case.expected_refs) {
+ ref = reader.GetNext();
+ EXPECT_TRUE(ref.has_value());
+ EXPECT_EQ(expected_ref, ref.value());
+ }
+ // Check that nothing is left.
+ ref = reader.GetNext();
+ EXPECT_FALSE(ref.has_value());
+ }
+}
+
+TEST(Abs32UtilsTest, Win32Read64) {
+ constexpr uint64_t kImageBase = 0x31415926A0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ // For simplicity, just test mixed case.
+ std::vector<uint8_t> data64 = ParseHexString(
+ "FF FF FF FF FF FF FF FF 00 00 C0 A0 26 59 41 31 "
+ "06 00 C0 A0 26 59 41 31 02 00 C0 A0 26 59 41 31 "
+ "FF FF FF BF 26 59 41 31 FF FF FF FF FF FF FF FF "
+ "02 00 C0 A0 26 59 41 31 07 00 C0 A0 26 59 41 31");
+ std::vector<offset_t> abs32_locations = {0x8U, 0x10U, 0x18U, 0x20U,
+ 0x28U, 0x30U, 0x38U, 0x40U};
+ offset_t lo = 0x10U;
+ offset_t hi = 0x38U;
+ std::vector<Reference> expected_refs = {
+ {0x10U, 0x06U}, {0x18U, 0x02U}, {0x30U, 0x02U}};
+
+ ConstBufferView image64(data64.data(), data64.size());
+ Abs32RvaExtractorWin32 extractor(image64, {kBit64, kImageBase},
+ abs32_locations, lo, hi);
+ TestAddressTranslator translator(data64.size(), kRvaBegin);
+ Abs32ReaderWin32 reader(std::move(extractor), translator);
+
+ std::vector<Reference> refs;
+ absl::optional<Reference> ref;
+ for (ref = reader.GetNext(); ref.has_value(); ref = reader.GetNext())
+ refs.push_back(ref.value());
+ EXPECT_EQ(expected_refs, refs);
+}
+
+TEST(Abs32UtilsTest, Win32ReadFail) {
+ // Make |bitness| a state to reduce repetition.
+ Bitness bitness = kBit32;
+
+ constexpr uint32_t kImageBase = 0xA0000000U; // Shared for 32-bit and 64-bit.
+ std::vector<uint8_t> data(32U, 0xFFU);
+ ConstBufferView image(data.data(), data.size());
+
+ auto try_make = [&](std::vector<offset_t>&& abs32_locations, offset_t lo,
+ offset_t hi) {
+ Abs32RvaExtractorWin32 extractor(image, {bitness, kImageBase},
+ abs32_locations, lo, hi);
+ extractor.GetNext(); // Dummy call so |extractor| gets used.
+ };
+
+ // 32-bit tests.
+ bitness = kBit32;
+ try_make({8U, 24U}, 0U, 32U);
+ EXPECT_DEATH(try_make({4U, 24U}, 32U, 0U), ""); // |lo| > |hi|.
+ try_make({8U, 24U}, 0U, 12U);
+ try_make({8U, 24U}, 0U, 28U);
+ try_make({8U, 24U}, 8U, 32U);
+ try_make({8U, 24U}, 24U, 32U);
+ EXPECT_DEATH(try_make({8U, 24U}, 0U, 11U), ""); // |hi| straddles.
+ EXPECT_DEATH(try_make({8U, 24U}, 26U, 32U), ""); // |lo| straddles.
+ try_make({8U, 24U}, 12U, 24U);
+
+ // 64-bit tests.
+ bitness = kBit64;
+ try_make({6U, 22U}, 0U, 32U);
+ // |lo| > |hi|.
+ EXPECT_DEATH(try_make(std::vector<offset_t>(), 32U, 31U), "");
+ try_make({6U, 22U}, 0U, 14U);
+ try_make({6U, 22U}, 0U, 30U);
+ try_make({6U, 22U}, 6U, 32U);
+ try_make({6U, 22U}, 22U, 32U);
+ EXPECT_DEATH(try_make({6U, 22U}, 0U, 29U), ""); // |hi| straddles.
+ EXPECT_DEATH(try_make({6U, 22U}, 7U, 32U), ""); // |lo| straddles.
+ try_make({6U, 22U}, 14U, 20U);
+ try_make({16U}, 16U, 24U);
+ EXPECT_DEATH(try_make({16U}, 18U, 18U), ""); // |lo|, |hi| straddle.
+}
+
+TEST(Abs32UtilsTest, Win32Write32) {
+ constexpr uint32_t kImageBase = 0xA0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ std::vector<uint8_t> data32(0x30, 0xFFU);
+ MutableBufferView image32(data32.data(), data32.size());
+ AbsoluteAddress addr(kBit32, kImageBase);
+ TestAddressTranslator translator(data32.size(), kRvaBegin);
+ Abs32WriterWin32 writer(image32, std::move(addr), translator);
+
+ // Successful writes.
+ writer.PutNext({0x02U, 0x10U});
+ writer.PutNext({0x0BU, 0x21U});
+ writer.PutNext({0x16U, 0x10U});
+ writer.PutNext({0x2CU, 0x00U});
+
+ // Invalid data: For simplicity, Abs32WriterWin32 simply ignores bad writes.
+ // Invalid location.
+ writer.PutNext({0x2DU, 0x20U});
+ writer.PutNext({0x80000000U, 0x20U});
+ writer.PutNext({0xFFFFFFFFU, 0x20U});
+ // Invalid target.
+ writer.PutNext({0x1CU, 0x00001111U});
+ writer.PutNext({0x10U, 0xFFFFFF00U});
+
+ std::vector<uint8_t> expected_data32 = ParseHexString(
+ "FF FF 10 00 C0 A0 FF FF FF FF FF 21 00 C0 A0 FF "
+ "FF FF FF FF FF FF 10 00 C0 A0 FF FF FF FF FF FF "
+ "FF FF FF FF FF FF FF FF FF FF FF FF 00 00 C0 A0");
+ EXPECT_EQ(expected_data32, data32);
+}
+
+TEST(Abs32UtilsTest, Win32Write64) {
+ constexpr uint64_t kImageBase = 0x31415926A0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ std::vector<uint8_t> data64(0x30, 0xFFU);
+ MutableBufferView image32(data64.data(), data64.size());
+ AbsoluteAddress addr(kBit64, kImageBase);
+ TestAddressTranslator translator(data64.size(), kRvaBegin);
+ Abs32WriterWin32 writer(image32, std::move(addr), translator);
+
+ // Successful writes.
+ writer.PutNext({0x02U, 0x10U});
+ writer.PutNext({0x0BU, 0x21U});
+ writer.PutNext({0x16U, 0x10U});
+ writer.PutNext({0x28U, 0x00U});
+
+ // Invalid data: For simplicity, Abs32WriterWin32 simply ignores bad writes.
+ // Invalid location.
+ writer.PutNext({0x29U, 0x20U});
+ writer.PutNext({0x80000000U, 0x20U});
+ writer.PutNext({0xFFFFFFFFU, 0x20U});
+ // Invalid target.
+ writer.PutNext({0x1CU, 0x00001111U});
+ writer.PutNext({0x10U, 0xFFFFFF00U});
+
+ std::vector<uint8_t> expected_data64 = ParseHexString(
+ "FF FF 10 00 C0 A0 26 59 41 31 FF 21 00 C0 A0 26 "
+ "59 41 31 FF FF FF 10 00 C0 A0 26 59 41 31 FF FF "
+ "FF FF FF FF FF FF FF FF 00 00 C0 A0 26 59 41 31");
+ EXPECT_EQ(expected_data64, data64);
+}
+
+TEST(Abs32UtilsTest, RemoveUntranslatableAbs32) {
+ Bitness kBitness = kBit32;
+ uint64_t kImageBase = 0x2BCD0000;
+
+ // Valid RVAs: [0x00001A00, 0x00001A28) and [0x00003A00, 0x00004000).
+ // Valid AVAs: [0x2BCD1A00, 0x2BCD1A28) and [0x2BCD3A00, 0x2BCD4000).
+ // Notice that the second section has a dangling RVA.
+ AddressTranslator translator;
+ ASSERT_EQ(AddressTranslator::kSuccess,
+ translator.Initialize(
+ {{0x04, +0x28, 0x1A00, +0x28}, {0x30, +0x30, 0x3A00, +0x600}}));
+
+ std::vector<uint8_t> data = ParseHexString(
+ "FF FF FF FF 0B 3A CD 2B 00 00 00 04 3A CD 2B 00 "
+ "FC 3F CD 2B 14 1A CD 2B 44 00 00 00 CC 00 00 00 "
+ "00 00 55 00 00 00 1E 1A CD 2B 00 99 FF FF FF FF "
+ "10 3A CD 2B 22 00 00 00 00 00 00 11 00 00 00 00 "
+ "66 00 00 00 28 1A CD 2B 00 00 CD 2B 27 1A CD 2B "
+ "FF 39 CD 2B 00 00 00 00 18 1A CD 2B 00 00 00 00 "
+ "FF FF FF FF FF FF FF FF");
+ MutableBufferView image(data.data(), data.size());
+
+ const offset_t kAbs1 = 0x04; // a:2BCD3A0B = r:3A0B = o:3B
+ const offset_t kAbs2 = 0x0B; // a:2BCD3A04 = r:3A04 = o:34
+ const offset_t kAbs3 = 0x10; // a:2BCD3FFF = r:3FFF (dangling)
+ const offset_t kAbs4 = 0x14; // a:2BCD1A14 = r:1A14 = o:18
+ const offset_t kAbs5 = 0x26; // a:2BCD1A1E = r:1A1E = o:22
+ const offset_t kAbs6 = 0x30; // a:2BCD3A10 = r:3A10 = o:40
+ const offset_t kAbs7 = 0x44; // a:2BCD1A28 = r:1A28 (bad: sentinel)
+ const offset_t kAbs8 = 0x48; // a:2BCD0000 = r:0000 (bad: not covered)
+ const offset_t kAbs9 = 0x4C; // a:2BCD1A27 = r:1A27 = o:2B
+ const offset_t kAbsA = 0x50; // a:2BCD39FF (bad: not covered)
+ const offset_t kAbsB = 0x54; // a:00000000 (bad: underflow)
+ const offset_t kAbsC = 0x58; // a:2BCD1A18 = r:1A18 = o:1C
+
+ std::vector<offset_t> locations = {kAbs1, kAbs2, kAbs3, kAbs4, kAbs5, kAbs6,
+ kAbs7, kAbs8, kAbs9, kAbsA, kAbsB, kAbsC};
+ std::vector<offset_t> exp_locations = {kAbs1, kAbs2, kAbs3, kAbs4,
+ kAbs5, kAbs6, kAbs9, kAbsC};
+ size_t exp_num_removed = locations.size() - exp_locations.size();
+ size_t num_removed = RemoveUntranslatableAbs32(image, {kBitness, kImageBase},
+ translator, &locations);
+ EXPECT_EQ(exp_num_removed, num_removed);
+ EXPECT_EQ(exp_locations, locations);
+}
+
+TEST(Abs32UtilsTest, RemoveOverlappingAbs32Locations) {
+ // Make |width| a state to reduce repetition.
+ uint32_t width = WidthOf(kBit32);
+
+ auto run_test = [&width](const std::vector<offset_t>& expected_locations,
+ std::vector<offset_t>&& locations) {
+ ASSERT_TRUE(std::is_sorted(locations.begin(), locations.end()));
+ size_t expected_removals = locations.size() - expected_locations.size();
+ size_t removals = RemoveOverlappingAbs32Locations(width, &locations);
+ EXPECT_EQ(expected_removals, removals);
+ EXPECT_EQ(expected_locations, locations);
+ };
+
+ // 32-bit tests.
+ width = WidthOf(kBit32);
+ run_test(std::vector<offset_t>(), std::vector<offset_t>());
+ run_test({4U}, {4U});
+ run_test({4U, 10U}, {4U, 10U});
+ run_test({4U, 8U}, {4U, 8U});
+ run_test({4U}, {4U, 7U});
+ run_test({4U}, {4U, 4U});
+ run_test({4U, 8U}, {4U, 7U, 8U});
+ run_test({4U, 10U}, {4U, 7U, 10U});
+ run_test({4U, 9U}, {4U, 9U, 10U});
+ run_test({3U}, {3U, 5U, 6U});
+ run_test({3U, 7U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U});
+ run_test({3U, 7U, 11U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U});
+ run_test({4U, 8U, 12U}, {4U, 6U, 8U, 10U, 12U});
+ run_test({4U, 8U, 12U, 16U}, {4U, 8U, 12U, 16U});
+ run_test({4U, 8U, 12U}, {4U, 8U, 9U, 12U});
+ run_test({4U}, {4U, 4U, 4U, 4U, 4U, 4U});
+ run_test({3U}, {3U, 4U, 4U, 4U, 5U, 5U});
+ run_test({3U, 7U}, {3U, 4U, 4U, 4U, 7U, 7U, 8U});
+ run_test({10U, 20U, 30U, 40U}, {10U, 20U, 22U, 22U, 30U, 40U});
+ run_test({1000000U, 1000004U}, {1000000U, 1000004U});
+ run_test({1000000U}, {1000000U, 1000002U});
+
+ // 64-bit tests.
+ width = WidthOf(kBit64);
+ run_test(std::vector<offset_t>(), std::vector<offset_t>());
+ run_test({4U}, {4U});
+ run_test({4U, 20U}, {4U, 20U});
+ run_test({4U, 12U}, {4U, 12U});
+ run_test({4U}, {4U, 11U});
+ run_test({4U}, {4U, 5U});
+ run_test({4U}, {4U, 4U});
+ run_test({4U, 12U, 20U}, {4U, 12U, 20U});
+ run_test({1U, 9U, 17U}, {1U, 9U, 17U});
+ run_test({1U, 17U}, {1U, 8U, 17U});
+ run_test({1U, 10U}, {1U, 10U, 17U});
+ run_test({3U, 11U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U});
+ run_test({4U, 12U}, {4U, 6U, 8U, 10U, 12U});
+ run_test({4U, 12U}, {4U, 12U, 16U});
+ run_test({4U, 12U, 20U, 28U}, {4U, 12U, 20U, 28U});
+ run_test({4U}, {4U, 4U, 4U, 4U, 5U, 5U});
+ run_test({3U, 11U}, {3U, 4U, 4U, 4U, 11U, 11U, 12U});
+ run_test({10U, 20U, 30U, 40U}, {10U, 20U, 22U, 22U, 30U, 40U});
+ run_test({1000000U, 1000008U}, {1000000U, 1000008U});
+ run_test({1000000U}, {1000000U, 1000004U});
+}
+
+} // namespace zucchini
diff --git a/address_translator.cc b/address_translator.cc
new file mode 100644
index 0000000..d7d7201
--- /dev/null
+++ b/address_translator.cc
@@ -0,0 +1,258 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/address_translator.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "base/containers/cxx20_erase.h"
+
+namespace zucchini {
+
+/******** AddressTranslator::OffsetToRvaCache ********/
+
+AddressTranslator::OffsetToRvaCache::OffsetToRvaCache(
+ const AddressTranslator& translator)
+ : translator_(translator) {}
+
+rva_t AddressTranslator::OffsetToRvaCache::Convert(offset_t offset) const {
+ if (offset >= translator_.fake_offset_begin_) {
+ // Rely on |translator_| to handle this special case.
+ return translator_.OffsetToRva(offset);
+ }
+ if (cached_unit_ && cached_unit_->CoversOffset(offset))
+ return cached_unit_->OffsetToRvaUnsafe(offset);
+ const AddressTranslator::Unit* unit = translator_.OffsetToUnit(offset);
+ if (!unit)
+ return kInvalidRva;
+ cached_unit_ = unit;
+ return unit->OffsetToRvaUnsafe(offset);
+}
+
+/******** AddressTranslator::RvaToOffsetCache ********/
+
+AddressTranslator::RvaToOffsetCache::RvaToOffsetCache(
+ const AddressTranslator& translator)
+ : translator_(translator) {}
+
+bool AddressTranslator::RvaToOffsetCache::IsValid(rva_t rva) const {
+ if (rva == kInvalidRva)
+ return false;
+ if (!cached_unit_ || !cached_unit_->CoversRva(rva)) {
+ const AddressTranslator::Unit* unit = translator_.RvaToUnit(rva);
+ if (!unit)
+ return false;
+ cached_unit_ = unit;
+ }
+ return true;
+}
+
+offset_t AddressTranslator::RvaToOffsetCache::Convert(rva_t rva) const {
+ if (!cached_unit_ || !cached_unit_->CoversRva(rva)) {
+ const AddressTranslator::Unit* unit = translator_.RvaToUnit(rva);
+ if (!unit)
+ return kInvalidOffset;
+ cached_unit_ = unit;
+ }
+ return cached_unit_->RvaToOffsetUnsafe(rva, translator_.fake_offset_begin_);
+}
+
+/******** AddressTranslator ********/
+
+AddressTranslator::AddressTranslator() = default;
+
+AddressTranslator::AddressTranslator(AddressTranslator&&) = default;
+
+AddressTranslator::~AddressTranslator() = default;
+
+AddressTranslator::Status AddressTranslator::Initialize(
+ std::vector<Unit>&& units) {
+ for (Unit& unit : units) {
+ // Check for overflows and fail if found.
+ if (!RangeIsBounded<offset_t>(unit.offset_begin, unit.offset_size,
+ kOffsetBound) ||
+ !RangeIsBounded<rva_t>(unit.rva_begin, unit.rva_size, kRvaBound)) {
+ return kErrorOverflow;
+ }
+ // If |rva_size < offset_size|: Just shrink |offset_size| to accommodate.
+ unit.offset_size = std::min(unit.offset_size, unit.rva_size);
+ // Now |rva_size >= offset_size|. Note that |rva_size > offset_size| is
+ // allowed; these lead to dangling RVA.
+ }
+
+ // Remove all empty units.
+ base::EraseIf(units, [](const Unit& unit) { return unit.IsEmpty(); });
+
+ // Sort |units| by RVA, then uniquefy.
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) {
+ return std::tie(a.rva_begin, a.rva_size) <
+ std::tie(b.rva_begin, b.rva_size);
+ });
+ units.erase(std::unique(units.begin(), units.end()), units.end());
+
+ // Scan for RVA range overlaps, validate, and merge wherever possible.
+ if (units.size() > 1) {
+ // Traverse with two iterators: |slow| stays behind and modifies Units that
+ // absorb all overlapping (or tangent if suitable) Units; |fast| explores
+ // new Units as candidates for consistency checks and potential merge into
+ // |slow|.
+ auto slow = units.begin();
+
+ // All |it| with |slow| < |it| < |fast| contain garbage.
+ for (auto fast = slow + 1; fast != units.end(); ++fast) {
+ // Comment notation: S = slow offset, F = fast offset, O = overlap offset,
+ // s = slow RVA, f = fast RVA, o = overlap RVA.
+ DCHECK_GE(fast->rva_begin, slow->rva_begin);
+ if (slow->rva_end() < fast->rva_begin) {
+ // ..ssssss..ffffff..: Disjoint: Can advance |slow|.
+ *(++slow) = *fast;
+ continue;
+ }
+
+ // ..ssssffff..: Tangent: Merge is optional.
+ // ..sssooofff.. / ..sssooosss..: Overlap: Merge is required.
+ bool merge_is_optional = slow->rva_end() == fast->rva_begin;
+
+ // Check whether |fast| and |slow| have identical RVA -> offset shift.
+ // If not, then merge cannot be resolved. Examples:
+ // ..ssssffff.. -> ..SSSSFFFF..: Good, can merge.
+ // ..ssssffff.. -> ..SSSS..FFFF..: Non-fatal: don't merge.
+ // ..ssssffff.. -> ..FFFF..SSSS..: Non-fatal: don't merge.
+ // ..ssssffff.. -> ..SSOOFF..: Fatal: Ignore for now (handled later).
+ // ..sssooofff.. -> ..SSSOOOFFF..: Good, can merge.
+ // ..sssooofff.. -> ..SSSSSOFFFFF..: Fatal.
+ // ..sssooofff.. -> ..FFOOOOSS..: Fatal.
+ // ..sssooofff.. -> ..SSSOOOF..: Good, notice |fast| has dangling RVAs.
+ // ..oooooo.. -> ..OOOOOO..: Good, can merge.
+ if (fast->offset_begin < slow->offset_begin ||
+ fast->offset_begin - slow->offset_begin !=
+ fast->rva_begin - slow->rva_begin) {
+ if (merge_is_optional) {
+ *(++slow) = *fast;
+ continue;
+ }
+ return kErrorBadOverlap;
+ }
+
+ // Check whether dangling RVAs (if they exist) are consistent. Examples:
+ // ..sssooofff.. -> ..SSSOOOF..: Good, can merge.
+ // ..sssooosss.. -> ..SSSOOOS..: Good, can merge.
+ // ..sssooofff.. -> ..SSSOO..: Good, can merge.
+ // ..sssooofff.. -> ..SSSOFFF..: Fatal.
+ // ..sssooosss.. -> ..SSSOOFFFF..: Fatal.
+ // ..oooooo.. -> ..OOO..: Good, can merge.
+ // Idea of check: Suppose |fast| has dangling RVA, then
+ // |[fast->rva_begin, fast->rva_begin + fast->offset_size)| ->
+ // |[fast->offset_begin, **fast->offset_end()**)|, with remaining RVA
+ // mapping to fake offsets. This means |fast->offset_end()| must be >=
+ // |slow->offset_end()|, and failure to do so results in error. The
+ // argument for |slow| having dangling RVA is symmetric.
+ if ((fast->HasDanglingRva() && fast->offset_end() < slow->offset_end()) ||
+ (slow->HasDanglingRva() && slow->offset_end() < fast->offset_end())) {
+ if (merge_is_optional) {
+ *(++slow) = *fast;
+ continue;
+ }
+ return kErrorBadOverlapDanglingRva;
+ }
+
+ // Merge |fast| into |slow|.
+ slow->rva_size =
+ std::max(slow->rva_size, fast->rva_end() - slow->rva_begin);
+ slow->offset_size =
+ std::max(slow->offset_size, fast->offset_end() - slow->offset_begin);
+ }
+ ++slow;
+ units.erase(slow, units.end());
+ }
+
+ // After resolving RVA overlaps, any offset overlap would imply error.
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) {
+ return a.offset_begin < b.offset_begin;
+ });
+
+ if (units.size() > 1) {
+ auto previous = units.begin();
+ for (auto current = previous + 1; current != units.end(); ++current) {
+ if (previous->offset_end() > current->offset_begin)
+ return kErrorBadOverlap;
+ previous = current;
+ }
+ }
+
+ // For the fake offset heuristic: Compute exclusive upper bounds for offsets
+ // and RVAs.
+ offset_t offset_bound = 0;
+ rva_t rva_bound = 0;
+ for (const Unit& unit : units) {
+ offset_bound = std::max(offset_bound, unit.offset_end());
+ rva_bound = std::max(rva_bound, unit.rva_end());
+ }
+
+ // Compute pessimistic range and see if it still fits within space of valid
+ // offsets. This limits image size to one half of |kOffsetBound|, and is a
+ // main drawback for the current heuristic to convert dangling RVA to fake
+ // offsets.
+ if (!RangeIsBounded(offset_bound, rva_bound, kOffsetBound))
+ return kErrorFakeOffsetBeginTooLarge;
+
+ // Success. Store results. |units| is currently sorted by offset, so assign.
+ units_sorted_by_offset_.assign(units.begin(), units.end());
+
+ // Sort |units| by RVA, and just store it directly.
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) {
+ return a.rva_begin < b.rva_begin;
+ });
+ units_sorted_by_rva_ = std::move(units);
+
+ fake_offset_begin_ = offset_bound;
+ return kSuccess;
+}
+
+rva_t AddressTranslator::OffsetToRva(offset_t offset) const {
+ if (offset >= fake_offset_begin_) {
+ // Handle dangling RVA: First shift it to regular RVA space.
+ rva_t rva = offset - fake_offset_begin_;
+ // If result is indeed a dangling RVA, return it; else return |kInvalidRva|.
+ const Unit* unit = RvaToUnit(rva);
+ return (unit && unit->HasDanglingRva() && unit->CoversDanglingRva(rva))
+ ? rva
+ : kInvalidRva;
+ }
+ const Unit* unit = OffsetToUnit(offset);
+ return unit ? unit->OffsetToRvaUnsafe(offset) : kInvalidRva;
+}
+
+offset_t AddressTranslator::RvaToOffset(rva_t rva) const {
+ const Unit* unit = RvaToUnit(rva);
+ // This also handles dangling RVA.
+ return unit ? unit->RvaToOffsetUnsafe(rva, fake_offset_begin_)
+ : kInvalidOffset;
+}
+
+const AddressTranslator::Unit* AddressTranslator::OffsetToUnit(
+ offset_t offset) const {
+ // Finds first Unit with |offset_begin| > |offset|, rewind by 1 to find the
+ // last Unit with |offset_begin| >= |offset| (if it exists).
+ auto it = std::upper_bound(
+ units_sorted_by_offset_.begin(), units_sorted_by_offset_.end(), offset,
+ [](offset_t a, const Unit& b) { return a < b.offset_begin; });
+ if (it == units_sorted_by_offset_.begin())
+ return nullptr;
+ --it;
+ return it->CoversOffset(offset) ? &(*it) : nullptr;
+}
+
+const AddressTranslator::Unit* AddressTranslator::RvaToUnit(rva_t rva) const {
+ auto it = std::upper_bound(
+ units_sorted_by_rva_.begin(), units_sorted_by_rva_.end(), rva,
+ [](rva_t a, const Unit& b) { return a < b.rva_begin; });
+ if (it == units_sorted_by_rva_.begin())
+ return nullptr;
+ --it;
+ return it->CoversRva(rva) ? &(*it) : nullptr;
+}
+
+} // namespace zucchini
diff --git a/address_translator.h b/address_translator.h
new file mode 100644
index 0000000..a517a2c
--- /dev/null
+++ b/address_translator.h
@@ -0,0 +1,199 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
+#define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
+
+#include <stdint.h>
+
+#include <tuple>
+#include <vector>
+
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// There are several ways to reason about addresses in an image:
+// - Offset: Position relative to start of image.
+// - VA (Virtual Address): Virtual memory address of a loaded image. This is
+// subject to relocation by the OS.
+// - RVA (Relative Virtual Address): VA relative to some base address. This is
+// the preferred way to specify pointers in an image.
+//
+// Zucchini is primarily concerned with offsets and RVAs. Executable images like
+// PE and ELF are organized into sections. Each section specifies offset and RVA
+// ranges as:
+// {Offset start, offset size, RVA start, RVA size}.
+// This constitutes a basic unit to translate between offsets and RVAs. Note:
+// |offset size| < |RVA size| is possible. For example, the .bss section can
+// have zero-filled statically-allocated data that have no corresponding bytes
+// on image (to save space). This poses a problem for Zucchini, which stores
+// addresses as offsets: now we'd have "dangling RVAs" that don't map to
+// offsets! Some ways to handle this are:
+// 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means
+// some reference targets would escape detection and processing.
+// 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data
+// must not be read on these fake offsets, which are only valid as target
+// addresses for reference matching.
+// As for |RVA size| < |offset size|, the extra portion just gets ignored.
+//
+// Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to
+// fake offsets by adding a large value. This value can be chosen as an
+// exclusive upper bound of all offsets (i.e., image size). This allows them to
+// be easily detected and processed as a special-case.
+// TODO(huangs): Investigate option (1), now that the refactored code makes
+// experimentation easier.
+// TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t|
+// ranges and create "fake" units to accommodate dangling RVAs. Then
+// AddressTranslator can be simplified.
+
+// Virtual Address relative to some base address (RVA). There's distinction
+// between "valid RVA" and "existent RVA":
+// - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|.
+// - Existent RVA: An RVA that has semantic meaning in an image, and may
+// translate to an offset in an image or (if a dangling RVA) a fake offset.
+// All existent RVAs are valid RVAs.
+using rva_t = uint32_t;
+// Divide by 2 to match |kOffsetBound|.
+constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2;
+constexpr rva_t kInvalidRva = static_cast<rva_t>(-2);
+
+// A utility to translate between offsets and RVAs in an image.
+class AddressTranslator {
+ public:
+ // A basic unit for address translation, roughly maps to a section, but may
+ // be processed (e.g., merged) as an optimization.
+ struct Unit {
+ offset_t offset_end() const { return offset_begin + offset_size; }
+ rva_t rva_end() const { return rva_begin + rva_size; }
+ bool IsEmpty() const {
+ // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed
+ // yet, and once it is then it's empty.
+ // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but
+ // is not empty.
+ return rva_size == 0;
+ }
+ bool CoversOffset(offset_t offset) const {
+ return RangeCovers(offset_begin, offset_size, offset);
+ }
+ bool CoversRva(rva_t rva) const {
+ return RangeCovers(rva_begin, rva_size, rva);
+ }
+ bool CoversDanglingRva(rva_t rva) const {
+ return CoversRva(rva) && rva - rva_begin >= offset_size;
+ }
+ // Assumes valid |offset| (*cannot* be fake offset).
+ rva_t OffsetToRvaUnsafe(offset_t offset) const {
+ return offset - offset_begin + rva_begin;
+ }
+ // Assumes valid |rva| (*can* be dangling RVA).
+ offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const {
+ rva_t delta = rva - rva_begin;
+ return delta < offset_size ? delta + offset_begin
+ : fake_offset_begin + rva;
+ }
+ bool HasDanglingRva() const { return rva_size > offset_size; }
+ friend bool operator==(const Unit& a, const Unit& b) {
+ return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) ==
+ std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size);
+ }
+
+ offset_t offset_begin;
+ offset_t offset_size;
+ rva_t rva_begin;
+ rva_t rva_size;
+ };
+
+ // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit
+ // found, to reduce the number of OffsetToUnit() calls for clustered queries.
+ class OffsetToRvaCache {
+ public:
+ // Embeds |translator| for use. Now object lifetime is tied to |translator|
+ // lifetime.
+ explicit OffsetToRvaCache(const AddressTranslator& translator);
+ OffsetToRvaCache(const OffsetToRvaCache&) = delete;
+ const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete;
+
+ rva_t Convert(offset_t offset) const;
+
+ private:
+ const AddressTranslator& translator_;
+ mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
+ };
+
+ // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit
+ // found, to reduce the number of RvaToUnit() calls for clustered queries.
+ class RvaToOffsetCache {
+ public:
+ // Embeds |translator| for use. Now object lifetime is tied to |translator|
+ // lifetime.
+ explicit RvaToOffsetCache(const AddressTranslator& translator);
+ RvaToOffsetCache(const RvaToOffsetCache&) = delete;
+ const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete;
+
+ bool IsValid(rva_t rva) const;
+
+ offset_t Convert(rva_t rva) const;
+
+ private:
+ const AddressTranslator& translator_;
+ mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
+ };
+
+ enum Status {
+ kSuccess = 0,
+ kErrorOverflow,
+ kErrorBadOverlap,
+ kErrorBadOverlapDanglingRva,
+ kErrorFakeOffsetBeginTooLarge,
+ };
+
+ AddressTranslator();
+ AddressTranslator(AddressTranslator&&);
+ AddressTranslator(const AddressTranslator&) = delete;
+ const AddressTranslator& operator=(const AddressTranslator&) = delete;
+ ~AddressTranslator();
+
+ // Consumes |units| to populate data in this class. Performs consistency
+ // checks and merges overlapping Units. Returns Status to indicate success.
+ Status Initialize(std::vector<Unit>&& units);
+
+ // Returns the (possibly dangling) RVA corresponding to |offset|, or
+ // kInvalidRva if not found.
+ rva_t OffsetToRva(offset_t offset) const;
+
+ // Returns the (possibly fake) offset corresponding to |rva|, or
+ // kInvalidOffset if not found (i.e., |rva| is non-existent).
+ offset_t RvaToOffset(rva_t rva) const;
+
+ // For testing.
+ offset_t fake_offset_begin() const { return fake_offset_begin_; }
+
+ const std::vector<Unit>& units_sorted_by_offset() const {
+ return units_sorted_by_offset_;
+ }
+
+ const std::vector<Unit>& units_sorted_by_rva() const {
+ return units_sorted_by_rva_;
+ }
+
+ private:
+ // Helper to find the Unit that contains given |offset| or |rva|. Returns null
+ // if not found.
+ const Unit* OffsetToUnit(offset_t offset) const;
+ const Unit* RvaToUnit(rva_t rva) const;
+
+ // Storage of Units. All offset ranges are non-empty and disjoint. Likewise
+ // for all RVA ranges.
+ std::vector<Unit> units_sorted_by_offset_;
+ std::vector<Unit> units_sorted_by_rva_;
+
+ // Conversion factor to translate between dangling RVAs and fake offsets.
+ offset_t fake_offset_begin_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
diff --git a/address_translator_unittest.cc b/address_translator_unittest.cc
new file mode 100644
index 0000000..efa2f14
--- /dev/null
+++ b/address_translator_unittest.cc
@@ -0,0 +1,586 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/address_translator.h"
+
+#include <algorithm>
+#include <string>
+#include <utility>
+
+#include "base/format_macros.h"
+#include "base/strings/stringprintf.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Test case structs. The convention of EXPECT() specifies "expected" value
+// before "actual". However, AddressTranslator interfaces explicitly state "X
+// to Y". So it is clearer in test cases to specify "input" before "expect".
+struct OffsetToRvaTestCase {
+ offset_t input;
+ rva_t expect;
+};
+
+struct RvaToOffsetTestCase {
+ rva_t input;
+ offset_t expect;
+};
+
+class TestAddressTranslator : public AddressTranslator {
+ public:
+ using AddressTranslator::AddressTranslator;
+
+ // Initialize() alternative that parses a visual representation of offset and
+ // RVA ranges. Illustrative example ("special" means '.' or '!'):
+ // "..AAA...|....aaaa" => "..AAA..." for offsets, and "....aaaa" for RVAs:
+ // - "..AAA...": First non-period character is at 2, so |offset_begin| = 2.
+ // - "..AAA...": There are 3 non-special characters, so |offset_size| = +3.
+ // - "....aaaa": First non-period character is at 4, so |rva_begin| = 4.
+ // - "....aaaa": There are 4 non-special characters, so |rva_size| = +4.
+ // For the special case of length-0 range, '!' can be used. For example,
+ // "...!...." specifies |begin| = 3 and |size| = +0.
+ AddressTranslator::Status InitializeWithStrings(
+ const std::vector<std::string>& specs) {
+ std::vector<Unit> units;
+ units.reserve(specs.size());
+ for (const std::string& s : specs) {
+ size_t sep = s.find('|');
+ CHECK_NE(sep, std::string::npos);
+ std::string s1 = s.substr(0, sep);
+ std::string s2 = s.substr(sep + 1);
+
+ auto first_non_blank = [](const std::string& t) {
+ auto is_blank = [](char ch) { return ch == '.'; };
+ return std::find_if_not(t.begin(), t.end(), is_blank) - t.begin();
+ };
+ auto count_non_special = [](const std::string& t) {
+ auto is_special = [](char ch) { return ch == '.' || ch == '!'; };
+ return t.size() - std::count_if(t.begin(), t.end(), is_special);
+ };
+ units.push_back({static_cast<offset_t>(first_non_blank(s1)),
+ static_cast<offset_t>(count_non_special(s1)),
+ static_cast<rva_t>(first_non_blank(s2)),
+ static_cast<rva_t>(count_non_special(s2))});
+ }
+ return Initialize(std::move(units));
+ }
+};
+
+// Simple test: Initialize TestAddressTranslator using |specs|, and match
+// |expected| results re. success or failure.
+void SimpleTest(const std::vector<std::string>& specs,
+ AddressTranslator::Status expected,
+ const std::string& case_name) {
+ TestAddressTranslator translator;
+ auto result = translator.InitializeWithStrings(specs);
+ EXPECT_EQ(expected, result) << case_name;
+}
+
+// Test AddressTranslator::Initialize's Unit overlap and error checks over
+// multiple test cases, each case consists of a fixed unit (specified as
+// string), and a variable string taken from a list.
+class TwoUnitOverlapTester {
+ public:
+ struct TestCase {
+ std::string unit_str;
+ AddressTranslator::Status expected;
+ };
+
+ static void RunTest(const std::string& unit_str1,
+ const std::vector<TestCase>& test_cases) {
+ for (size_t i = 0; i < test_cases.size(); ++i) {
+ const auto& test_case = test_cases[i];
+ const std::string& unit_str2 = test_case.unit_str;
+ const std::string str =
+ base::StringPrintf("Case #%" PRIuS ": %s", i, unit_str2.c_str());
+ SimpleTest({unit_str1, unit_str2}, test_case.expected, str);
+ // Switch order. Expect same results.
+ SimpleTest({unit_str2, unit_str1}, test_case.expected, str);
+ }
+ }
+};
+
+} // namespace
+
+TEST(AddressTranslatorTest, Empty) {
+ using AT = AddressTranslator;
+ TestAddressTranslator translator;
+ EXPECT_EQ(AT::kSuccess,
+ translator.Initialize(std::vector<AddressTranslator::Unit>()));
+ offset_t fake_offset_begin = translator.fake_offset_begin();
+
+ // Optimized versions.
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator);
+
+ EXPECT_EQ(kInvalidRva, translator.OffsetToRva(0U));
+ EXPECT_EQ(kInvalidRva, translator.OffsetToRva(100U));
+ EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(0U));
+ EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(100U));
+
+ EXPECT_EQ(kInvalidOffset, translator.RvaToOffset(0U));
+ EXPECT_EQ(kInvalidOffset, translator.RvaToOffset(100U));
+ EXPECT_EQ(kInvalidOffset, rva_to_offset.Convert(0U));
+ EXPECT_EQ(kInvalidOffset, rva_to_offset.Convert(100U));
+
+ EXPECT_EQ(kInvalidRva, translator.OffsetToRva(fake_offset_begin));
+ EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(fake_offset_begin));
+}
+
+TEST(AddressTranslatorTest, Single) {
+ using AT = AddressTranslator;
+ TestAddressTranslator translator;
+ // Offsets to RVA: [10, 30) -> [100, 120).
+ EXPECT_EQ(AT::kSuccess, translator.Initialize({{10U, +20U, 100U, +20U}}));
+ offset_t fake_offset_begin = translator.fake_offset_begin();
+
+ // Optimized versions.
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator);
+ EXPECT_EQ(30U, fake_offset_begin); // Test implementation detail.
+
+ // Offsets to RVAs.
+ OffsetToRvaTestCase test_cases1[] = {
+ {0U, kInvalidRva}, {9U, kInvalidRva}, {10U, 100U},
+ {20U, 110U}, {29U, 119U}, {30U, kInvalidRva},
+ };
+ for (auto& test_case : test_cases1) {
+ EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input));
+ EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input));
+ }
+
+ // RVAs to offsets.
+ RvaToOffsetTestCase test_cases2[] = {
+ {0U, kInvalidOffset}, {99U, kInvalidOffset}, {100U, 10U},
+ {110U, 20U}, {119U, 29U}, {120U, kInvalidOffset},
+ };
+ for (auto& test_case : test_cases2) {
+ EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input));
+ EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input));
+ }
+}
+
+TEST(AddressTranslatorTest, SingleDanglingRva) {
+ using AT = AddressTranslator;
+ TestAddressTranslator translator;
+ // Offsets to RVA: [10, 30) -> [100, 120 + 7), so has dangling RVAs.
+ EXPECT_EQ(AT::kSuccess,
+ translator.Initialize({{10U, +20U, 100U, +20U + 7U}}));
+ offset_t fake_offset_begin = translator.fake_offset_begin();
+
+ EXPECT_EQ(30U, fake_offset_begin); // Test implementation detail.
+
+ // Optimized versions.
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator);
+
+ // Offsets to RVAs.
+ OffsetToRvaTestCase test_cases1[] = {
+ {0U, kInvalidRva},
+ {9U, kInvalidRva},
+ {10U, 100U},
+ {20U, 110U},
+ {29U, 119U},
+ {30U, kInvalidRva},
+ // Fake offsets to dangling RVAs.
+ {fake_offset_begin + 100U, kInvalidRva},
+ {fake_offset_begin + 119U, kInvalidRva},
+ {fake_offset_begin + 120U, 120U},
+ {fake_offset_begin + 126U, 126U},
+ {fake_offset_begin + 127U, kInvalidRva},
+ };
+ for (auto& test_case : test_cases1) {
+ EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input));
+ EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input));
+ }
+
+ // RVAs to offsets.
+ RvaToOffsetTestCase test_cases2[] = {
+ {0U, kInvalidOffset},
+ {99U, kInvalidOffset},
+ {100U, 10U},
+ {110U, 20U},
+ {119U, 29U},
+ // Dangling RVAs to fake offsets.
+ {120U, fake_offset_begin + 120U},
+ {126U, fake_offset_begin + 126U},
+ {127U, kInvalidOffset},
+ };
+ for (auto& test_case : test_cases2) {
+ EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input));
+ EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input));
+ }
+}
+
+TEST(AddressTranslatorTest, BasicUsage) {
+ using AT = AddressTranslator;
+ TestAddressTranslator translator;
+ // Offsets covered: [10, 30), [40, 70), [70, 110).
+ // Map to RVAs: [200, 220 + 5), [300, 330), [100, 140), so has dangling RVAs.
+ auto result = translator.Initialize({
+ {10U, +20U, 200U, +20U + 5U}, // Has dangling RVAs.
+ {40U, +30U, 300U, +20U}, // Extra offset truncated and ignored.
+ {50U, +20U, 310U, +20U}, // Overlap with previous: Merged.
+ {70U, +40U, 100U, +20U}, // Tangent with previous but inconsistent; extra
+ // offset truncated and ignored.
+ {90U, +20U, 120U, +20U}, // Tangent with previous and consistent: Merged.
+ });
+ EXPECT_EQ(AT::kSuccess, result);
+ offset_t fake_offset_begin = translator.fake_offset_begin();
+ EXPECT_EQ(110U, fake_offset_begin); // Test implementation detail.
+
+ // Optimized versions.
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator);
+
+ // Offsets to RVAs.
+ OffsetToRvaTestCase test_cases1[] = {
+ {0U, kInvalidRva},
+ {9U, kInvalidRva},
+ {10U, 200U},
+ {20U, 210U},
+ {29U, 219U},
+ {30U, kInvalidRva},
+ {39U, kInvalidRva},
+ {40U, 300U},
+ {55U, 315U},
+ {69U, 329U},
+ {70U, 100U},
+ {90U, 120U},
+ {109U, 139U},
+ {110U, kInvalidRva},
+ // Fake offsets to dangling RVAs.
+ {fake_offset_begin + 220U, 220U},
+ {fake_offset_begin + 224U, 224U},
+ {fake_offset_begin + 225U, kInvalidRva},
+ };
+ for (auto& test_case : test_cases1) {
+ EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input));
+ EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input));
+ }
+
+ // RVAs to offsets.
+ RvaToOffsetTestCase test_cases2[] = {
+ {0U, kInvalidOffset},
+ {99U, kInvalidOffset},
+ {100U, 70U},
+ {120U, 90U},
+ {139U, 109U},
+ {140U, kInvalidOffset},
+ {199U, kInvalidOffset},
+ {200U, 10U},
+ {210U, 20U},
+ {219U, 29U},
+ {225U, kInvalidOffset},
+ {299U, kInvalidOffset},
+ {300U, 40U},
+ {315U, 55U},
+ {329U, 69U},
+ {330U, kInvalidOffset},
+ // Dangling RVAs to fake offsets.
+ {220U, fake_offset_begin + 220U},
+ {224U, fake_offset_begin + 224U},
+ {225U, kInvalidOffset},
+ };
+ for (auto& test_case : test_cases2) {
+ EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input));
+ EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input));
+ }
+}
+
+TEST(AddressTranslatorTest, Overflow) {
+ using AT = AddressTranslator;
+ // Test assumes that offset_t and rva_t to be 32-bit.
+ static_assert(sizeof(offset_t) == 4 && sizeof(rva_t) == 4,
+ "Needs to update test.");
+ {
+ AddressTranslator translator1;
+ EXPECT_EQ(AT::kErrorOverflow,
+ translator1.Initialize({{0, +0xC0000000U, 0, +0xC0000000U}}));
+ }
+ {
+ AddressTranslator translator2;
+ EXPECT_EQ(AT::kErrorOverflow,
+ translator2.Initialize({{0, +0, 0, +0xC0000000U}}));
+ }
+ {
+ // Units are okay, but owing to limitations of the heuristic to convert
+ // dangling RVA to fake offset, AddressTranslator::Initialize() fails.
+ AddressTranslator translator3;
+ EXPECT_EQ(AT::kErrorFakeOffsetBeginTooLarge,
+ translator3.Initialize(
+ {{32, +0, 32, +0x50000000U}, {0x50000000U, +16, 0, +16}}));
+ }
+}
+
+// Sanity test for TestAddressTranslator::InitializeWithStrings().
+TEST(AddressTranslatorTest, AddUnitAsString) {
+ using AT = AddressTranslator;
+ {
+ TestAddressTranslator translator1;
+ EXPECT_EQ(AT::kSuccess, translator1.InitializeWithStrings({"..A..|.aaa."}));
+ AddressTranslator::Unit unit1 = translator1.units_sorted_by_offset()[0];
+ EXPECT_EQ(2U, unit1.offset_begin);
+ EXPECT_EQ(+1U, unit1.offset_size);
+ EXPECT_EQ(1U, unit1.rva_begin);
+ EXPECT_EQ(+3U, unit1.rva_size);
+ }
+ {
+ TestAddressTranslator translator2;
+ EXPECT_EQ(AT::kSuccess,
+ translator2.InitializeWithStrings({".....!...|.bbbbbb..."}));
+ AddressTranslator::Unit unit2 = translator2.units_sorted_by_offset()[0];
+ EXPECT_EQ(5U, unit2.offset_begin);
+ EXPECT_EQ(+0U, unit2.offset_size);
+ EXPECT_EQ(1U, unit2.rva_begin);
+ EXPECT_EQ(+6U, unit2.rva_size);
+ }
+}
+
+// AddressTranslator::Initialize() lists Unit merging examples in comments. The
+// format is different from that used by InitializeWithStrings(), but adapting
+// them is easy, so we may as well do so.
+TEST(AddressTranslatorTest, OverlapFromComment) {
+ using AT = AddressTranslator;
+ constexpr auto OK = AT::kSuccess;
+ struct {
+ const char* rva_str; // RVA comes first in this case.
+ const char* offset_str;
+ AT::Status expected;
+ } test_cases[] = {
+ {"..ssssffff..", "..SSSSFFFF..", OK},
+ {"..ssssffff..", "..SSSS..FFFF..", OK},
+ {"..ssssffff..", "..FFFF..SSSS..", OK},
+ {"..ssssffff..", "..SSOOFF..", AT::kErrorBadOverlap},
+ {"..sssooofff..", "..SSSOOOFFF..", OK},
+ {"..sssooofff..", "..SSSSSOFFFFF..", AT::kErrorBadOverlap},
+ {"..sssooofff..", "..FFOOOOSS..", AT::kErrorBadOverlap},
+ {"..sssooofff..", "..SSSOOOF..", OK},
+ {"..sssooofff..", "..SSSOOOF..", OK},
+ {"..sssooosss..", "..SSSOOOS..", OK},
+ {"..sssooofff..", "..SSSOO..", OK},
+ {"..sssooofff..", "..SSSOFFF..", AT::kErrorBadOverlapDanglingRva},
+ {"..sssooosss..", "..SSSOOSSSS..", AT::kErrorBadOverlapDanglingRva},
+ {"..oooooo..", "..OOO..", OK},
+ };
+
+ auto to_period = [](std::string s, char ch) { // |s| passed by value.
+ std::replace(s.begin(), s.end(), ch, '.');
+ return s;
+ };
+
+ size_t idx = 0;
+ for (const auto& test_case : test_cases) {
+ std::string base_str =
+ std::string(test_case.offset_str) + "|" + test_case.rva_str;
+ std::string unit_str1 = to_period(to_period(base_str, 'S'), 's');
+ std::string unit_str2 = to_period(to_period(base_str, 'F'), 'f');
+ SimpleTest({unit_str1, unit_str2}, test_case.expected,
+ base::StringPrintf("Case #%" PRIuS, idx));
+ ++idx;
+ }
+}
+
+TEST(AddressTranslatorTest, Overlap) {
+ using AT = AddressTranslator;
+ constexpr auto OK = AT::kSuccess;
+ constexpr const char* unit_str1 = "....AAA.......|.....aaa......";
+
+ std::vector<TwoUnitOverlapTester::TestCase> test_cases = {
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {"....BBB.......|.....bbb......", OK},
+ {"..BBB.........|...bbb........", OK},
+ {"......BBB.....|.......bbb....", OK},
+ {"..BBBBBBBBB...|...bbb........", OK}, // Extra offset get truncated.
+ {"......BBBBBBBB|.......bbb....", OK},
+ {"....BBB.......|.......bbb....", AT::kErrorBadOverlap},
+ {"..BBB.........|.......bbb....", AT::kErrorBadOverlap},
+ {".......BBB....|.......bbb....", AT::kErrorBadOverlap},
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {"....BBB.......|..........bbb.", AT::kErrorBadOverlap},
+ {"..........BBB.|.......bbb....", AT::kErrorBadOverlap},
+ {"......BBB.....|.....bbb......", AT::kErrorBadOverlap},
+ {"......BBB.....|..bbb.........", AT::kErrorBadOverlap},
+ {"......BBB.....|bbb...........", AT::kErrorBadOverlap},
+ {"BBB...........|bbb...........", OK}, // Disjoint.
+ {"........BBB...|.........bbb..", OK}, // Disjoint.
+ {"BBB...........|..........bbb.", OK}, // Disjoint, offset elsewhere.
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {".BBB..........|..bbb.........", OK}, // Tangent.
+ {".......BBB....|........bbb...", OK}, // Tangent.
+ {".BBB..........|........bbb...", OK}, // Tangent, offset elsewhere.
+ {"BBBBBB........|bbb...........", OK}, // Repeat, with extra offsets.
+ {"........BBBB..|.........bbb..", OK},
+ {"BBBBBB........|..........bbb.", OK},
+ {".BBBBBB.......|..bbb.........", OK},
+ {".......BBBBB..|........bbb...", OK},
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {".BBB..........|........bbb...", OK}, // Tangent, offset elsewhere.
+ {"..BBB.........|........bbb...", AT::kErrorBadOverlap},
+ {"...BB.........|....bb........", OK},
+ {"....BB........|.....bb.......", OK},
+ {".......BB.....|........bb....", OK},
+ {"...BBBBBB.....|....bbbbbb....", OK},
+ {"..BBBBBB......|...bbbbbb.....", OK},
+ {"......BBBBBB..|.......bbbbbb.", OK},
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {"BBBBBBBBBBBBBB|bbbbbbbbbbbbbb", AT::kErrorBadOverlap},
+ {"B.............|b.............", OK},
+ {"B.............|.............b", OK},
+ {"....B.........|.....b........", OK},
+ {"....B.........|......b.......", AT::kErrorBadOverlap},
+ {"....B.........|......b.......", AT::kErrorBadOverlap},
+ {"....BBB.......|.....bb.......", OK},
+ {"....BBBB......|.....bbb......", OK},
+ //....AAA.......|.....aaa...... The first Unit. NOLINT
+ {".........BBBBB|.b............", OK},
+ {"....AAA.......|.....!........", OK},
+ {"....!.........|.....!........", OK}, // Empty units gets deleted early.
+ {"....!.........|..........!...", OK}, // Forgiving!
+ };
+
+ TwoUnitOverlapTester::RunTest(unit_str1, test_cases);
+}
+
+TEST(AddressTranslatorTest, OverlapOffsetMultiple) {
+ using AT = AddressTranslator;
+ // Simple case. Note that RVA ranges don't get merged.
+ SimpleTest({"A..|a....", //
+ ".A.|..a..", //
+ "..A|....a"},
+ AT::kSuccess, "Case #0");
+
+ // Offset range 1 overlaps 2 and 3, but truncation takes place to trim down
+ // offset ranges, so still successful.
+ SimpleTest({"..A|a....", //
+ ".AA|..a..", //
+ "AAA|....a"},
+ AT::kSuccess, "Case #1");
+
+ // Offset range 2 and 3 overlap, so fail.
+ SimpleTest({"A..|a....", //
+ ".A.|..a..", //
+ ".A.|....a"},
+ AT::kErrorBadOverlap, "Case #2");
+}
+
+TEST(AddressTranslatorTest, OverlapDangling) {
+ using AT = AddressTranslator;
+ constexpr auto OK = AT::kSuccess;
+ // First Unit has dangling RVAs (|rva_size| exceeds |offset_size|).
+ constexpr const char* unit_str1 = "....AAA.......|.....aaaaaa...";
+
+ std::vector<TwoUnitOverlapTester::TestCase> test_cases = {
+ //....AAA.......|.....aaaaaa... The first Unit. NOLINT
+ {"....BBB.......|.....bbbbbb...", OK},
+ {"....BBB.......|.....bbbbb....", OK},
+ {"....BBB.......|.....bbbb.....", OK},
+ {"....BBB.......|.....bbb......", OK},
+ {".....BBB......|......bbb.....", AT::kErrorBadOverlapDanglingRva},
+ {".....BB.......|......bbb.....", OK},
+ {"....BBB.......|.....bbbbbbbb.", OK},
+ {"..BBBBB.......|...bbbbbbbb...", OK},
+ //....AAA.......|.....aaaaaa... The first Unit. NOLINT
+ {"......!.......|.bbb..........", AT::kErrorBadOverlap},
+ {"..BBBBB.......|...bbbbb......", OK},
+ {".......BBB....|.bbb..........", OK}, // Just tangent: Can go elsewhere.
+ {".......BBB....|.bbbb.........", OK}, // Can be another dangling RVA.
+ {".......!......|.bbbb.........", OK}, // Same with empty.
+ {"......!.......|.......!......", OK}, // Okay, but gets deleted.
+ {"......!.......|.......b......", AT::kErrorBadOverlapDanglingRva},
+ {"......B.......|.......b......", OK},
+ //....AAA.......|.....aaaaaa... The first Unit. NOLINT
+ {"......BBBB....|.......bbbb...", AT::kErrorBadOverlapDanglingRva},
+ {"......BB......|.......bb.....", AT::kErrorBadOverlapDanglingRva},
+ {"......BB......|bb............", AT::kErrorBadOverlap},
+ };
+
+ TwoUnitOverlapTester::RunTest(unit_str1, test_cases);
+}
+
+// Tests implementation since algorithm is tricky.
+TEST(AddressTranslatorTest, Merge) {
+ using AT = AddressTranslator;
+ // Merge a bunch of overlapping Units into one big Unit.
+ std::vector<std::string> test_case1 = {
+ "AAA.......|.aaa......", // Comment to prevent wrap by formatter.
+ "AA........|.aa.......", //
+ "..AAA.....|...aaa....", //
+ "....A.....|.....a....", //
+ ".....AAA..|......aaa.", //
+ "........A.|.........a", //
+ };
+ // Try all 6! permutations.
+ std::sort(test_case1.begin(), test_case1.end());
+ do {
+ TestAddressTranslator translator1;
+ EXPECT_EQ(AT::kSuccess, translator1.InitializeWithStrings(test_case1));
+ EXPECT_EQ(9U, translator1.fake_offset_begin());
+
+ AT::Unit expected{0U, +9U, 1U, +9U};
+ EXPECT_EQ(1U, translator1.units_sorted_by_offset().size());
+ EXPECT_EQ(expected, translator1.units_sorted_by_offset()[0]);
+ EXPECT_EQ(1U, translator1.units_sorted_by_rva().size());
+ EXPECT_EQ(expected, translator1.units_sorted_by_rva()[0]);
+ } while (std::next_permutation(test_case1.begin(), test_case1.end()));
+
+ // Merge RVA-adjacent Units into two Units.
+ std::vector<std::string> test_case2 = {
+ ".....A..|.a......", // First Unit.
+ "......A.|..a.....", //
+ "A.......|...a....", // Second Unit: RVA-adjacent to first Unit, but
+ ".A......|....a...", // offset would become inconsistent, so a new
+ "..A.....|.....a..", // Unit gets created.
+ };
+ // Try all 5! permutations.
+ std::sort(test_case2.begin(), test_case2.end());
+ do {
+ TestAddressTranslator translator2;
+ EXPECT_EQ(AT::kSuccess, translator2.InitializeWithStrings(test_case2));
+ EXPECT_EQ(7U, translator2.fake_offset_begin());
+
+ AT::Unit expected1{0U, +3U, 3U, +3U};
+ AT::Unit expected2{5U, +2U, 1U, +2U};
+ EXPECT_EQ(2U, translator2.units_sorted_by_offset().size());
+ EXPECT_EQ(expected1, translator2.units_sorted_by_offset()[0]);
+ EXPECT_EQ(expected2, translator2.units_sorted_by_offset()[1]);
+ EXPECT_EQ(2U, translator2.units_sorted_by_rva().size());
+ EXPECT_EQ(expected2, translator2.units_sorted_by_rva()[0]);
+ EXPECT_EQ(expected1, translator2.units_sorted_by_rva()[1]);
+ } while (std::next_permutation(test_case2.begin(), test_case2.end()));
+}
+
+TEST(AddressTranslatorTest, RvaToOffsetCache_IsValid) {
+ AddressTranslator translator;
+ // Notice that the second section has dangling RVA.
+ ASSERT_EQ(AddressTranslator::kSuccess,
+ translator.Initialize(
+ {{0x04, +0x28, 0x1A00, +0x28}, {0x30, +0x10, 0x3A00, +0x30}}));
+ AddressTranslator::RvaToOffsetCache rva_checker(translator);
+
+ EXPECT_FALSE(rva_checker.IsValid(kInvalidRva));
+
+ for (int i = 0; i < 0x28; ++i)
+ EXPECT_TRUE(rva_checker.IsValid(0x1A00 + i));
+ EXPECT_FALSE(rva_checker.IsValid(0x1A00 + 0x28));
+ EXPECT_FALSE(rva_checker.IsValid(0x1A00 + 0x29));
+ EXPECT_FALSE(rva_checker.IsValid(0x1A00 - 1));
+ EXPECT_FALSE(rva_checker.IsValid(0x1A00 - 2));
+
+ for (int i = 0; i < 0x30; ++i)
+ EXPECT_TRUE(rva_checker.IsValid(0x3A00 + i));
+ EXPECT_FALSE(rva_checker.IsValid(0x3A00 + 0x30));
+ EXPECT_FALSE(rva_checker.IsValid(0x3A00 + 0x31));
+ EXPECT_FALSE(rva_checker.IsValid(0x3A00 - 1));
+ EXPECT_FALSE(rva_checker.IsValid(0x3A00 - 2));
+
+ EXPECT_FALSE(rva_checker.IsValid(0));
+ EXPECT_FALSE(rva_checker.IsValid(0x10));
+ EXPECT_FALSE(rva_checker.IsValid(0x7FFFFFFFU));
+ EXPECT_FALSE(rva_checker.IsValid(0xFFFFFFFFU));
+}
+
+} // namespace zucchini
diff --git a/algorithm.h b/algorithm.h
new file mode 100644
index 0000000..f5d49e3
--- /dev/null
+++ b/algorithm.h
@@ -0,0 +1,146 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ALGORITHM_H_
+#define COMPONENTS_ZUCCHINI_ALGORITHM_H_
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <type_traits>
+#include <vector>
+
+#include "base/check_op.h"
+
+// Collection of simple utilities used for low-level computation.
+
+namespace zucchini {
+
+// Safely determines whether |[begin, begin + size)| is in |[0, bound)|. Note:
+// The special case |[bound, bound)| is not considered to be in |[0, bound)|.
+template <typename T>
+bool RangeIsBounded(T begin, T size, size_t bound) {
+ static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+ return begin < bound && size <= bound - begin;
+}
+
+// Safely determines whether |value| lies in |[begin, begin + size)|. Works
+// properly even if |begin + size| overflows -- although such ranges are
+// considered pathological, and should fail validation elsewhere.
+template <typename T>
+bool RangeCovers(T begin, T size, T value) {
+ static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+ return begin <= value && value - begin < size;
+}
+
+// Returns the integer in inclusive range |[lo, hi]| that's closest to |value|.
+// This departs from the usual usage of semi-inclusive ranges, but is useful
+// because (1) sentinels can use this, (2) a valid output always exists. It is
+// assumed that |lo <= hi|.
+template <class T>
+T InclusiveClamp(T value, T lo, T hi) {
+ static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+ DCHECK_LE(lo, hi);
+ return value <= lo ? lo : (value >= hi ? hi : value);
+}
+
+// Returns the minimum multiple of |m| that's no less than |x|. Assumes |m > 0|
+// and |x| is sufficiently small so that no overflow occurs.
+template <class T>
+constexpr T AlignCeil(T x, T m) {
+ static_assert(std::is_unsigned<T>::value, "Value type must be unsigned.");
+ return T((x + m - 1) / m) * m;
+}
+
+// Specialized alignment helpers that returns the increment to |pos| to get the
+// next n-aligned value, where n is in {2, 4}. This is useful for aligning
+// iterators relative to a base iterator using:
+// it += IncrementForAlignCeil2(it - base);
+template <class T>
+inline int IncrementForAlignCeil2(T pos) {
+ return static_cast<int>(pos & 1); // Optimized from (-pos) & 1.
+}
+
+template <class T>
+inline int IncrementForAlignCeil4(T pos) {
+ return static_cast<int>((-pos) & 3);
+}
+
+// Sorts values in |container| and removes duplicates.
+template <class T>
+void SortAndUniquify(std::vector<T>* container) {
+ std::sort(container->begin(), container->end());
+ container->erase(std::unique(container->begin(), container->end()),
+ container->end());
+ container->shrink_to_fit();
+}
+
+// Extracts a single bit at |pos| from integer |v|.
+template <int pos, typename T>
+constexpr T GetBit(T v) {
+ return (v >> pos) & 1;
+}
+
+// Extracts bits in inclusive range [|lo|, |hi|] from integer |v|, and returns
+// the sign-extend result. For example, let the (MSB-first) bits in a 32-bit int
+// |v| be:
+// xxxxxxxx xxxxxSii iiiiiiii iyyyyyyy,
+// hi^ lo^ => lo = 7, hi = 18
+// To extract "Sii iiiiiiii i", calling
+// GetSignedBits<7, 18>(v);
+// produces the sign-extended result:
+// SSSSSSSS SSSSSSSS SSSSSiii iiiiiiii.
+template <int lo, int hi, typename T>
+constexpr typename std::make_signed<T>::type GetSignedBits(T v) {
+ constexpr int kNumBits = sizeof(T) * 8;
+ using SignedType = typename std::make_signed<T>::type;
+ // Assumes 0 <= |lo| <= |hi| < |kNumBits|.
+ // How this works:
+ // (1) Shift-left by |kNumBits - 1 - hi| to clear "left" bits.
+ // (2) Shift-right by |kNumBits - 1 - hi + lo| to clear "right" bits. The
+ // input is casted to a signed type to perform sign-extension.
+ return static_cast<SignedType>(v << (kNumBits - 1 - hi)) >>
+ (kNumBits - 1 - hi + lo);
+}
+
+// Similar to GetSignedBits(), but returns the zero-extended result. For the
+// above example, calling
+// GetUnsignedBits<7, 18>(v);
+// results in:
+// 00000000 00000000 0000Siii iiiiiiii.
+template <int lo, int hi, typename T>
+constexpr typename std::make_unsigned<T>::type GetUnsignedBits(T v) {
+ constexpr int kNumBits = sizeof(T) * 8;
+ using UnsignedType = typename std::make_unsigned<T>::type;
+ return static_cast<UnsignedType>(v << (kNumBits - 1 - hi)) >>
+ (kNumBits - 1 - hi + lo);
+}
+
+// Copies bits at |pos| in |v| to all higher bits, and returns the result as the
+// same int type as |v|.
+template <typename T>
+constexpr T SignExtend(int pos, T v) {
+ int kNumBits = sizeof(T) * 8;
+ int kShift = kNumBits - 1 - pos;
+ return static_cast<typename std::make_signed<T>::type>(v << kShift) >> kShift;
+}
+
+// Optimized version where |pos| becomes a template parameter.
+template <int pos, typename T>
+constexpr T SignExtend(T v) {
+ constexpr int kNumBits = sizeof(T) * 8;
+ constexpr int kShift = kNumBits - 1 - pos;
+ return static_cast<typename std::make_signed<T>::type>(v << kShift) >> kShift;
+}
+
+// Determines whether |v|, if interpreted as a signed integer, is representable
+// using |digs| bits. |1 <= digs <= sizeof(T)| is assumed.
+template <int digs, typename T>
+constexpr bool SignedFit(T v) {
+ return v == SignExtend<digs - 1, T>(v);
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ALGORITHM_H_
diff --git a/algorithm_unittest.cc b/algorithm_unittest.cc
new file mode 100644
index 0000000..2e1f94d
--- /dev/null
+++ b/algorithm_unittest.cc
@@ -0,0 +1,347 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/algorithm.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Casting functions to specify signed 8-bit and 16-bit integer constants.
+// For example, signed8(0xFF) == int8_t(-1).
+inline int8_t signed8(uint8_t v) {
+ return *reinterpret_cast<const int8_t*>(&v);
+}
+
+inline int32_t signed16(uint16_t v) {
+ return *reinterpret_cast<const int16_t*>(&v);
+}
+
+} // namespace
+
+TEST(AlgorithmTest, RangeIsBounded) {
+ // Basic tests.
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +0U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +10U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(1U, +9U, 10U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(1U, +10U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(8U, +1U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(8U, +2U, 10U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(9U, +0U, 10U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(10U, +0U, 10U)); // !
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(100U, +0U, 10U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(100U, +1U, 10U));
+
+ // Test at boundary of overflow.
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(42U, +137U, 255U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +255U, 255U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(1U, +254U, 255U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(1U, +255U, 255U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(254U, +0U, 255U));
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(254U, +1U, 255U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(255U, +0U, 255U));
+ EXPECT_FALSE(RangeIsBounded<uint8_t>(255U, +3U, 255U));
+
+ // Test with uint32_t.
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0x1000U, 0x2000U));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0x0FFFU, +0x1000U, 0x2000U));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0x1000U, +0x1000U, 0x2000U));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x1000U, +0x1001U, 0x2000U));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0x1FFFU, +1U, 0x2000U));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x2000U, +0U, 0x2000U)); // !
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x3000U, +0U, 0x2000U));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x3000U, +1U, 0x2000U));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0xFFFFFFFEU, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0xFFFFFFFFU, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(1U, +0xFFFFFFFEU, 0xFFFFFFFFU));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(1U, +0xFFFFFFFFU, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0x80000000U, +0x7FFFFFFFU, 0xFFFFFFFFU));
+ EXPECT_FALSE(
+ RangeIsBounded<uint32_t>(0x80000000U, +0x80000000U, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeIsBounded<uint32_t>(0xFFFFFFFEU, +1U, 0xFFFFFFFFU));
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0xFFFFFFFFU, +0U, 0xFFFFFFFFU)); // !
+ EXPECT_FALSE(
+ RangeIsBounded<uint32_t>(0xFFFFFFFFU, +0xFFFFFFFFU, 0xFFFFFFFFU));
+}
+
+TEST(AlgorithmTest, RangeCovers) {
+ // Basic tests.
+ EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 0U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 5U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 9U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 10U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 100U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 255U));
+
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 0U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 41U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 42U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 100U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 178U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 179U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 255U));
+
+ // 0-size ranges.
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 41U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 42U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 43U));
+
+ // Test at boundary of overflow.
+ EXPECT_TRUE(RangeCovers<uint8_t>(254U, +1U, 254U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(254U, +1U, 255U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(255U, +0U, 255U));
+ EXPECT_TRUE(RangeCovers<uint8_t>(255U, +1U, 255U));
+ EXPECT_FALSE(RangeCovers<uint8_t>(255U, +5U, 0U));
+
+  // Test with uint32_t.
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0U));
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 1234566U));
+ EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 1234567U));
+ EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 4444444U));
+ EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 8888887U));
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 8888888U));
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0x80000000U));
+ EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0xFFFFFFFFU));
+ EXPECT_FALSE(RangeCovers<uint32_t>(0xFFFFFFFFU, +0, 0xFFFFFFFFU));
+ EXPECT_TRUE(RangeCovers<uint32_t>(0xFFFFFFFFU, +1, 0xFFFFFFFFU));
+ EXPECT_FALSE(RangeCovers<uint32_t>(0xFFFFFFFFU, +2, 0));
+}
+
+TEST(AlgorithmTest, InclusiveClamp) {
+ EXPECT_EQ(1U, InclusiveClamp<uint32_t>(0U, 1U, 9U));
+ EXPECT_EQ(1U, InclusiveClamp<uint32_t>(1U, 1U, 9U));
+ EXPECT_EQ(5U, InclusiveClamp<uint32_t>(5U, 1U, 9U));
+ EXPECT_EQ(8U, InclusiveClamp<uint32_t>(8U, 1U, 9U));
+ EXPECT_EQ(9U, InclusiveClamp<uint32_t>(9U, 1U, 9U));
+ EXPECT_EQ(9U, InclusiveClamp<uint32_t>(10U, 1U, 9U));
+ EXPECT_EQ(9U, InclusiveClamp<uint32_t>(0xFFFFFFFFU, 1U, 9U));
+ EXPECT_EQ(42U, InclusiveClamp<uint32_t>(0U, 42U, 42U));
+ EXPECT_EQ(42U, InclusiveClamp<uint32_t>(41U, 42U, 42U));
+ EXPECT_EQ(42U, InclusiveClamp<uint32_t>(42U, 42U, 42U));
+ EXPECT_EQ(42U, InclusiveClamp<uint32_t>(43U, 42U, 42U));
+ EXPECT_EQ(0U, InclusiveClamp<uint32_t>(0U, 0U, 0U));
+ EXPECT_EQ(0xFFFFFFFF,
+ InclusiveClamp<uint32_t>(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
+}
+
+TEST(AlgorithmTest, AlignCeil) {
+ EXPECT_EQ(0U, AlignCeil<uint32_t>(0U, 2U));
+ EXPECT_EQ(2U, AlignCeil<uint32_t>(1U, 2U));
+ EXPECT_EQ(2U, AlignCeil<uint32_t>(2U, 2U));
+ EXPECT_EQ(4U, AlignCeil<uint32_t>(3U, 2U));
+ EXPECT_EQ(4U, AlignCeil<uint32_t>(4U, 2U));
+ EXPECT_EQ(11U, AlignCeil<uint32_t>(10U, 11U));
+ EXPECT_EQ(11U, AlignCeil<uint32_t>(11U, 11U));
+ EXPECT_EQ(22U, AlignCeil<uint32_t>(12U, 11U));
+ EXPECT_EQ(22U, AlignCeil<uint32_t>(21U, 11U));
+ EXPECT_EQ(22U, AlignCeil<uint32_t>(22U, 11U));
+ EXPECT_EQ(33U, AlignCeil<uint32_t>(23U, 11U));
+}
+
+TEST(AlgorithmTest, IncrementForAlignCeil) {
+ struct TestCase {
+ int exp; // Increment to |pos| to get the next nearest aligned value.
+ int pos;
+ };
+ TestCase kTestCases2[] = {
+ {0, 0}, {1, 1}, {0, 2}, {1, 3}, {0, 4}, {1, 5},
+ {1, 97}, {0, 98}, {1, 99}, {0, 100}, {1, -1}, {0, -2},
+ {1, -101}, {0, -100}, {1, -99}, {0, -98}, {1, -97}, {0, -96},
+ };
+ for (const auto& test_case : kTestCases2) {
+ EXPECT_EQ(test_case.exp, IncrementForAlignCeil2<int32_t>(test_case.pos));
+ if (test_case.pos >= 0)
+ EXPECT_EQ(test_case.exp, IncrementForAlignCeil2<uint32_t>(test_case.pos));
+ }
+ TestCase kTestCases4[] = {
+ {0, 0}, {3, 1}, {2, 2}, {1, 3}, {0, 4}, {3, 5},
+ {3, 97}, {2, 98}, {1, 99}, {0, 100}, {1, -1}, {2, -2},
+ {1, -101}, {0, -100}, {3, -99}, {2, -98}, {1, -97}, {0, -96},
+ };
+ for (const auto& test_case : kTestCases4) {
+ EXPECT_EQ(test_case.exp, IncrementForAlignCeil4<int32_t>(test_case.pos));
+ if (test_case.pos >= 0)
+ EXPECT_EQ(test_case.exp, IncrementForAlignCeil4<uint32_t>(test_case.pos));
+ }
+}
+
+TEST(AlgorithmTest, GetBit) {
+ // 0xC5 = 0b1100'0101.
+ constexpr uint8_t v = 0xC5;
+ EXPECT_EQ(uint8_t(1), (GetBit<0>(v)));
+ EXPECT_EQ(int8_t(0), (GetBit<1>(signed8(v))));
+ EXPECT_EQ(uint8_t(1), (GetBit<2>(v)));
+ EXPECT_EQ(int8_t(0), (GetBit<3>(signed8(v))));
+ EXPECT_EQ(uint8_t(0), (GetBit<4>(v)));
+ EXPECT_EQ(int8_t(0), (GetBit<5>(signed8(v))));
+ EXPECT_EQ(uint8_t(1), (GetBit<6>(v)));
+ EXPECT_EQ(int8_t(1), (GetBit<7>(signed8(v))));
+
+ EXPECT_EQ(int16_t(1), (GetBit<3, int16_t>(0x0008)));
+ EXPECT_EQ(uint16_t(0), (GetBit<14, uint16_t>(0xB000)));
+ EXPECT_EQ(uint16_t(1), (GetBit<15, uint16_t>(0xB000)));
+
+ EXPECT_EQ(uint32_t(1), (GetBit<0, uint32_t>(0xFFFFFFFF)));
+ EXPECT_EQ(int32_t(1), (GetBit<31, int32_t>(0xFFFFFFFF)));
+
+ EXPECT_EQ(uint32_t(0), (GetBit<0, uint32_t>(0xFF00A596)));
+ EXPECT_EQ(int32_t(1), (GetBit<1, int32_t>(0xFF00A596)));
+ EXPECT_EQ(uint32_t(1), (GetBit<4, uint32_t>(0xFF00A596)));
+ EXPECT_EQ(int32_t(1), (GetBit<7, int32_t>(0xFF00A596)));
+ EXPECT_EQ(uint32_t(0), (GetBit<9, uint32_t>(0xFF00A596)));
+  EXPECT_EQ(int32_t(0), (GetBit<16, int32_t>(0xFF00A596)));
+ EXPECT_EQ(uint32_t(1), (GetBit<24, uint32_t>(0xFF00A596)));
+ EXPECT_EQ(int32_t(1), (GetBit<31, int32_t>(0xFF00A596)));
+
+ EXPECT_EQ(uint64_t(0), (GetBit<62, uint64_t>(0xB000000000000000ULL)));
+ EXPECT_EQ(int64_t(1), (GetBit<63, int64_t>(0xB000000000000000LL)));
+}
+
+TEST(AlgorithmTest, GetBits) {
+ // Zero-extended: Basic cases for various values.
+ uint32_t test_cases[] = {0, 1, 2, 7, 137, 0x10000, 0x69969669, 0xFFFFFFFF};
+ for (uint32_t v : test_cases) {
+ EXPECT_EQ(uint32_t(v & 0xFF), (GetUnsignedBits<0, 7>(v)));
+ EXPECT_EQ(uint32_t((v >> 8) & 0xFF), (GetUnsignedBits<8, 15>(v)));
+ EXPECT_EQ(uint32_t((v >> 16) & 0xFF), (GetUnsignedBits<16, 23>(v)));
+ EXPECT_EQ(uint32_t((v >> 24) & 0xFF), (GetUnsignedBits<24, 31>(v)));
+ EXPECT_EQ(uint32_t(v & 0xFFFF), (GetUnsignedBits<0, 15>(v)));
+ EXPECT_EQ(uint32_t((v >> 1) & 0x3FFFFFFF), (GetUnsignedBits<1, 30>(v)));
+ EXPECT_EQ(uint32_t((v >> 2) & 0x0FFFFFFF), (GetUnsignedBits<2, 29>(v)));
+ EXPECT_EQ(uint32_t(v), (GetUnsignedBits<0, 31>(v)));
+ }
+
+ // Zero-extended: Reading off various nibbles.
+ EXPECT_EQ(uint32_t(0x4), (GetUnsignedBits<20, 23>(0x00432100U)));
+ EXPECT_EQ(uint32_t(0x43), (GetUnsignedBits<16, 23>(0x00432100)));
+ EXPECT_EQ(uint32_t(0x432), (GetUnsignedBits<12, 23>(0x00432100U)));
+ EXPECT_EQ(uint32_t(0x4321), (GetUnsignedBits<8, 23>(0x00432100)));
+ EXPECT_EQ(uint32_t(0x321), (GetUnsignedBits<8, 19>(0x00432100U)));
+ EXPECT_EQ(uint32_t(0x21), (GetUnsignedBits<8, 15>(0x00432100)));
+ EXPECT_EQ(uint32_t(0x1), (GetUnsignedBits<8, 11>(0x00432100U)));
+
+ // Sign-extended: 0x3CA5 = 0b0011'1100'1010'0101.
+ EXPECT_EQ(signed16(0xFFFF), (GetSignedBits<0, 0>(0x3CA5U)));
+ EXPECT_EQ(signed16(0x0001), (GetSignedBits<0, 1>(0x3CA5)));
+ EXPECT_EQ(signed16(0xFFFD), (GetSignedBits<0, 2>(0x3CA5U)));
+ EXPECT_EQ(signed16(0x0005), (GetSignedBits<0, 4>(0x3CA5)));
+ EXPECT_EQ(signed16(0xFFA5), (GetSignedBits<0, 7>(0x3CA5U)));
+ EXPECT_EQ(signed16(0xFCA5), (GetSignedBits<0, 11>(0x3CA5)));
+ EXPECT_EQ(signed16(0x0005), (GetSignedBits<0, 3>(0x3CA5U)));
+ EXPECT_EQ(signed16(0xFFFA), (GetSignedBits<4, 7>(0x3CA5)));
+ EXPECT_EQ(signed16(0xFFFC), (GetSignedBits<8, 11>(0x3CA5U)));
+ EXPECT_EQ(signed16(0x0003), (GetSignedBits<12, 15>(0x3CA5)));
+ EXPECT_EQ(signed16(0x0000), (GetSignedBits<4, 4>(0x3CA5U)));
+ EXPECT_EQ(signed16(0xFFFF), (GetSignedBits<5, 5>(0x3CA5)));
+ EXPECT_EQ(signed16(0x0002), (GetSignedBits<4, 6>(0x3CA5U)));
+ EXPECT_EQ(signed16(0x1E52), (GetSignedBits<1, 14>(0x3CA5)));
+ EXPECT_EQ(signed16(0xFF29), (GetSignedBits<2, 13>(0x3CA5U)));
+ EXPECT_EQ(int32_t(0x00001E52), (GetSignedBits<1, 14>(0x3CA5)));
+ EXPECT_EQ(int32_t(0xFFFFFF29), (GetSignedBits<2, 13>(0x3CA5U)));
+
+ // 64-bits: Extract from middle 0x66 = 0b0110'0110.
+ EXPECT_EQ(uint64_t(0x0000000000000009LL),
+ (GetUnsignedBits<30, 33>(int64_t(0x2222222661111111LL))));
+ EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFFF9LL),
+ (GetSignedBits<30, 33>(uint64_t(0x2222222661111111LL))));
+}
+
+TEST(AlgorithmTest, SignExtend) {
+ // 0x6A = 0b0110'1010.
+ EXPECT_EQ(uint8_t(0x00), (SignExtend<uint8_t>(0, 0x6A)));
+ EXPECT_EQ(signed8(0xFE), (SignExtend<int8_t>(1, signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0x02), (SignExtend<uint8_t>(2, 0x6A)));
+ EXPECT_EQ(signed8(0xFA), (SignExtend<int8_t>(3, signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0x0A), (SignExtend<uint8_t>(4, 0x6A)));
+ EXPECT_EQ(signed8(0xEA), (SignExtend<int8_t>(5, signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0xEA), (SignExtend<uint8_t>(6, 0x6A)));
+ EXPECT_EQ(signed8(0x6A), (SignExtend<int8_t>(7, signed8(0x6A))));
+
+ EXPECT_EQ(signed16(0xFFFA), (SignExtend<int16_t>(3, 0x6A)));
+ EXPECT_EQ(uint16_t(0x000A), (SignExtend<uint16_t>(4, 0x6A)));
+
+ EXPECT_EQ(int32_t(0xFFFF8000), (SignExtend<int32_t>(15, 0x00008000)));
+ EXPECT_EQ(uint32_t(0x00008000U), (SignExtend<uint32_t>(16, 0x00008000)));
+ EXPECT_EQ(int32_t(0xFFFFFC00), (SignExtend<int32_t>(10, 0x00000400)));
+ EXPECT_EQ(uint32_t(0xFFFFFFFFU), (SignExtend<uint32_t>(31, 0xFFFFFFFF)));
+
+ EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFE6ALL),
+ (SignExtend<int64_t>(9, 0x000000000000026ALL)));
+ EXPECT_EQ(int64_t(0x000000000000016ALL),
+ (SignExtend<int64_t>(9, 0xFFFFFFFFFFFFFD6ALL)));
+ EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFE6AULL),
+ (SignExtend<uint64_t>(9, 0x000000000000026AULL)));
+ EXPECT_EQ(uint64_t(0x000000000000016AULL),
+ (SignExtend<uint64_t>(9, 0xFFFFFFFFFFFFFD6AULL)));
+}
+
+TEST(AlgorithmTest, SignExtendTemplated) {
+ // 0x6A = 0b0110'1010.
+ EXPECT_EQ(uint8_t(0x00), (SignExtend<0, uint8_t>(0x6A)));
+ EXPECT_EQ(signed8(0xFE), (SignExtend<1, int8_t>(signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0x02), (SignExtend<2, uint8_t>(0x6A)));
+ EXPECT_EQ(signed8(0xFA), (SignExtend<3, int8_t>(signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0x0A), (SignExtend<4, uint8_t>(0x6A)));
+ EXPECT_EQ(signed8(0xEA), (SignExtend<5, int8_t>(signed8(0x6A))));
+ EXPECT_EQ(uint8_t(0xEA), (SignExtend<6, uint8_t>(0x6A)));
+ EXPECT_EQ(signed8(0x6A), (SignExtend<7, int8_t>(signed8(0x6A))));
+
+ EXPECT_EQ(signed16(0xFFFA), (SignExtend<3, int16_t>(0x6A)));
+ EXPECT_EQ(uint16_t(0x000A), (SignExtend<4, uint16_t>(0x6A)));
+
+ EXPECT_EQ(int32_t(0xFFFF8000), (SignExtend<15, int32_t>(0x00008000)));
+ EXPECT_EQ(uint32_t(0x00008000U), (SignExtend<16, uint32_t>(0x00008000)));
+ EXPECT_EQ(int32_t(0xFFFFFC00), (SignExtend<10, int32_t>(0x00000400)));
+ EXPECT_EQ(uint32_t(0xFFFFFFFFU), (SignExtend<31, uint32_t>(0xFFFFFFFF)));
+
+ EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFE6ALL),
+ (SignExtend<9, int64_t>(0x000000000000026ALL)));
+ EXPECT_EQ(int64_t(0x000000000000016ALL),
+ (SignExtend<9, int64_t>(0xFFFFFFFFFFFFFD6ALL)));
+ EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFE6AULL),
+ (SignExtend<9, uint64_t>(0x000000000000026AULL)));
+ EXPECT_EQ(uint64_t(0x000000000000016AULL),
+ (SignExtend<9, uint64_t>(0xFFFFFFFFFFFFFD6AULL)));
+}
+
+TEST(AlgorithmTest, SignedFit) {
+ for (int v = -0x80; v < 0x80; ++v) {
+ EXPECT_EQ(v >= -1 && v < 1, (SignedFit<1, int8_t>(v)));
+ EXPECT_EQ(v >= -1 && v < 1, (SignedFit<1, uint8_t>(v)));
+ EXPECT_EQ(v >= -2 && v < 2, (SignedFit<2, int8_t>(v)));
+ EXPECT_EQ(v >= -4 && v < 4, (SignedFit<3, uint8_t>(v)));
+ EXPECT_EQ(v >= -8 && v < 8, (SignedFit<4, int16_t>(v)));
+ EXPECT_EQ(v >= -16 && v < 16, (SignedFit<5, uint32_t>(v)));
+ EXPECT_EQ(v >= -32 && v < 32, (SignedFit<6, int32_t>(v)));
+ EXPECT_EQ(v >= -64 && v < 64, (SignedFit<7, uint64_t>(v)));
+ EXPECT_TRUE((SignedFit<8, int8_t>(v)));
+ EXPECT_TRUE((SignedFit<8, uint8_t>(v)));
+ }
+
+ EXPECT_TRUE((SignedFit<16, uint32_t>(0x00000000)));
+ EXPECT_TRUE((SignedFit<16, uint32_t>(0x00007FFF)));
+ EXPECT_TRUE((SignedFit<16, uint32_t>(0xFFFF8000)));
+ EXPECT_TRUE((SignedFit<16, uint32_t>(0xFFFFFFFF)));
+ EXPECT_TRUE((SignedFit<16, int32_t>(0x00007FFF)));
+ EXPECT_TRUE((SignedFit<16, int32_t>(0xFFFF8000)));
+
+ EXPECT_FALSE((SignedFit<16, uint32_t>(0x80000000)));
+ EXPECT_FALSE((SignedFit<16, uint32_t>(0x7FFFFFFF)));
+ EXPECT_FALSE((SignedFit<16, uint32_t>(0x00008000)));
+ EXPECT_FALSE((SignedFit<16, uint32_t>(0xFFFF7FFF)));
+ EXPECT_FALSE((SignedFit<16, int32_t>(0x00008000)));
+ EXPECT_FALSE((SignedFit<16, int32_t>(0xFFFF7FFF)));
+
+ EXPECT_TRUE((SignedFit<48, int64_t>(0x00007FFFFFFFFFFFLL)));
+ EXPECT_TRUE((SignedFit<48, int64_t>(0xFFFF800000000000LL)));
+ EXPECT_FALSE((SignedFit<48, int64_t>(0x0008000000000000LL)));
+ EXPECT_FALSE((SignedFit<48, int64_t>(0xFFFF7FFFFFFFFFFFLL)));
+}
+
+} // namespace zucchini
diff --git a/arm_utils.cc b/arm_utils.cc
new file mode 100644
index 0000000..2a915a8
--- /dev/null
+++ b/arm_utils.cc
@@ -0,0 +1,597 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/arm_utils.h"
+
+#include "components/zucchini/algorithm.h"
+
+namespace zucchini {
+
+namespace {
+
+inline bool IsMisaligned(rva_t rva, ArmAlign align) {
+ return (rva & (align - 1)) != 0;
+}
+
+} // namespace
+
+/******** AArch32Rel32Translator ********/
+
+AArch32Rel32Translator::AArch32Rel32Translator() = default;
+
+// The mapping between ARM instruction "Code" to "Displacement" involves complex
+// bit manipulation. The comments below annotate bits mappings using a string.
+// * Bits are listed from highest-order to lowest-order (like in the manual).
+// * '0' and '1' denote literals.
+// * Uppercase letters denote a single bit in "Code". For example, 'S' denotes
+// a sign bit that gets extended in "Displacement". To follow naming in the
+//   manual, these may be enumerated, and written as "(I1)", "(I2)", etc.
+// * Lowercase letters denote bit fields with orders preserved.
+
+// static
+ArmAlign AArch32Rel32Translator::DecodeA24(uint32_t code32, arm_disp_t* disp) {
+ // Handle multiple instructions. Let cccc != 1111:
+ // B encoding A1:
+ // Code: cccc1010 Siiiiiii iiiiiiii iiiiiiii
+ // Displacement: SSSSSSSi iiiiiiii iiiiiiii iiiiii00
+ // BL encoding A1:
+ // Code: cccc1011 Siiiiiii iiiiiiii iiiiiiii
+ // Displacement: SSSSSSSi iiiiiiii iiiiiiii iiiiii00
+ // BLX encoding A2:
+ // Code: 1111101H Siiiiiii iiiiiiii iiiiiiii
+ // Displacement: SSSSSSSi iiiiiiii iiiiiiii iiiiiiH0
+ uint8_t bits = GetUnsignedBits<24, 27>(code32);
+ if (bits == 0xA || bits == 0xB) { // B, BL, or BLX.
+ *disp = GetSignedBits<0, 23>(code32) << 2;
+ uint8_t cond = GetUnsignedBits<28, 31>(code32);
+ if (cond == 0xF) { // BLX.
+ uint32_t H = GetBit<24>(code32);
+ *disp |= H << 1;
+ return kArmAlign2;
+ }
+ return kArmAlign4;
+ }
+ return kArmAlignFail;
+}
+
+// static
+bool AArch32Rel32Translator::EncodeA24(arm_disp_t disp, uint32_t* code32) {
+ uint32_t t = *code32;
+ uint8_t bits = GetUnsignedBits<24, 27>(t);
+ if (bits == 0xA || bits == 0xB) {
+ // B, BL, or BLX.
+ if (!SignedFit<26>(disp)) // Detect overflow.
+ return false;
+ uint8_t cond = GetUnsignedBits<28, 31>(t);
+ if (cond == 0xF) {
+ if (disp % 2) // BLX (encoding A2) requires 2-byte alignment.
+ return false;
+ uint32_t H = GetBit<1>(disp);
+ t = (t & 0xFEFFFFFF) | (H << 24);
+ } else {
+ if (disp % 4) // B and BL require 4-byte alignment.
+ return false;
+ }
+ t = (t & 0xFF000000) | ((disp >> 2) & 0x00FFFFFF);
+ *code32 = t;
+ return true;
+ }
+ return false;
+}
+
+// static
+bool AArch32Rel32Translator::ReadA24(rva_t instr_rva,
+ uint32_t code32,
+ rva_t* target_rva) {
+ constexpr ArmAlign kInstrAlign = kArmAlign4;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ arm_disp_t disp;
+ ArmAlign target_align = DecodeA24(code32, &disp);
+ if (target_align == kArmAlignFail)
+ return false;
+ *target_rva = GetArmTargetRvaFromDisp(instr_rva, disp, target_align);
+ return true;
+}
+
+// static
+bool AArch32Rel32Translator::WriteA24(rva_t instr_rva,
+ rva_t target_rva,
+ uint32_t* code32) {
+ constexpr ArmAlign kInstrAlign = kArmAlign4;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ // Dummy decode to get |target_align|.
+ arm_disp_t dummy_disp;
+ ArmAlign target_align = DecodeA24(*code32, &dummy_disp);
+ if (target_align == kArmAlignFail || IsMisaligned(target_rva, target_align))
+ return false;
+ arm_disp_t disp =
+ GetArmDispFromTargetRva(instr_rva, target_rva, target_align);
+ return EncodeA24(disp, code32);
+}
+
+// static
+ArmAlign AArch32Rel32Translator::DecodeT8(uint16_t code16, arm_disp_t* disp) {
+ if ((code16 & 0xF000) == 0xD000 && (code16 & 0x0F00) != 0x0F00) {
+ // B encoding T1:
+ // Code: 1101cccc Siiiiiii
+ // Displacement: SSSSSSSS SSSSSSSS SSSSSSSS iiiiiii0
+ *disp = GetSignedBits<0, 7>(code16) << 1;
+ return kArmAlign2;
+ }
+ return kArmAlignFail;
+}
+
+// static
+bool AArch32Rel32Translator::EncodeT8(arm_disp_t disp, uint16_t* code16) {
+ uint16_t t = *code16;
+ if ((t & 0xF000) == 0xD000 && (t & 0x0F00) != 0x0F00) {
+ if (disp % 2) // Require 2-byte alignment.
+ return false;
+ if (!SignedFit<9>(disp)) // Detect overflow.
+ return false;
+ t = (t & 0xFF00) | ((disp >> 1) & 0x00FF);
+ *code16 = t;
+ return true;
+ }
+ return false;
+}
+
+// static
+bool AArch32Rel32Translator::ReadT8(rva_t instr_rva,
+ uint16_t code16,
+ rva_t* target_rva) {
+ constexpr ArmAlign kInstrAlign = kArmAlign2;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ arm_disp_t disp;
+ ArmAlign target_align = DecodeT8(code16, &disp);
+ if (target_align == kArmAlignFail)
+ return false;
+ *target_rva = GetThumb2TargetRvaFromDisp(instr_rva, disp, target_align);
+ return true;
+}
+
+// static
+bool AArch32Rel32Translator::WriteT8(rva_t instr_rva,
+ rva_t target_rva,
+ uint16_t* code16) {
+ constexpr ArmAlign kInstrAlign = kArmAlign2;
+ constexpr ArmAlign kTargetAlign = kArmAlign2;
+ if (IsMisaligned(instr_rva, kInstrAlign) ||
+ IsMisaligned(target_rva, kTargetAlign)) {
+ return false;
+ }
+ arm_disp_t disp =
+ GetThumb2DispFromTargetRva(instr_rva, target_rva, kTargetAlign);
+ return EncodeT8(disp, code16);
+}
+
+// static
+ArmAlign AArch32Rel32Translator::DecodeT11(uint16_t code16, arm_disp_t* disp) {
+ if ((code16 & 0xF800) == 0xE000) {
+ // B encoding T2:
+ // Code: 11100Sii iiiiiiii
+ // Displacement: SSSSSSSS SSSSSSSS SSSSSiii iiiiiii0
+ *disp = GetSignedBits<0, 10>(code16) << 1;
+ return kArmAlign2;
+ }
+ return kArmAlignFail;
+}
+
+// static
+bool AArch32Rel32Translator::EncodeT11(arm_disp_t disp, uint16_t* code16) {
+ uint16_t t = *code16;
+ if ((t & 0xF800) == 0xE000) {
+ if (disp % 2) // Require 2-byte alignment.
+ return false;
+ if (!SignedFit<12>(disp)) // Detect overflow.
+ return false;
+ t = (t & 0xF800) | ((disp >> 1) & 0x07FF);
+ *code16 = t;
+ return true;
+ }
+ return false;
+}
+
+// static
+bool AArch32Rel32Translator::ReadT11(rva_t instr_rva,
+ uint16_t code16,
+ rva_t* target_rva) {
+ constexpr ArmAlign kInstrAlign = kArmAlign2;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ arm_disp_t disp;
+ ArmAlign target_align = DecodeT11(code16, &disp);
+ if (target_align == kArmAlignFail)
+ return false;
+ *target_rva = GetThumb2TargetRvaFromDisp(instr_rva, disp, target_align);
+ return true;
+}
+
+// static
+bool AArch32Rel32Translator::WriteT11(rva_t instr_rva,
+ rva_t target_rva,
+ uint16_t* code16) {
+ constexpr ArmAlign kInstrAlign = kArmAlign2;
+ constexpr ArmAlign kTargetAlign = kArmAlign2;
+ if (IsMisaligned(instr_rva, kInstrAlign) ||
+ IsMisaligned(target_rva, kTargetAlign)) {
+ return false;
+ }
+ arm_disp_t disp =
+ GetThumb2DispFromTargetRva(instr_rva, target_rva, kTargetAlign);
+ return EncodeT11(disp, code16);
+}
+
+// static
+ArmAlign AArch32Rel32Translator::DecodeT20(uint32_t code32, arm_disp_t* disp) {
+ if ((code32 & 0xF800D000) == 0xF0008000 &&
+ (code32 & 0x03C00000) != 0x03C00000) {
+ // B encoding T3. Note the reversal of "(J1)" and "(J2)".
+ // Code: 11110Scc cciiiiii 10(J1)0(J2)jjj jjjjjjjj
+ // Displacement: SSSSSSSS SSSS(J2)(J1)ii iiiijjjj jjjjjjj0
+ uint32_t imm11 = GetUnsignedBits<0, 10>(code32); // jj...j.
+ uint32_t J2 = GetBit<11>(code32);
+ uint32_t J1 = GetBit<13>(code32);
+ uint32_t imm6 = GetUnsignedBits<16, 21>(code32); // ii...i.
+ uint32_t S = GetBit<26>(code32);
+ uint32_t t = (imm6 << 12) | (imm11 << 1);
+ t |= (S << 20) | (J2 << 19) | (J1 << 18);
+ *disp = SignExtend<20, int32_t>(t);
+ return kArmAlign2;
+ }
+ return kArmAlignFail;
+}
+
+// static
+bool AArch32Rel32Translator::EncodeT20(arm_disp_t disp, uint32_t* code32) {
+ uint32_t t = *code32;
+ if ((t & 0xF800D000) == 0xF0008000 && (t & 0x03C00000) != 0x03C00000) {
+ if (disp % 2) // Require 2-byte alignment.
+ return false;
+ if (!SignedFit<21>(disp)) // Detect overflow.
+ return false;
+ uint32_t S = GetBit<20>(disp);
+ uint32_t J2 = GetBit<19>(disp);
+ uint32_t J1 = GetBit<18>(disp);
+ uint32_t imm6 = GetUnsignedBits<12, 17>(disp); // ii...i.
+ uint32_t imm11 = GetUnsignedBits<1, 11>(disp); // jj...j.
+ t &= 0xFBC0D000;
+ t |= (S << 26) | (imm6 << 16) | (J1 << 13) | (J2 << 11) | imm11;
+ *code32 = t;
+ return true;
+ }
+ return false;
+}
+
+// static
+bool AArch32Rel32Translator::ReadT20(rva_t instr_rva,
+ uint32_t code32,
+ rva_t* target_rva) {
+ constexpr ArmAlign kInstrAlign = kArmAlign2;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ arm_disp_t disp;
+ ArmAlign target_align = DecodeT20(code32, &disp);
+ if (target_align == kArmAlignFail)
+ return false;
+ *target_rva = GetThumb2TargetRvaFromDisp(instr_rva, disp, target_align);
+ return true;
+}
+
+// static
+bool AArch32Rel32Translator::WriteT20(rva_t instr_rva,
+ rva_t target_rva,
+ uint32_t* code32) {
+ constexpr ArmAlign kInstrAlign = kArmAlign2;
+ constexpr ArmAlign kTargetAlign = kArmAlign2;
+ if (IsMisaligned(instr_rva, kInstrAlign) ||
+ IsMisaligned(target_rva, kTargetAlign)) {
+ return false;
+ }
+ arm_disp_t disp =
+ GetThumb2DispFromTargetRva(instr_rva, target_rva, kTargetAlign);
+ return EncodeT20(disp, code32);
+}
+
+// static
+ArmAlign AArch32Rel32Translator::DecodeT24(uint32_t code32, arm_disp_t* disp) {
+ uint32_t bits = code32 & 0xF800D000;
+ if (bits == 0xF0009000 || bits == 0xF000D000 || bits == 0xF000C000) {
+ // Let I1 = J1 ^ S ^ 1, I2 = J2 ^ S ^ 1.
+ // B encoding T4:
+ // Code: 11110Sii iiiiiiii 10(J1)1(J2)jjj jjjjjjjj
+ // Displacement: SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjjj0
+ // BL encoding T1:
+ // Code: 11110Sii iiiiiiii 11(J1)1(J2)jjj jjjjjjjj
+ // Displacement: SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjjj0
+ // BLX encoding T2: H should be 0:
+ // Code: 11110Sii iiiiiiii 11(J1)0(J2)jjj jjjjjjjH
+ // Displacement: SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjjH0
+ uint32_t imm11 = GetUnsignedBits<0, 10>(code32); // jj...j.
+ uint32_t J2 = GetBit<11>(code32);
+ uint32_t J1 = GetBit<13>(code32);
+ uint32_t imm10 = GetUnsignedBits<16, 25>(code32); // ii...i.
+ uint32_t S = GetBit<26>(code32);
+ uint32_t t = (imm10 << 12) | (imm11 << 1);
+ t |= (S << 24) | ((J1 ^ S ^ 1) << 23) | ((J2 ^ S ^ 1) << 22);
+ t = SignExtend<24, int32_t>(t);
+ // BLX encoding T2 requires final target to be 4-byte aligned by rounding
+ // downward. This is applied to |t| *after* clipping.
+ ArmAlign target_align = kArmAlign2;
+ if (bits == 0xF000C000) {
+ uint32_t H = GetBit<0>(code32);
+ if (H)
+ return kArmAlignFail; // Illegal instruction: H must be 0.
+ target_align = kArmAlign4;
+ }
+ *disp = static_cast<int32_t>(t);
+ return target_align;
+ }
+ return kArmAlignFail;
+}
+
+// static
+bool AArch32Rel32Translator::EncodeT24(arm_disp_t disp, uint32_t* code32) {
+ uint32_t t = *code32;
+ uint32_t bits = t & 0xF800D000;
+ if (bits == 0xF0009000 || bits == 0xF000D000 || bits == 0xF000C000) {
+ if (disp % 2) // Require 2-byte alignment.
+ return false;
+ // BLX encoding T2 requires H == 0, and that |disp| results in |target_rva|
+ // with a 4-byte aligned address.
+ if (bits == 0xF000C000) {
+ uint32_t H = GetBit<1>(disp);
+ if (H)
+ return false; // Illegal |disp|: H must be 0.
+ }
+ if (!SignedFit<25>(disp)) // Detect overflow.
+ return false;
+ uint32_t imm11 = GetUnsignedBits<1, 11>(disp); // jj...j.
+ uint32_t imm10 = GetUnsignedBits<12, 21>(disp); // ii...i.
+ uint32_t I2 = GetBit<22>(disp);
+ uint32_t I1 = GetBit<23>(disp);
+ uint32_t S = GetBit<24>(disp);
+ t &= 0xF800D000;
+ t |= (S << 26) | (imm10 << 16) | ((I1 ^ S ^ 1) << 13) |
+ ((I2 ^ S ^ 1) << 11) | imm11;
+ *code32 = t;
+ return true;
+ }
+ return false;
+}
+
+// static
+bool AArch32Rel32Translator::ReadT24(rva_t instr_rva,
+ uint32_t code32,
+ rva_t* target_rva) {
+ constexpr ArmAlign kInstrAlign = kArmAlign2;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ arm_disp_t disp;
+ ArmAlign target_align = DecodeT24(code32, &disp);
+ if (target_align == kArmAlignFail)
+ return false;
+ *target_rva = GetThumb2TargetRvaFromDisp(instr_rva, disp, target_align);
+ return true;
+}
+
+// static
+bool AArch32Rel32Translator::WriteT24(rva_t instr_rva,
+ rva_t target_rva,
+ uint32_t* code32) {
+ constexpr ArmAlign kInstrAlign = kArmAlign2;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ // Dummy decode to get |target_align|.
+ arm_disp_t dummy_disp;
+ ArmAlign target_align = DecodeT24(*code32, &dummy_disp);
+ if (target_align == kArmAlignFail || IsMisaligned(target_rva, target_align))
+ return false;
+ arm_disp_t disp =
+ GetThumb2DispFromTargetRva(instr_rva, target_rva, target_align);
+ return EncodeT24(disp, code32);
+}
+
+/******** AArch64Rel32Translator ********/
+
+AArch64Rel32Translator::AArch64Rel32Translator() = default;
+
+// static
+ArmAlign AArch64Rel32Translator::DecodeImmd14(uint32_t code32,
+ arm_disp_t* disp) {
+ // TBZ:
+ // Code: b0110110 bbbbbSii iiiiiiii iiittttt
+ // Displacement: SSSSSSSS SSSSSSSS Siiiiiii iiiiii00
+ // TBNZ:
+ // Code: b0110111 bbbbbSii iiiiiiii iiittttt
+ // Displacement: SSSSSSSS SSSSSSSS Siiiiiii iiiiii00
+ uint32_t bits = code32 & 0x7F000000;
+ if (bits == 0x36000000 || bits == 0x37000000) {
+ *disp = GetSignedBits<5, 18>(code32) << 2;
+ return kArmAlign4;
+ }
+ return kArmAlignFail;
+}
+
+// static
+bool AArch64Rel32Translator::EncodeImmd14(arm_disp_t disp, uint32_t* code32) {
+ uint32_t t = *code32;
+ uint32_t bits = t & 0x7F000000;
+ if (bits == 0x36000000 || bits == 0x37000000) {
+ if (disp % 4) // Require 4-byte alignment.
+ return false;
+ if (!SignedFit<16>(disp)) // Detect overflow.
+ return false;
+ uint32_t imm14 = GetUnsignedBits<2, 15>(disp); // ii...i.
+ t &= 0xFFF8001F;
+ t |= imm14 << 5;
+ *code32 = t;
+ return true;
+ }
+ return false;
+}
+
+// static
+bool AArch64Rel32Translator::ReadImmd14(rva_t instr_rva,
+ uint32_t code32,
+ rva_t* target_rva) {
+ constexpr ArmAlign kInstrAlign = kArmAlign4;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ arm_disp_t disp;
+ if (DecodeImmd14(code32, &disp) == kArmAlignFail)
+ return false;
+ *target_rva = GetTargetRvaFromDisp(instr_rva, disp);
+ return true;
+}
+
+// static
+bool AArch64Rel32Translator::WriteImmd14(rva_t instr_rva,
+ rva_t target_rva,
+ uint32_t* code32) {
+ constexpr ArmAlign kInstrAlign = kArmAlign4;
+ constexpr ArmAlign kTargetAlign = kArmAlign4;
+ if (IsMisaligned(instr_rva, kInstrAlign) ||
+ IsMisaligned(target_rva, kTargetAlign)) {
+ return false;
+ }
+ arm_disp_t disp = GetDispFromTargetRva(instr_rva, target_rva);
+ return EncodeImmd14(disp, code32);
+}
+
+// static
+ArmAlign AArch64Rel32Translator::DecodeImmd19(uint32_t code32,
+ arm_disp_t* disp) {
+ // B.cond:
+ // Code: 01010100 Siiiiiii iiiiiiii iii0cccc
+ // Displacement: SSSSSSSS SSSSiiii iiiiiiii iiiiii00
+ // CBZ:
+ // Code: z0110100 Siiiiiii iiiiiiii iiittttt
+ // Displacement: SSSSSSSS SSSSiiii iiiiiiii iiiiii00
+ // CBNZ:
+ // Code: z0110101 Siiiiiii iiiiiiii iiittttt
+ // Displacement: SSSSSSSS SSSSiiii iiiiiiii iiiiii00
+ uint32_t bits1 = code32 & 0xFF000010;
+ uint32_t bits2 = code32 & 0x7F000000;
+ if (bits1 == 0x54000000 || bits2 == 0x34000000 || bits2 == 0x35000000) {
+ *disp = GetSignedBits<5, 23>(code32) << 2;
+ return kArmAlign4;
+ }
+ return kArmAlignFail;
+}
+
+// static
+bool AArch64Rel32Translator::EncodeImmd19(arm_disp_t disp, uint32_t* code32) {
+ uint32_t t = *code32;
+ uint32_t bits1 = t & 0xFF000010;
+ uint32_t bits2 = t & 0x7F000000;
+ if (bits1 == 0x54000000 || bits2 == 0x34000000 || bits2 == 0x35000000) {
+ if (disp % 4) // Require 4-byte alignment.
+ return false;
+ if (!SignedFit<21>(disp)) // Detect overflow.
+ return false;
+ uint32_t imm19 = GetUnsignedBits<2, 20>(disp); // ii...i.
+ t &= 0xFF00001F;
+ t |= imm19 << 5;
+ *code32 = t;
+ return true;
+ }
+ return false;
+}
+
+// static
+bool AArch64Rel32Translator::ReadImmd19(rva_t instr_rva,
+ uint32_t code32,
+ rva_t* target_rva) {
+ constexpr ArmAlign kInstrAlign = kArmAlign4;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ arm_disp_t disp;
+ if (DecodeImmd19(code32, &disp) == kArmAlignFail)
+ return false;
+ *target_rva = GetTargetRvaFromDisp(instr_rva, disp);
+ return true;
+}
+
+// static
+bool AArch64Rel32Translator::WriteImmd19(rva_t instr_rva,
+ rva_t target_rva,
+ uint32_t* code32) {
+ constexpr ArmAlign kInstrAlign = kArmAlign4;
+ constexpr ArmAlign kTargetAlign = kArmAlign4;
+ if (IsMisaligned(instr_rva, kInstrAlign) ||
+ IsMisaligned(target_rva, kTargetAlign)) {
+ return false;
+ }
+ arm_disp_t disp = GetDispFromTargetRva(instr_rva, target_rva);
+ return EncodeImmd19(disp, code32);
+}
+
+// static
+ArmAlign AArch64Rel32Translator::DecodeImmd26(uint32_t code32,
+ arm_disp_t* disp) {
+ // B:
+ // Code: 000101Si iiiiiiii iiiiiiii iiiiiiii
+ // Displacement: SSSSSiii iiiiiiii iiiiiiii iiiiii00
+ // BL:
+ // Code: 100101Si iiiiiiii iiiiiiii iiiiiiii
+ // Displacement: SSSSSiii iiiiiiii iiiiiiii iiiiii00
+ uint32_t bits = code32 & 0xFC000000;
+ if (bits == 0x14000000 || bits == 0x94000000) {
+ *disp = GetSignedBits<0, 25>(code32) << 2;
+ return kArmAlign4;
+ }
+ return kArmAlignFail;
+}
+
+// static
+bool AArch64Rel32Translator::EncodeImmd26(arm_disp_t disp, uint32_t* code32) {
+ uint32_t t = *code32;
+ uint32_t bits = t & 0xFC000000;
+ if (bits == 0x14000000 || bits == 0x94000000) {
+ if (disp % 4) // Require 4-byte alignment.
+ return false;
+ if (!SignedFit<28>(disp)) // Detect overflow.
+ return false;
+ uint32_t imm26 = GetUnsignedBits<2, 27>(disp); // ii...i.
+ t &= 0xFC000000;
+ t |= imm26;
+ *code32 = t;
+ return true;
+ }
+ return false;
+}
+
+// static
+bool AArch64Rel32Translator::ReadImmd26(rva_t instr_rva,
+ uint32_t code32,
+ rva_t* target_rva) {
+ constexpr ArmAlign kInstrAlign = kArmAlign4;
+ if (IsMisaligned(instr_rva, kInstrAlign))
+ return false;
+ arm_disp_t disp;
+ if (DecodeImmd26(code32, &disp) == kArmAlignFail)
+ return false;
+ *target_rva = GetTargetRvaFromDisp(instr_rva, disp);
+ return true;
+}
+
+// static
+bool AArch64Rel32Translator::WriteImmd26(rva_t instr_rva,
+ rva_t target_rva,
+ uint32_t* code32) {
+ constexpr ArmAlign kInstrAlign = kArmAlign4;
+ constexpr ArmAlign kTargetAlign = kArmAlign4;
+ if (IsMisaligned(instr_rva, kInstrAlign) ||
+ IsMisaligned(target_rva, kTargetAlign)) {
+ return false;
+ }
+ arm_disp_t disp = GetDispFromTargetRva(instr_rva, target_rva);
+ return EncodeImmd26(disp, code32);
+}
+
+} // namespace zucchini
diff --git a/arm_utils.h b/arm_utils.h
new file mode 100644
index 0000000..8664f3e
--- /dev/null
+++ b/arm_utils.h
@@ -0,0 +1,423 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ARM_UTILS_H_
+#define COMPONENTS_ZUCCHINI_ARM_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/check_op.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// References:
+// * AArch32 (32-bit ARM, AKA ARM32):
+// https://static.docs.arm.com/ddi0406/c/DDI0406C_C_arm_architecture_reference_manual.pdf
+// * AArch64 (64-bit ARM):
+// https://static.docs.arm.com/ddi0487/da/DDI0487D_a_armv8_arm.pdf
+
+// Definitions (used in Zucchini):
+// * |instr_rva|: Instruction RVA: The RVA where an instruction is located. In
+// ARM mode and for AArch64 this is 4-byte aligned; in THUMB2 mode this is
+// 2-byte aligned.
+// * |code|: Instruction code: ARM instruction code as seen in manual. In ARM
+// mode and for AArch64, this is a 32-bit int. In THUMB2 mode, this may be a
+// 16-bit or 32-bit int.
+// * |disp|: Displacement: For branch instructions (e.g.: B, BL, BLX, and
+// conditional varieties) this is the value encoded in instruction bytes.
+// * PC: Program Counter: In ARM mode this is |instr_rva + 8|; in THUMB2 mode
+// this is |instr_rva + 4|; for AArch64 this is |instr_rva|.
+// * |target_rva|: Target RVA: The RVA targeted by a branch instruction.
+//
+// These are related by:
+// |code| = Fetch(image data at offset(|instr_rva|)).
+// |disp| = Decode(|code|).
+// PC = |instr_rva| + {8 in ARM mode, 4 in THUMB2 mode, 0 for AArch64}.
+// |target_rva| = PC + |disp| - (see "BLX complication" below)
+//
+// Example 1 (ARM mode):
+// 00103050: 00 01 02 EA B 00183458
+// |instr_rva| = 0x00103050 (4-byte aligned).
+// |code| = 0xEA020100 (little endian fetched from data).
+// |disp| = 0x00080400 (decoded from |code| with A24 -> B encoding A1).
+// PC = |instr_rva| + 8 = 0x00103058 (ARM mode).
+// |target_rva| = PC + |disp| = 0x00183458.
+//
+// Example 2 (THUMB2 mode):
+// 001030A2: 00 F0 01 FA BL 001034A8
+// |instr_rva| = 0x001030A2 (2-byte aligned).
+// |code| = 0xF000FA01 (special THUMB2 mode data fetch).
+// |disp| = 0x00000402 (decoded from |code| with T24 -> BL encoding T1).
+// PC = |instr_rva| + 4 = 0x001030A6 (THUMB2 mode).
+// |target_rva| = PC + |disp| = 0x001034A8.
+//
+// Example 3 (AArch64):
+// 0000000000305070: 03 02 01 14 B 000000000034587C
+// |instr_rva| = 0x00305070 (4-byte aligned, assumed to fit in 32-bit).
+// |code| = 0x14010203 (little endian fetched from data).
+// |disp| = 0x0004080C (decoded from |code| with Immd -> B).
+// PC = |instr_rva| = 0x00305070 (AArch64).
+// |target_rva| = PC + |disp| = 0x0034587C.
+
+// BLX complication: BLX transits between ARM mode and THUMB2 mode, and branches
+// to an address. Therefore |instr_rva| must align by the "old" mode, and
+// |target_rva| must align by the "new" mode. In particular:
+// * BLX encoding A2 (ARM -> THUMB2): |instr_rva| is 4-byte aligned with
+// PC = |instr_rva| + 8; |target_rva| is 2-byte aligned, and so |disp| is
+// 2-byte aligned.
+// * BLX encoding T2 (THUMB2 -> ARM): |instr_rva| is 2-byte aligned with
+// PC = |instr_rva| + 4; |target_rva| is 4-byte aligned. Complication: BLX
+// encoding T2 stores a bit |H| that corresponds to "2" in binary, but |H|
+// must be set to 0. Thus the encoded value is effectively 4-byte aligned. So
+// when computing |target_rva| by adding PC (2-byte aligned) to the stored
+// value (4-byte aligned), the result must be rounded down to the nearest
+// 4-byte aligned address.
+// The last situation creates ambiguity in how |disp| is defined! Alternatives:
+// (1) |disp| := |target_rva| - PC: So |code| <-> |disp| for BLX encoding T2,
+// requires |instr_rva| % 4 to be determined, and adjustments made.
+// (2) |disp| := Value stored in |code|: So |disp| <-> |target_rva| for BLX
+// encoding T2 requires adjustment: |disp| -> |target_rva| needs to round
+// down, whereas |target_rva| -> |disp| needs to round up.
+// We adopt (2) to simplify |code| <-> |disp|, since that gets used.
+
+using arm_disp_t = int32_t;
+
+// Alignment requirement for |target_rva|, useful for |disp| <-> |target_rva|
+// (also requires |instr_rva|). Alignment is determined by parsing |code| in
+// *Decode() functions. kArmAlignFail is also defined to indicate parse failure.
+// Alignments can be 2 or 4. These values are also used in the enum, so
+// |x % align| can be computed as |x & (align - 1)|.
+enum ArmAlign : uint32_t {
+ kArmAlignFail = 0U,
+ kArmAlign2 = 2U,
+ kArmAlign4 = 4U,
+};
+
+// Traits for rel32 address types (technically rel64 for AArch64 -- but we
+// assume values are small enough), which form collections of strategies to
+// process each rel32 address type.
+template <typename ENUM_ADDR_TYPE,
+ ENUM_ADDR_TYPE ADDR_TYPE,
+ typename CODE_T,
+ CODE_T (*FETCH)(ConstBufferView, offset_t),
+ void (*STORE)(MutableBufferView, offset_t, CODE_T),
+ ArmAlign (*DECODE)(CODE_T, arm_disp_t*),
+ bool (*ENCODE)(arm_disp_t, CODE_T*),
+ bool (*READ)(rva_t, CODE_T, rva_t*),
+ bool (*WRITE)(rva_t, rva_t, CODE_T*)>
+class ArmAddrTraits {
+ public:
+ static constexpr ENUM_ADDR_TYPE addr_type = ADDR_TYPE;
+ using code_t = CODE_T;
+ static constexpr CODE_T (*Fetch)(ConstBufferView, offset_t) = FETCH;
+ static constexpr void (*Store)(MutableBufferView, offset_t, CODE_T) = STORE;
+ static constexpr ArmAlign (*Decode)(CODE_T, arm_disp_t*) = DECODE;
+ static constexpr bool (*Encode)(arm_disp_t, CODE_T*) = ENCODE;
+ static constexpr bool (*Read)(rva_t, CODE_T, rva_t*) = READ;
+ static constexpr bool (*Write)(rva_t, rva_t, CODE_T*) = WRITE;
+};
+
+// Given THUMB2 instruction |code16|, returns 2 if it's from a 16-bit THUMB2
+// instruction, or 4 if it's from a 32-bit THUMB2 instruction.
+inline int GetThumb2InstructionSize(uint16_t code16) {
+ return ((code16 & 0xF000) == 0xF000 || (code16 & 0xF800) == 0xE800) ? 4 : 2;
+}
+
+// A translator for ARM mode and THUMB2 mode with static functions that
+// translate among |code|, |disp|, and |target_rva|.
+class AArch32Rel32Translator {
+ public:
+ // Rel32 address types enumeration.
+ enum AddrType : uint8_t {
+ ADDR_NONE = 0xFF,
+ // Naming: Here "A24" represents ARM mode instructions where |code|
+ // dedicates 24 bits (including sign bit) to specify |disp|. Similarly, "T8"
+ // represents THUMB2 mode instructions with 8 bits for |disp|. Currently
+ // only {A24, T8, T11, T20, T24} are defined. These are not to be confused
+ // with "B encoding A1", "B encoding T3", etc., which are specific encoding
+ // schemes given by the manual for the "B" (or other) instructions (only
+ // {A1, A2, T1, T2, T3, T4} are seen).
+ ADDR_A24 = 0,
+ ADDR_T8,
+ ADDR_T11,
+ ADDR_T20,
+ ADDR_T24,
+ NUM_ADDR_TYPE
+ };
+
+ AArch32Rel32Translator();
+ AArch32Rel32Translator(const AArch32Rel32Translator&) = delete;
+ const AArch32Rel32Translator& operator=(const AArch32Rel32Translator&) =
+ delete;
+
+ // Fetches the 32-bit ARM instruction |code| at |view[idx]|.
+ static inline uint32_t FetchArmCode32(ConstBufferView view, offset_t idx) {
+ return view.read<uint32_t>(idx);
+ }
+
+ // Fetches the 16-bit THUMB2 instruction |code| at |view[idx]|.
+ static inline uint16_t FetchThumb2Code16(ConstBufferView view, offset_t idx) {
+ return view.read<uint16_t>(idx);
+ }
+
+ // Fetches the 32-bit THUMB2 instruction |code| at |view[idx]|.
+ static inline uint32_t FetchThumb2Code32(ConstBufferView view, offset_t idx) {
+ // By convention, 32-bit THUMB2 instructions are written (as seen later) as:
+ // [byte3, byte2, byte1, byte0].
+ // However (assuming little-endian ARM) the in-memory representation is
+ // [byte2, byte3, byte0, byte1].
+ return (static_cast<uint32_t>(view.read<uint16_t>(idx)) << 16) |
+ view.read<uint16_t>(idx + 2);
+ }
+
+ // Stores the 32-bit ARM instruction |code| to |mutable_view[idx]|.
+ static inline void StoreArmCode32(MutableBufferView mutable_view,
+ offset_t idx,
+ uint32_t code) {
+ mutable_view.write<uint32_t>(idx, code);
+ }
+
+ // Stores the 16-bit THUMB2 instruction |code| to |mutable_view[idx]|.
+ static inline void StoreThumb2Code16(MutableBufferView mutable_view,
+ offset_t idx,
+ uint16_t code) {
+ mutable_view.write<uint16_t>(idx, code);
+ }
+
+ // Stores the next 32-bit THUMB2 instruction |code| to |mutable_view[idx]|.
+ static inline void StoreThumb2Code32(MutableBufferView mutable_view,
+ offset_t idx,
+ uint32_t code) {
+ mutable_view.write<uint16_t>(idx, static_cast<uint16_t>(code >> 16));
+ mutable_view.write<uint16_t>(idx + 2, static_cast<uint16_t>(code & 0xFFFF));
+ }
+
+ // The following functions convert |code| (16-bit or 32-bit) from/to |disp|
+ // or |target_rva|, for specific branch instruction types.
+// Read*() and Write*() functions convert between |code| and |target_rva|.
+ // * Decode*() determines whether |code16/code32| is a branch instruction
+ // of a specific type. If so, then extracts |*disp| and returns the required
+ // ArmAlign. Otherwise returns kArmAlignFail.
+ // * Encode*() determines whether |*code16/*code32| is a branch instruction of
+ // a specific type, and whether it can accommodate |disp|. If so, then
+ // re-encodes |*code32| using |disp|, and returns true. Otherwise returns
+ // false.
+ // * Read*() is similar to Decode*(), but on success, extracts |*target_rva|
+ // using |instr_rva| as aid, performs the proper alignment, and returns
+ // true. Otherwise returns false.
+ // * Write*() is similar to Encode*(), takes |target_rva| instead, and uses
+ // |instr_rva| as aid.
+ static ArmAlign DecodeA24(uint32_t code32, arm_disp_t* disp);
+ static bool EncodeA24(arm_disp_t disp, uint32_t* code32);
+ // TODO(huangs): Refactor the Read*() functions: These are identical
+ // except for Decode*() and Get*TargetRvaFromDisp().
+ static bool ReadA24(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
+ static bool WriteA24(rva_t instr_rva, rva_t target_rva, uint32_t* code32);
+
+ static ArmAlign DecodeT8(uint16_t code16, arm_disp_t* disp);
+ static bool EncodeT8(arm_disp_t disp, uint16_t* code16);
+ static bool ReadT8(rva_t instr_rva, uint16_t code16, rva_t* target_rva);
+ static bool WriteT8(rva_t instr_rva, rva_t target_rva, uint16_t* code16);
+
+ static ArmAlign DecodeT11(uint16_t code16, arm_disp_t* disp);
+ static bool EncodeT11(arm_disp_t disp, uint16_t* code16);
+ static bool ReadT11(rva_t instr_rva, uint16_t code16, rva_t* target_rva);
+ static bool WriteT11(rva_t instr_rva, rva_t target_rva, uint16_t* code16);
+
+ static ArmAlign DecodeT20(uint32_t code32, arm_disp_t* disp);
+ static bool EncodeT20(arm_disp_t disp, uint32_t* code32);
+ static bool ReadT20(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
+ static bool WriteT20(rva_t instr_rva, rva_t target_rva, uint32_t* code32);
+
+ static ArmAlign DecodeT24(uint32_t code32, arm_disp_t* disp);
+ static bool EncodeT24(arm_disp_t disp, uint32_t* code32);
+ static bool ReadT24(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
+ static bool WriteT24(rva_t instr_rva, rva_t target_rva, uint32_t* code32);
+
+ // Computes |target_rva| from |instr_rva| and |disp| in ARM mode.
+ static inline rva_t GetArmTargetRvaFromDisp(rva_t instr_rva,
+ arm_disp_t disp,
+ ArmAlign align) {
+ rva_t ret = static_cast<rva_t>(instr_rva + 8 + disp);
+ // Align down.
+ DCHECK_NE(align, kArmAlignFail);
+ return ret - (ret & static_cast<rva_t>(align - 1));
+ }
+
+ // Computes |target_rva| from |instr_rva| and |disp| in THUMB2 mode.
+ static inline rva_t GetThumb2TargetRvaFromDisp(rva_t instr_rva,
+ arm_disp_t disp,
+ ArmAlign align) {
+ rva_t ret = static_cast<rva_t>(instr_rva + 4 + disp);
+ // Align down.
+ DCHECK_NE(align, kArmAlignFail);
+ return ret - (ret & static_cast<rva_t>(align - 1));
+ }
+
+ // Computes |disp| from |instr_rva| and |target_rva| in ARM mode.
+ static inline arm_disp_t GetArmDispFromTargetRva(rva_t instr_rva,
+ rva_t target_rva,
+ ArmAlign align) {
+ // Assumes that |instr_rva + 8| does not overflow.
+ arm_disp_t ret = static_cast<arm_disp_t>(target_rva) -
+ static_cast<arm_disp_t>(instr_rva + 8);
+ // Align up.
+ DCHECK_NE(align, kArmAlignFail);
+ return ret + ((-ret) & static_cast<arm_disp_t>(align - 1));
+ }
+
+ // Computes |disp| from |instr_rva| and |target_rva| in THUMB2 mode.
+ static inline arm_disp_t GetThumb2DispFromTargetRva(rva_t instr_rva,
+ rva_t target_rva,
+ ArmAlign align) {
+ // Assumes that |instr_rva + 4| does not overflow.
+ arm_disp_t ret = static_cast<arm_disp_t>(target_rva) -
+ static_cast<arm_disp_t>(instr_rva + 4);
+ // Align up.
+ DCHECK_NE(align, kArmAlignFail);
+ return ret + ((-ret) & static_cast<arm_disp_t>(align - 1));
+ }
+
+ // Strategies to process each rel32 address type.
+ using AddrTraits_A24 = ArmAddrTraits<AddrType,
+ ADDR_A24,
+ uint32_t,
+ FetchArmCode32,
+ StoreArmCode32,
+ DecodeA24,
+ EncodeA24,
+ ReadA24,
+ WriteA24>;
+ using AddrTraits_T8 = ArmAddrTraits<AddrType,
+ ADDR_T8,
+ uint16_t,
+ FetchThumb2Code16,
+ StoreThumb2Code16,
+ DecodeT8,
+ EncodeT8,
+ ReadT8,
+ WriteT8>;
+ using AddrTraits_T11 = ArmAddrTraits<AddrType,
+ ADDR_T11,
+ uint16_t,
+ FetchThumb2Code16,
+ StoreThumb2Code16,
+ DecodeT11,
+ EncodeT11,
+ ReadT11,
+ WriteT11>;
+ using AddrTraits_T20 = ArmAddrTraits<AddrType,
+ ADDR_T20,
+ uint32_t,
+ FetchThumb2Code32,
+ StoreThumb2Code32,
+ DecodeT20,
+ EncodeT20,
+ ReadT20,
+ WriteT20>;
+ using AddrTraits_T24 = ArmAddrTraits<AddrType,
+ ADDR_T24,
+ uint32_t,
+ FetchThumb2Code32,
+ StoreThumb2Code32,
+ DecodeT24,
+ EncodeT24,
+ ReadT24,
+ WriteT24>;
+};
+
+// Translator for AArch64, which is simpler than 32-bit ARM. Although pointers
+// are 64-bit, displacements are within 32-bit.
+class AArch64Rel32Translator {
+ public:
+ // Rel64 address types enumeration.
+ enum AddrType : uint8_t {
+ ADDR_NONE = 0xFF,
+ ADDR_IMMD14 = 0,
+ ADDR_IMMD19,
+ ADDR_IMMD26,
+ NUM_ADDR_TYPE
+ };
+
+ // Although RVA for 64-bit architecture can be 64-bit in length, we make the
+ // bold assumption that for ELF images the RVA will stay nicely in 32-bit!
+ AArch64Rel32Translator();
+ AArch64Rel32Translator(const AArch64Rel32Translator&) = delete;
+ const AArch64Rel32Translator& operator=(const AArch64Rel32Translator&) =
+ delete;
+
+ static inline uint32_t FetchCode32(ConstBufferView view, offset_t idx) {
+ return view.read<uint32_t>(idx);
+ }
+
+ static inline void StoreCode32(MutableBufferView mutable_view,
+ offset_t idx,
+ uint32_t code) {
+ mutable_view.write<uint32_t>(idx, code);
+ }
+
+ // Conversion functions for |code32| from/to |disp| or |target_rva|, similar
+ // to the counterparts in AArch32Rel32Translator.
+ static ArmAlign DecodeImmd14(uint32_t code32, arm_disp_t* disp);
+ static bool EncodeImmd14(arm_disp_t disp, uint32_t* code32);
+ // TODO(huangs): Refactor the Read*() functions: These are identical
+ // except for Decode*().
+ static bool ReadImmd14(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
+ static bool WriteImmd14(rva_t instr_rva, rva_t target_rva, uint32_t* code32);
+
+ static ArmAlign DecodeImmd19(uint32_t code32, arm_disp_t* disp);
+ static bool EncodeImmd19(arm_disp_t disp, uint32_t* code32);
+ static bool ReadImmd19(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
+ static bool WriteImmd19(rva_t instr_rva, rva_t target_rva, uint32_t* code32);
+
+ static ArmAlign DecodeImmd26(uint32_t code32, arm_disp_t* disp);
+ static bool EncodeImmd26(arm_disp_t disp, uint32_t* code32);
+ static bool ReadImmd26(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
+ static bool WriteImmd26(rva_t instr_rva, rva_t target_rva, uint32_t* code32);
+
+ static inline rva_t GetTargetRvaFromDisp(rva_t instr_rva, arm_disp_t disp) {
+ return static_cast<rva_t>(instr_rva + disp);
+ }
+
+ static inline arm_disp_t GetDispFromTargetRva(rva_t instr_rva,
+ rva_t target_rva) {
+ return static_cast<arm_disp_t>(target_rva - instr_rva);
+ }
+
+ // Strategies to process each rel32 address type.
+ using AddrTraits_Immd14 = ArmAddrTraits<AddrType,
+ ADDR_IMMD14,
+ uint32_t,
+ FetchCode32,
+ StoreCode32,
+ DecodeImmd14,
+ EncodeImmd14,
+ ReadImmd14,
+ WriteImmd14>;
+ using AddrTraits_Immd19 = ArmAddrTraits<AddrType,
+ ADDR_IMMD19,
+ uint32_t,
+ FetchCode32,
+ StoreCode32,
+ DecodeImmd19,
+ EncodeImmd19,
+ ReadImmd19,
+ WriteImmd19>;
+ using AddrTraits_Immd26 = ArmAddrTraits<AddrType,
+ ADDR_IMMD26,
+ uint32_t,
+ FetchCode32,
+ StoreCode32,
+ DecodeImmd26,
+ EncodeImmd26,
+ ReadImmd26,
+ WriteImmd26>;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ARM_UTILS_H_
diff --git a/arm_utils_unittest.cc b/arm_utils_unittest.cc
new file mode 100644
index 0000000..8109c92
--- /dev/null
+++ b/arm_utils_unittest.cc
@@ -0,0 +1,862 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/arm_utils.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <cctype>
+#include <initializer_list>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "base/check_op.h"
+#include "components/zucchini/address_translator.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// "Clean slate" |code|s for branch instruction encodings with |disp| = 0, and
+// if applicable, |cond| = 0.
+uint32_t kCleanSlateB_A1 = 0x0A000000; // A24.
+uint32_t kCleanSlateBL_A1 = 0x0B000000; // A24.
+uint32_t kCleanSlateBLX_A2 = 0xFA000000; // A24.
+uint16_t kCleanSlateB_T1 = 0xD000; // T8.
+uint16_t kCleanSlateB_T2 = 0xE000; // T11.
+uint32_t kCleanSlateB_T3 = 0xF0008000; // T20.
+// For T24 encodings, |disp| = 0 means J1 = J2 = 1, so include 0x00002800.
+uint32_t kCleanSlateB_T4 = 0xF0009000 | 0x00002800; // T24.
+uint32_t kCleanSlateBL_T1 = 0xF000D000 | 0x00002800; // T24.
+uint32_t kCleanSlateBLX_T2 = 0xF000C000 | 0x00002800; // T24.
+
+// For AArch64.
+uint32_t kCleanSlate64TBZw = 0x36000000; // Immd14.
+uint32_t kCleanSlate64TBZz = 0xB6000000; // Immd14.
+uint32_t kCleanSlate64TBNZw = 0x37000000; // Immd14.
+uint32_t kCleanSlate64TBNZz = 0xB7000000; // Immd14.
+uint32_t kCleanSlate64Bcond = 0x54000000; // Immd19.
+uint32_t kCleanSlate64CBZw = 0x34000000; // Immd19.
+uint32_t kCleanSlate64CBZz = 0xB4000000; // Immd19.
+uint32_t kCleanSlate64CBNZw = 0x35000000; // Immd19.
+uint32_t kCleanSlate64CBNZz = 0xB5000000; // Immd19.
+uint32_t kCleanSlate64B = 0x14000000; // Immd26.
+uint32_t kCleanSlate64BL = 0x94000000; // Immd26.
+
+// Special case: Cond = 0xE => AL.
+uint32_t kCleanSlateBAL_A1 = kCleanSlateB_A1 | (0xE << 28); //
+
+// Test helper: Extracts |components| from |value| (may be |code| or |disp|)
+// based on |pattern|. Also performs consistency checks. On success, writes to
+// |*components| and returns true. Otherwise returns false.
+// Example (all numbers are in binary):
+// |pattern| = "11110Scc cciiiiii 10(J1)0(J2)jjj jjjj...."
+// |value| = 11110111 00111000 10 1 0 0 111 11000101
+// Result: Noting that all 0's and 1's are consistent, returns true with:
+// |*components| = {S: 1, c: 1100, i: 111000, J1: 1, J2: 0, j: 1111100}
+// Rules for |pattern|:
+// * Spaces are ignored.
+// * '.' means "don't care".
+// * '0' and '1' are expected literals; mismatch leads to failure.
+// * A variable name is specified as:
+// * A single letter.
+// * "(var)", where "var" is a name that begins with a letter.
+// * If a variable's first letter is uppercase, then it's a singleton bit.
+// * If repeated, consistency check is applied (must be identical).
+// * If a variable's first letter is lowercase, then it spans multiple bits.
+// * These need not be contiguous, but order is preserved (big-endian).
+static bool SplitBits(const std::string& pattern,
+ uint32_t value,
+ std::map<std::string, uint32_t>* components) {
+ CHECK(components);
+
+ // Split |pattern| into |token_list|.
+ std::vector<std::string> token_list;
+ size_t bracket_start = std::string::npos;
+ for (size_t i = 0; i < pattern.size(); ++i) {
+ char ch = pattern[i];
+ if (bracket_start == std::string::npos) {
+ if (ch == '(')
+ bracket_start = i + 1;
+ else if (ch != ' ') // Ignore space.
+ token_list.push_back(std::string(1, ch));
+ } else if (ch == ')') {
+ token_list.push_back(pattern.substr(bracket_start, i - bracket_start));
+ bracket_start = std::string::npos;
+ }
+ }
+ CHECK_EQ(std::string::npos, bracket_start); // No dangling "(".
+
+ // Process each token.
+ size_t num_tokens = token_list.size();
+ std::map<std::string, uint32_t> temp_components;
+ CHECK(num_tokens == 32 || (num_tokens == 16 && value <= 0xFFFF));
+ for (size_t i = 0; i < num_tokens; ++i) {
+ const std::string& token = token_list[i];
+ CHECK(!token.empty());
+ uint32_t bit = (value >> (num_tokens - 1 - i)) & 1;
+ if (token == "0" || token == "1") {
+ if (token[0] != static_cast<char>('0' + bit))
+ return false; // Fail: Mismatch.
+ } else if (isupper(token[0])) {
+ if (temp_components.count(token)) {
+ if (temp_components[token] != bit)
+ return false; // Fail: Singleton bit not uniform.
+ } else {
+ temp_components[token] = bit;
+ }
+ } else if (islower(token[0])) {
+ temp_components[token] = (temp_components[token] << 1) | bit;
+ } else if (token != ".") {
+ return false; // Fail: Unrecognized token.
+ }
+ }
+ components->swap(temp_components);
+ return true;
+}
+
+// AArch32 or AArch64 instruction specification for tests. May be 16-bit or
+// 32-bit (determined by INT_T).
+template <typename INT_T>
+struct ArmRelInstruction {
+ ArmRelInstruction(const std::string& code_pattern_in, INT_T code)
+ : code_pattern(code_pattern_in), clean_slate_code(code) {}
+
+ // Code pattern for SplitBits().
+ std::string code_pattern;
+
+ // "Clean slate" |code| encodes |disp| = 0.
+ INT_T clean_slate_code;
+};
+
+// Tester for ARM Encode / Decode functions for |disp| <-> |code|.
+template <typename TRAITS>
+class ArmTranslatorEncodeDecodeTest {
+ public:
+ using CODE_T = typename TRAITS::code_t;
+
+ ArmTranslatorEncodeDecodeTest() {}
+
+ // For each instruction (with |clean_slate_code| in |instr_list|) and for each
+ // |disp| in |good_disp_list|, forms |code| with |encode_fun()| and checks for
+ // success. Extracts |disp_out| with |decode_fun()| and checks that it's the
+ // original |disp|. For each (|disp|, |code|) pair, extracts components using
+ // SplitBits(), and checks that components from |toks_list| are identical. For
+ // each |disp| in |bad_disp_list|, checks that |decode_fun_()| fails.
+ void Run(const std::string& disp_pattern,
+ const std::vector<std::string>& toks_list,
+ const std::vector<ArmRelInstruction<CODE_T>>& instr_list,
+ const std::vector<arm_disp_t>& good_disp_list,
+ const std::vector<arm_disp_t>& bad_disp_list) {
+ ArmAlign (*decode_fun)(CODE_T, arm_disp_t*) = TRAITS::Decode;
+ bool (*encode_fun)(arm_disp_t, CODE_T*) = TRAITS::Encode;
+
+ for (const ArmRelInstruction<CODE_T> instr : instr_list) {
+ // Parse clean slate code bytes, and ensure it's well-formed.
+ std::map<std::string, uint32_t> clean_slate_code_components;
+ EXPECT_TRUE(SplitBits(instr.code_pattern, instr.clean_slate_code,
+ &clean_slate_code_components));
+
+ for (arm_disp_t disp : good_disp_list) {
+ CODE_T code = instr.clean_slate_code;
+ // Encode |disp| to |code|.
+ EXPECT_TRUE((*encode_fun)(disp, &code)) << disp;
+ arm_disp_t disp_out = 0;
+
+ // Extract components (performs consistency checks) and compare.
+ std::map<std::string, uint32_t> disp_components;
+ EXPECT_TRUE(SplitBits(disp_pattern, static_cast<uint32_t>(disp),
+ &disp_components));
+ std::map<std::string, uint32_t> code_components;
+ EXPECT_TRUE(SplitBits(instr.code_pattern, code, &code_components));
+ for (const std::string& tok : toks_list) {
+ EXPECT_EQ(1U, disp_components.count(tok)) << tok;
+ EXPECT_EQ(1U, code_components.count(tok)) << tok;
+ EXPECT_EQ(disp_components[tok], code_components[tok]) << tok;
+ }
+
+ // Decode |code| to |disp_out|, check fidelity.
+ EXPECT_NE(kArmAlignFail, (*decode_fun)(code, &disp_out));
+ EXPECT_EQ(disp, disp_out);
+
+ // Sanity check: Re-encode |disp| into |code|, ensure no change.
+ CODE_T code_copy = code;
+ EXPECT_TRUE((*encode_fun)(disp, &code));
+ EXPECT_EQ(code_copy, code);
+
+ // Encode 0, ensure we get clean slate |code| back.
+ EXPECT_TRUE((*encode_fun)(0, &code));
+ EXPECT_EQ(instr.clean_slate_code, code);
+ }
+
+ for (arm_disp_t disp : bad_disp_list) {
+ CODE_T code = instr.clean_slate_code;
+ EXPECT_FALSE((*encode_fun)(disp, &code)) << disp;
+ // Value does not get modified after failure.
+ EXPECT_EQ(instr.clean_slate_code, code);
+ }
+ }
+ }
+};
+
+// Tester for ARM Write / Read functions for |target_rva| <-> |code|.
+template <typename TRAITS>
+class ArmTranslatorWriteReadTest {
+ public:
+ using CODE_T = typename TRAITS::code_t;
+
+ ArmTranslatorWriteReadTest() {}
+
+ // Expects successful Write() to |clean_slate_code| for each |target_rva_list|
+ // RVA, using each |instr_rva_list| RVA, and that the resulting |code| leads
+ // to successful Read(), which recovers |instr_rva|.
+ void Accept(CODE_T clean_slate_code,
+ const std::vector<rva_t>& instr_rva_list,
+ const std::vector<rva_t>& target_rva_list) {
+ bool (*read_fun)(rva_t, CODE_T, rva_t*) = TRAITS::Read;
+ bool (*write_fun)(rva_t, rva_t, CODE_T*) = TRAITS::Write;
+
+ for (rva_t instr_rva : instr_rva_list) {
+ for (rva_t target_rva : target_rva_list) {
+ CODE_T code = clean_slate_code;
+ // Write |target_rva| to |code|.
+ EXPECT_TRUE((*write_fun)(instr_rva, target_rva, &code)) << target_rva;
+ rva_t target_rva_out = kInvalidRva;
+
+ // Read |code| to |target_rva_out|, check fidelity.
+ EXPECT_TRUE((*read_fun)(instr_rva, code, &target_rva_out));
+ EXPECT_EQ(target_rva, target_rva_out);
+
+ // Sanity check: Rewrite |target_rva| into |code|, ensure no change.
+ CODE_T code_copy = code;
+ EXPECT_TRUE((*write_fun)(instr_rva, target_rva, &code));
+ EXPECT_EQ(code_copy, code);
+ }
+ }
+ }
+
+ // Expects failed Write() to |clean_slate_code| for each |target_rva_list|
+ // RVA, using each |instr_rva_list| RVA.
+ void Reject(CODE_T clean_slate_code,
+ const std::vector<rva_t>& instr_rva_list,
+ const std::vector<rva_t>& target_rva_list) {
+ bool (*write_fun)(rva_t, rva_t, CODE_T*) = TRAITS::Write;
+
+ for (rva_t instr_rva : instr_rva_list) {
+ for (rva_t target_rva : target_rva_list) {
+ CODE_T code = clean_slate_code;
+ EXPECT_FALSE((*write_fun)(instr_rva, target_rva, &code)) << target_rva;
+ // Output variable is unmodified after failure.
+ EXPECT_EQ(clean_slate_code, code);
+ }
+ }
+ }
+};
+
+} // namespace
+
+// Test for test helper.
+TEST(ArmUtilsTest, SplitBits) {
+ // If |expected| == "BAD" then we expect failure.
+ auto run_test = [](const std::string& expected, const std::string& pattern,
+ uint32_t value) {
+ std::map<std::string, uint32_t> components;
+ if (expected == "BAD") {
+ EXPECT_FALSE(SplitBits(pattern, value, &components));
+ EXPECT_TRUE(components.empty());
+ } else {
+ EXPECT_TRUE(SplitBits(pattern, value, &components));
+ std::ostringstream oss;
+ // Not using AsHex<>, since number of digits is not fixed.
+ oss << std::uppercase << std::hex;
+ std::string sep = "";
+ for (auto it : components) {
+ oss << sep << it.first << "=" << it.second;
+ sep = ",";
+ }
+ EXPECT_EQ(expected, oss.str());
+ }
+ };
+
+ run_test("a=ABCD0123", "aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa", 0xABCD0123);
+ run_test("a=ABCD,b=123", "aaaaaaaa aaaaaaaa bbbbbbbb bbbbbbbb", 0xABCD0123);
+ run_test("a=23,b=1,c=CD,d=AB", "dddddddd cccccccc bbbbbbbb aaaaaaaa",
+ 0xABCD0123);
+ run_test("", "........ ........ ........ ........", 0xABCD0123);
+ run_test("t=AC02", " tttt.... tt tt.... tttt....tttt.... ", 0xABCD0123);
+
+ run_test("a=8,b=C,c=E,d1=F", "aaaabbbb cccc(d1)(d1)(d1)(d1)", 0x8CEF);
+ run_test("a=F,b=7,c=3,d1=1", "abc(d1)abc(d1) abc(d1)abc(d1)", 0x8CEF);
+
+ run_test("A1=0,X=1", "(A1)XX(A1) X(A1)(A1)(A1) (X)(A1)(X)X(X)(X)X(A1)",
+ 0x68BE);
+ run_test("BAD", "(A1)XX(A1) X(A1)(A1)(A1) (X)(A1)(X)X(X)(X)X(A1)", 0x68BF);
+ run_test("BAD", "(A1)XX(A1) X(A1)(A1)(A1) (X)(A1)(X)X(X)(X)X(A1)", 0x683E);
+
+ run_test("A=1,B=0,a=C", "AAAAaaaa BBBB01..", 0xFC06);
+ run_test("A=1,B=0,a=4", "AAAAaaaa BBBB01..", 0xF406);
+ run_test("A=0,B=1,a=C", "AAAAaaaa BBBB01..", 0x0CF5);
+ run_test("BAD", "AAAAaaaa BBBB01..", 0xEC06); // Non-uniform A.
+ run_test("BAD", "AAAAaaaa BBBB01..", 0xFC16); // Non-uniform B.
+ run_test("BAD", "AAAAaaaa BBBB01..", 0xFC02); // Constant mismatch.
+}
+
+TEST(AArch32Rel32Translator, Fetch) {
+ std::vector<uint8_t> bytes = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE};
+ ConstBufferView region(&bytes[0], bytes.size());
+ AArch32Rel32Translator translator;
+ EXPECT_EQ(0x76543210U, translator.FetchArmCode32(region, 0U));
+ EXPECT_EQ(0xFEDCBA98U, translator.FetchArmCode32(region, 4U));
+
+ EXPECT_EQ(0x3210U, translator.FetchThumb2Code16(region, 0U));
+ EXPECT_EQ(0xFEDCU, translator.FetchThumb2Code16(region, 6U));
+
+ EXPECT_EQ(0x32107654U, translator.FetchThumb2Code32(region, 0U));
+ EXPECT_EQ(0xBA98FEDCU, translator.FetchThumb2Code32(region, 4U));
+}
+
+TEST(AArch32Rel32Translator, Store) {
+ std::vector<uint8_t> expected = {
+ 0xFF, 0xFF, 0xFF, 0xFF, // Padding.
+ 0x10, 0x32, 0x54, 0x76, // ARM 32-bit.
+ 0xFF, 0xFF, // Padding.
+ 0x42, 0x86, // THUMB2 16-bit.
+ 0xFF, 0xFF, // Padding.
+ 0xDC, 0xFE, 0x98, 0xBA, // THUMB2 32-bit.
+ 0xFF, 0xFF, 0xFF, 0xFF // Padding.
+ };
+
+ std::vector<uint8_t> bytes(4 * 2 + 2 * 3 + 4 * 2, 0xFF);
+ MutableBufferView region(&bytes[0], bytes.size());
+ CHECK_EQ(expected.size(), bytes.size());
+
+ AArch32Rel32Translator translator;
+ translator.StoreArmCode32(region, 4U, 0x76543210U);
+ translator.StoreThumb2Code16(region, 10U, 0x8642U);
+ translator.StoreThumb2Code32(region, 14U, 0xFEDCBA98U);
+
+ EXPECT_EQ(expected, bytes);
+}
+
+// Detailed test of Encode/Decode: Check valid and invalid |disp| for various
+// clean slate |code| cases. Also check |disp| and |code| binary components,
+// which in AArch32Rel32Translator comments.
+TEST(AArch32Rel32Translator, EncodeDecode) {
+ // A24 tests.
+ ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_A24>
+ test_A24;
+ for (int cond = 0; cond <= 0x0E; ++cond) {
+ ArmRelInstruction<uint32_t> B_A1_cond("cccc1010 Siiiiiii iiiiiiii iiiiiiii",
+ kCleanSlateB_A1 | (cond << 28));
+ ArmRelInstruction<uint32_t> BL_A1_cond(
+ "cccc1011 Siiiiiii iiiiiiii iiiiiiii", kCleanSlateBL_A1 | (cond << 28));
+ test_A24.Run("SSSSSSSi iiiiiiii iiiiiiii iiiiii00", {"S", "i"},
+ {B_A1_cond, BL_A1_cond},
+ {0x01FFFFFC, -0x02000000, 0, 4, -4, 0x40, 0x44},
+ {2, -2, 0x41, 0x42, 0x43, 0x02000000, -0x02000004});
+ }
+ // BLX encoding A2, which has 2-byte alignment.
+ ArmRelInstruction<uint32_t> BLX_A2("1111101H Siiiiiii iiiiiiii iiiiiiii",
+ kCleanSlateBLX_A2);
+ test_A24.Run("SSSSSSSi iiiiiiii iiiiiiii iiiiiiH0", {"S", "i", "H"}, {BLX_A2},
+ {0x01FFFFFC, 0x01FFFFFE, -0x02000000, 0, 2, -2, 4, 0x40, 0x42},
+ {1, -1, 0x41, 0x43, 0x02000000, -0x02000002});
+
+ // T8 tests.
+ ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_T8> test_T8;
+ for (int cond = 0; cond <= 0x0E; ++cond) {
+ ArmRelInstruction<uint16_t> B_T1_cond("1101cccc Siiiiiii",
+ kCleanSlateB_T1 | (cond << 8));
+ test_T8.Run("SSSSSSSS SSSSSSSS SSSSSSSS iiiiiii0", {"S", "i"}, {B_T1_cond},
+ {0x00FE, -0x0100, 0, 2, -2, 4, 0x40, 0x42},
+ {1, -1, 0x41, 0x43, 0x0100, -0x0102});
+ }
+ ArmRelInstruction<uint16_t> B_T1_invalid("11011111 ........",
+ kCleanSlateB_T1 | (0x0F << 8));
+ test_T8.Run("........ ........ ........ ........", std::vector<std::string>(),
+ {B_T1_invalid}, std::vector<arm_disp_t>(),
+ {0x00FE, -0x0100, 0, 2, 4, 0x40, 0x41, 0x0100, -0x0102});
+
+ // T11 tests.
+ ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_T11>
+ test_T11;
+ ArmRelInstruction<uint16_t> B_T2("11100Sii iiiiiiii", kCleanSlateB_T2);
+ test_T11.Run("SSSSSSSS SSSSSSSS SSSSSiii iiiiiii0", {"S", "i"}, {B_T2},
+ {0x07FE, -0x0800, 0, 2, -2, 4, 0x40, 0x42},
+ {1, -1, 0x41, 0x43, 0x0800, -0x0802});
+
+ // T20 tests.
+ ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_T20>
+ test_T20;
+ for (int cond = 0; cond <= 0x0E; ++cond) {
+ ArmRelInstruction<uint32_t> B_T3_cond(
+ "11110Scc cciiiiii 10(J1)0(J2)jjj jjjjjjjj",
+ kCleanSlateB_T3 | (cond << 22));
+ test_T20.Run("SSSSSSSS SSSS(J2)(J1)ii iiiijjjj jjjjjjj0",
+ {"S", "J2", "J1", "i", "j"}, {B_T3_cond},
+ {0x000FFFFE, -0x00100000, 0, 2, -2, 4, 0x40, 0x42},
+ {1, -1, 0x41, 0x43, 0x00100000, -0x00100002});
+ }
+ ArmRelInstruction<uint32_t> B_T3_invalid(
+ "11110.11 11...... 10.0.... ........", kCleanSlateB_T3 | (0x0F << 22));
+ test_T20.Run("........ ........ ........ ........",
+ std::vector<std::string>(), {B_T3_invalid},
+ std::vector<arm_disp_t>(),
+ {0x000FFFFE, -0x00100000, 0, 2, 4, 0x40, 0x42, 1, 0x41, 0x43,
+ 0x00100000, -0x00100002});
+
+ // T24 tests.
+ ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_T24>
+ test_T24;
+ // "Clean slate" means J1 = J2 = 1, so we include 0x00002800.
+ ArmRelInstruction<uint32_t> B_T4("11110Sii iiiiiiii 10(J1)1(J2)jjj jjjjjjjj",
+ kCleanSlateB_T4);
+ ArmRelInstruction<uint32_t> BL_T1("11110Sii iiiiiiii 11(J1)1(J2)jjj jjjjjjjj",
+ kCleanSlateBL_T1);
+ test_T24.Run("SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjjj0",
+ {"S", "i", "j"}, // Skip "J1", "J2", "I1", "I2" checks.
+ {B_T4, BL_T1},
+ {0x00FFFFFE, -0x01000000, 0, 2, -2, 4, -4, 0x40, 0x42},
+ {1, -1, 0x41, 0x43, 0x01000000, -0x01000002});
+
+ // For BLX encoding T2, |disp| must be multiple of 4.
+ ArmRelInstruction<uint32_t> BLX_T2(
+ "11110Sii iiiiiiii 11(J1)0(J2)jjj jjjjjjj0", kCleanSlateBLX_T2);
+ test_T24.Run(
+ "SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjj00",
+ {"S", "i", "j"}, // Skip "J1", "J2", "I1", "I2" checks.
+ {BLX_T2}, {0x00FFFFFC, -0x01000000, 0, 4, -4, 0x40},
+ {1, -1, 2, -2, 0x41, 0x42, 0x43, 0x00FFFFFE, 0x01000000, -0x01000002});
+}
+
+TEST(AArch32Rel32Translator, WriteRead) {
+ std::vector<rva_t> aligned4;
+ std::vector<rva_t> misaligned4;
+ std::vector<rva_t> aligned2;
+ std::vector<rva_t> misaligned2;
+ for (rva_t rva = 0x1FFC; rva <= 0x2010; ++rva) {
+ ((rva % 4 == 0) ? aligned4 : misaligned4).push_back(rva);
+ ((rva % 2 == 0) ? aligned2 : misaligned2).push_back(rva);
+ }
+ CHECK_EQ(6U, aligned4.size());
+ CHECK_EQ(15U, misaligned4.size());
+ CHECK_EQ(11U, aligned2.size());
+ CHECK_EQ(10U, misaligned2.size());
+
+ // Helpers to convert an instruction's RVA to PC.
+ auto pcArm = [](rva_t instr_rva) -> rva_t { return instr_rva + 8; };
+ auto pcThumb2 = [](rva_t instr_rva) -> rva_t { return instr_rva + 4; };
+
+ // A24 tests.
+ ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_A24> test_A24;
+ for (uint32_t clean_slate_code : {kCleanSlateB_A1, kCleanSlateBL_A1}) {
+ test_A24.Accept(clean_slate_code, aligned4, aligned4);
+ test_A24.Reject(clean_slate_code, aligned4, misaligned4);
+ test_A24.Reject(clean_slate_code, misaligned4, aligned4);
+ test_A24.Reject(clean_slate_code, misaligned4, misaligned4);
+ // Signed (24 + 2)-bit range, 4-byte aligned: [-0x02000000, 0x01FFFFFC].
+ test_A24.Accept(clean_slate_code, {0x15000000},
+ {pcArm(0x13000000), pcArm(0x16FFFFFC)});
+ test_A24.Reject(clean_slate_code, {0x15000000},
+ {pcArm(0x13000000 - 4), pcArm(0x16FFFFFC + 4)});
+ }
+
+ // BLX complication: ARM -> THUMB2.
+ test_A24.Accept(kCleanSlateBLX_A2, aligned4, aligned2);
+ test_A24.Reject(kCleanSlateBLX_A2, aligned4, misaligned2);
+ test_A24.Reject(kCleanSlateBLX_A2, misaligned4, aligned2);
+ test_A24.Reject(kCleanSlateBLX_A2, misaligned4, misaligned2);
+ test_A24.Accept(kCleanSlateBLX_A2, {0x15000000},
+ {pcArm(0x13000000), pcArm(0x16FFFFFE)});
+ test_A24.Reject(kCleanSlateBLX_A2, {0x15000000},
+ {pcArm(0x13000000 - 4), pcArm(0x13000000 - 2),
+ pcArm(0x16FFFFFE + 2), pcArm(0x16FFFFFE + 4)});
+
+ // T8 tests.
+ ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_T8> test_T8;
+ test_T8.Accept(kCleanSlateB_T1, aligned2, aligned2);
+ test_T8.Reject(kCleanSlateB_T1, aligned2, misaligned2);
+ test_T8.Reject(kCleanSlateB_T1, misaligned2, aligned2);
+ test_T8.Reject(kCleanSlateB_T1, misaligned2, misaligned2);
+ // Signed (8 + 1)-bit range, 2-byte aligned: [-0x0100, 0x00FE].
+ test_T8.Accept(kCleanSlateB_T1, {0x10000500},
+ {pcThumb2(0x10000400), pcThumb2(0x100005FE)});
+ test_T8.Reject(kCleanSlateB_T1, {0x10000500},
+ {pcThumb2(0x10000400 - 2), pcThumb2(0x100005FE + 2)});
+
+ // T11 tests.
+ ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_T11> test_T11;
+ test_T11.Accept(kCleanSlateB_T2, aligned2, aligned2);
+ test_T11.Reject(kCleanSlateB_T2, aligned2, misaligned2);
+ test_T11.Reject(kCleanSlateB_T2, misaligned2, aligned2);
+ test_T11.Reject(kCleanSlateB_T2, misaligned2, misaligned2);
+ // Signed (11 + 1)-bit range, 2-byte aligned: [-0x0800, 0x07FE].
+ test_T11.Accept(kCleanSlateB_T2, {0x10003000},
+ {pcThumb2(0x10002800), pcThumb2(0x100037FE)});
+ test_T11.Reject(kCleanSlateB_T2, {0x10003000},
+ {pcThumb2(0x10002800 - 2), pcThumb2(0x100037FE + 2)});
+
+ // T20 tests.
+ ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_T20> test_T20;
+ test_T20.Accept(kCleanSlateB_T3, aligned2, aligned2);
+ test_T20.Reject(kCleanSlateB_T3, aligned2, misaligned2);
+ test_T20.Reject(kCleanSlateB_T3, misaligned2, aligned2);
+ test_T20.Reject(kCleanSlateB_T3, misaligned2, misaligned2);
+ // Signed (20 + 1)-bit range, 2-byte aligned: [-0x00100000, 0x000FFFFE].
+ test_T20.Accept(kCleanSlateB_T3, {0x10300000},
+ {pcThumb2(0x10200000), pcThumb2(0x103FFFFE)});
+ test_T20.Reject(kCleanSlateB_T3, {0x10300000},
+ {pcThumb2(0x10200000 - 2), pcThumb2(0x103FFFFE + 2)});
+
+ // T24 tests.
+ ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_T24> test_T24;
+ for (uint32_t clean_slate_code : {kCleanSlateB_T4, kCleanSlateBL_T1}) {
+ test_T24.Accept(clean_slate_code, aligned2, aligned2);
+ test_T24.Reject(clean_slate_code, aligned2, misaligned2);
+ test_T24.Reject(clean_slate_code, misaligned2, aligned2);
+ test_T24.Reject(clean_slate_code, misaligned2, misaligned2);
+ // Signed (24 + 1)-bit range, 2-byte aligned: [-0x01000000, 0x00FFFFFE].
+ test_T24.Accept(clean_slate_code, {0x16000000},
+ {pcThumb2(0x15000000), pcThumb2(0x16FFFFFE)});
+ test_T24.Reject(clean_slate_code, {0x16000000},
+ {pcThumb2(0x15000000 - 2), pcThumb2(0x16FFFFFE + 2)});
+ }
+
+ // BLX complication: THUMB2 -> ARM.
+ test_T24.Accept(kCleanSlateBLX_T2, aligned2, aligned4);
+ test_T24.Reject(kCleanSlateBLX_T2, aligned2, misaligned4);
+ test_T24.Reject(kCleanSlateBLX_T2, misaligned2, aligned4);
+ test_T24.Reject(kCleanSlateBLX_T2, misaligned2, misaligned4);
+ test_T24.Accept(kCleanSlateBLX_T2, {0x16000000},
+ {pcThumb2(0x15000000), pcThumb2(0x16FFFFFC)});
+ test_T24.Reject(kCleanSlateBLX_T2, {0x16000000},
+ {pcThumb2(0x15000000 - 4), pcThumb2(0x15000000 - 2),
+ pcThumb2(0x16FFFFFC + 2), pcThumb2(0x16FFFFFC + 4)});
+}
+
+// Typical usage in |target_rva| extraction.
+TEST(AArch32Rel32Translator, Main) {
+ // ARM mode (32-bit).
+ // 00103050: 00 01 02 EA B 00183458 ; B encoding A1 (cond = AL).
+ {
+ rva_t instr_rva = 0x00103050U;
+ AArch32Rel32Translator translator;
+ std::vector<uint8_t> bytes = {0x00, 0x01, 0x02, 0xEA};
+ MutableBufferView region(&bytes[0], bytes.size());
+ uint32_t code = translator.FetchArmCode32(region, 0U);
+ EXPECT_EQ(0xEA020100U, code);
+
+ // |code| <-> |disp|.
+ arm_disp_t disp = 0;
+ EXPECT_EQ(kArmAlign4, translator.DecodeA24(code, &disp));
+ EXPECT_EQ(+0x00080400, disp);
+
+ uint32_t code_from_disp = kCleanSlateBAL_A1;
+ EXPECT_TRUE(translator.EncodeA24(disp, &code_from_disp));
+ EXPECT_EQ(code, code_from_disp);
+
+ // |code| <-> |target_rva|.
+ rva_t target_rva = kInvalidRva;
+ EXPECT_TRUE(translator.ReadA24(instr_rva, code, &target_rva));
+ // 0x00103050 + 8 + 0x00080400.
+ EXPECT_EQ(0x00183458U, target_rva);
+
+ uint32_t code_from_rva = kCleanSlateBAL_A1;
+ EXPECT_TRUE(translator.WriteA24(instr_rva, target_rva, &code_from_rva));
+ EXPECT_EQ(code, code_from_rva);
+ }
+
+ // THUMB2 mode (16-bit).
+ // 001030A2: F3 E7 B 0010308C ; B encoding T2.
+ {
+ rva_t instr_rva = 0x001030A2U;
+ AArch32Rel32Translator translator;
+ std::vector<uint8_t> bytes = {0xF3, 0xE7};
+ MutableBufferView region(&bytes[0], bytes.size());
+ uint16_t code = translator.FetchThumb2Code16(region, 0U);
+ // Sii iiiiiiii = 111 11110011 = -1101 = -0x0D.
+ EXPECT_EQ(0xE7F3U, code);
+
+ // |code| <-> |disp|.
+ arm_disp_t disp = 0;
+ EXPECT_EQ(kArmAlign2, translator.DecodeT11(code, &disp));
+ EXPECT_EQ(-0x0000001A, disp); // -0x0D * 2 = -0x1A.
+
+ uint16_t code_from_disp = kCleanSlateB_T2;
+ EXPECT_TRUE(translator.EncodeT11(disp, &code_from_disp));
+ EXPECT_EQ(code, code_from_disp);
+
+ // |code| <-> |target_rva|.
+ rva_t target_rva = kInvalidRva;
+ EXPECT_TRUE(translator.ReadT11(instr_rva, code, &target_rva));
+ // 0x001030A2 + 4 - 0x0000001A.
+ EXPECT_EQ(0x0010308CU, target_rva);
+
+ uint16_t code_from_rva = kCleanSlateB_T2;
+ EXPECT_TRUE(translator.WriteT11(instr_rva, target_rva, &code_from_rva));
+ EXPECT_EQ(code, code_from_rva);
+ }
+
+ // THUMB2 mode (32-bit).
+ // 001030A2: 00 F0 01 FA BL 001034A8 ; BL encoding T1.
+ {
+ rva_t instr_rva = 0x001030A2U;
+ AArch32Rel32Translator translator;
+ std::vector<uint8_t> bytes = {0x00, 0xF0, 0x01, 0xFA};
+ MutableBufferView region(&bytes[0], bytes.size());
+ uint32_t code = translator.FetchThumb2Code32(region, 0U);
+ EXPECT_EQ(0xF000FA01U, code);
+
+ // |code| <-> |disp|.
+ arm_disp_t disp = 0;
+ EXPECT_EQ(kArmAlign2, translator.DecodeT24(code, &disp));
+ EXPECT_EQ(+0x00000402, disp);
+
+ uint32_t code_from_disp = kCleanSlateBL_T1;
+ EXPECT_TRUE(translator.EncodeT24(disp, &code_from_disp));
+ EXPECT_EQ(code, code_from_disp);
+
+ // |code| <-> |target_rva|.
+ rva_t target_rva = kInvalidRva;
+ EXPECT_TRUE(translator.ReadT24(instr_rva, code, &target_rva));
+ // 0x001030A2 + 4 + 0x00000002.
+ EXPECT_EQ(0x001034A8U, target_rva);
+
+ uint32_t code_from_rva = kCleanSlateBL_T1;
+ EXPECT_TRUE(translator.WriteT24(instr_rva, target_rva, &code_from_rva));
+ EXPECT_EQ(code, code_from_rva);
+ }
+}
+
+TEST(AArch32Rel32Translator, BLXComplication) {
+ auto run_test = [](rva_t instr_rva,
+ std::vector<uint8_t> bytes, // Pass by value.
+ uint32_t expected_code, arm_disp_t expected_disp,
+ uint32_t clean_slate_code, rva_t expected_target_rva) {
+ AArch32Rel32Translator translator;
+ MutableBufferView region(&bytes[0], bytes.size());
+ uint32_t code = translator.FetchThumb2Code32(region, 0U);
+ EXPECT_EQ(expected_code, code);
+
+ // |code| <-> |disp|.
+ arm_disp_t disp = 0;
+ EXPECT_TRUE(translator.DecodeT24(code, &disp));
+ EXPECT_EQ(expected_disp, disp);
+
+ uint32_t code_from_disp = clean_slate_code;
+ EXPECT_TRUE(translator.EncodeT24(disp, &code_from_disp));
+ EXPECT_EQ(code, code_from_disp);
+
+ // |code| <-> |target_rva|.
+ rva_t target_rva = kInvalidRva;
+ EXPECT_TRUE(translator.ReadT24(instr_rva, code, &target_rva));
+ EXPECT_EQ(expected_target_rva, target_rva);
+
+ uint32_t code_from_rva = clean_slate_code;
+ EXPECT_TRUE(translator.WriteT24(instr_rva, target_rva, &code_from_rva));
+ EXPECT_EQ(code, code_from_rva);
+ };
+
+ // No complication, 4-byte aligned.
+ // 001030A0: 01 F0 06 B0 B 005040B0 ; B encoding T4.
+ run_test(0x001030A0U, // Multiple of 4.
+ {0x01, 0xF0, 0x06, 0xB0}, 0xF001B006U, 0x0040100C, kCleanSlateB_T4,
+ // "Canonical" |target_rva|: 0x001030A0 + 4 + 0x0040100C.
+ 0x005040B0U);
+
+ // No complication, not 4-byte aligned.
+ // 001030A2: 01 F0 06 B0 B 005040B2 ; B encoding T4.
+ run_test(0x001030A2U, // Shift by 2: Not multiple of 4.
+ {0x01, 0xF0, 0x06, 0xB0}, 0xF001B006U, 0x0040100C, kCleanSlateB_T4,
+ // Shifted by 2: 0x001030A2 + 4 + 0x0040100C.
+ 0x005040B2U);
+
+ // Repeat the above, but use BLX instead of B.
+
+ // BLX complication, 4-byte aligned.
+ // 001030A0: 01 F0 06 E0 BLX 005040B0 ; BLX encoding T2.
+ run_test(0x001030A0U, // Multiple of 4.
+ {0x01, 0xF0, 0x06, 0xE0}, 0xF001E006U, 0x0040100C, kCleanSlateBLX_T2,
+ // Canonical again: align_down_4(0x001030A0 + 4 + 0x0040100C).
+ 0x005040B0U);
+
+ // BLX complication, not 4-byte aligned.
+ // 001030A2: 01 F0 06 E0 BLX 005040B0 ; BLX encoding T2.
+ run_test(0x001030A2U, // Shift by 2: Not multiple of 4.
+ {0x01, 0xF0, 0x06, 0xE0}, 0xF001E006U, 0x0040100C, kCleanSlateBLX_T2,
+ // No shift: align_down_4(0x001030A2 + 4 + 0x0040100C).
+ 0x005040B0U);
+}
+
+TEST(AArch64Rel32Translator, FetchStore) {
+ std::vector<uint8_t> bytes = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE};
+ std::vector<uint8_t> expected = {0xAB, 0x33, 0x22, 0x11,
+ 0x69, 0x5A, 0xFF, 0x00};
+ MutableBufferView region(&bytes[0], bytes.size());
+ AArch64Rel32Translator translator;
+ EXPECT_EQ(0x76543210U, translator.FetchCode32(region, 0U));
+ EXPECT_EQ(0xFEDCBA98U, translator.FetchCode32(region, 4U));
+
+ translator.StoreCode32(region, 0U, 0x112233ABU);
+ translator.StoreCode32(region, 4U, 0x00FF5A69);
+ EXPECT_EQ(expected, bytes);
+}
+
+TEST(AArch64Rel32Translator, EncodeDecode) {
+ // Immd14 tests.
+ ArmTranslatorEncodeDecodeTest<AArch64Rel32Translator::AddrTraits_Immd14>
+ test_immd14;
+ for (int b40 : {0, 1, 7, 31}) {
+ uint32_t b40_mask = b40 << 19;
+ for (int Rt : {0, 1, 15, 30}) {
+ uint32_t mask = b40_mask | Rt;
+ ArmRelInstruction<uint32_t> TBZw_Rt("00110110 bbbbbSii iiiiiiii iiittttt",
+ kCleanSlate64TBZw | mask);
+ ArmRelInstruction<uint32_t> TBZz_Rt("10110110 bbbbbSii iiiiiiii iiittttt",
+ kCleanSlate64TBZz | mask);
+ ArmRelInstruction<uint32_t> TBNZw_Rt(
+ "00110111 bbbbbSii iiiiiiii iiittttt", kCleanSlate64TBNZw | mask);
+ ArmRelInstruction<uint32_t> TBNZz_Rt(
+ "10110111 bbbbbSii iiiiiiii iiittttt", kCleanSlate64TBNZz | mask);
+ test_immd14.Run("SSSSSSSS SSSSSSSS Siiiiiii iiiiii00", {"S", "i"},
+ {TBZw_Rt, TBZz_Rt, TBNZw_Rt, TBNZz_Rt},
+ {0x00007FFC, -0x00008000, 0, 4, -4, 0x40, 0x44},
+ {2, -2, 0x41, 0x42, 0x43, 0x00008000, -0x00008004});
+ }
+ }
+
+ // Immd19 tests.
+ ArmTranslatorEncodeDecodeTest<AArch64Rel32Translator::AddrTraits_Immd19>
+ test_immd19;
+ for (int cond = 0; cond <= 0x0E; ++cond) {
+ ArmRelInstruction<uint32_t> B_cond("01010100 Siiiiiii iiiiiiii iii0cccc",
+ kCleanSlate64Bcond | cond);
+ test_immd19.Run("SSSSSSSS SSSSiiii iiiiiiii iiiiii00", {"S", "i"}, {B_cond},
+ {0x000FFFFC, -0x00100000, 0, 4, -4, 0x40, 0x44},
+ {2, -2, 0x41, 0x42, 0x43, 0x00100000, -0x00100004});
+ }
+ for (int Rt : {0, 1, 15, 30}) {
+ ArmRelInstruction<uint32_t> CBZw_Rt("00110100 Siiiiiii iiiiiiii iiittttt",
+ kCleanSlate64CBZw | Rt);
+ ArmRelInstruction<uint32_t> CBZz_Rt("10110100 Siiiiiii iiiiiiii iiittttt",
+ kCleanSlate64CBZz | Rt);
+ ArmRelInstruction<uint32_t> CBNZw_Rt("00110101 Siiiiiii iiiiiiii iiittttt",
+ kCleanSlate64CBNZw | Rt);
+ ArmRelInstruction<uint32_t> CBNZz_Rt("10110101 Siiiiiii iiiiiiii iiittttt",
+ kCleanSlate64CBNZz | Rt);
+ test_immd19.Run("SSSSSSSS SSSSiiii iiiiiiii iiiiii00", {"S", "i"},
+ {CBZw_Rt, CBZz_Rt, CBNZw_Rt, CBNZz_Rt},
+ {0x000FFFFC, -0x00100000, 0, 4, -4, 0x40, 0x44},
+ {2, -2, 0x41, 0x42, 0x43, 0x00100000, -0x00100004});
+ }
+
+ // Immd26 tests.
+ ArmTranslatorEncodeDecodeTest<AArch64Rel32Translator::AddrTraits_Immd26>
+ test_immd26;
+ ArmRelInstruction<uint32_t> B("000101Si iiiiiiii iiiiiiii iiiiiiii",
+ kCleanSlate64B);
+ ArmRelInstruction<uint32_t> BL("100101Si iiiiiiii iiiiiiii iiiiiiii",
+ kCleanSlate64BL);
+ test_immd26.Run("SSSSSiii iiiiiiii iiiiiiii iiiiii00", {"S", "i"}, {B, BL},
+ {0x07FFFFFC, -0x08000000, 0, 4, -4, 0x40, 0x44},
+ {2, -2, 0x41, 0x42, 0x43, 0x08000000, -0x08000004});
+}
+
+TEST(AArch64Rel32Translator, WriteRead) {
+ std::vector<rva_t> aligned4;
+ std::vector<rva_t> misaligned4;
+ for (rva_t rva = 0x1FFC; rva <= 0x2010; ++rva) {
+ ((rva % 4 == 0) ? aligned4 : misaligned4).push_back(rva);
+ }
+ CHECK_EQ(6U, aligned4.size());
+ CHECK_EQ(15U, misaligned4.size());
+
+ // Helper to convert an instruction's RVA to PC.
+ auto pcAArch64 = [](rva_t instr_rva) -> rva_t { return instr_rva; };
+
+ // Immd14 tests.
+ ArmTranslatorWriteReadTest<AArch64Rel32Translator::AddrTraits_Immd14>
+ test_immd14;
+ for (uint32_t clean_slate_code : {kCleanSlate64TBZw, kCleanSlate64TBZz,
+ kCleanSlate64TBNZw, kCleanSlate64TBNZz}) {
+ test_immd14.Accept(clean_slate_code, aligned4, aligned4);
+ test_immd14.Reject(clean_slate_code, aligned4, misaligned4);
+ test_immd14.Reject(clean_slate_code, misaligned4, aligned4);
+ test_immd14.Reject(clean_slate_code, misaligned4, misaligned4);
+ // Signed (14 + 2)-bit range, 4-byte aligned: [-0x00008000, 0x00007FFC].
+ test_immd14.Accept(clean_slate_code, {0x10040000},
+ {pcAArch64(0x10038000), pcAArch64(0x10047FFC)});
+ test_immd14.Reject(clean_slate_code, {0x15000000},
+ {pcAArch64(0x10038000 - 4), pcAArch64(0x10047FFC + 4)});
+ }
+
+ // Immd19 tests.
+ ArmTranslatorWriteReadTest<AArch64Rel32Translator::AddrTraits_Immd19>
+ test_immd19;
+ for (uint32_t clean_slate_code :
+ {kCleanSlate64Bcond, kCleanSlate64CBZw, kCleanSlate64CBZz,
+ kCleanSlate64CBNZw, kCleanSlate64CBNZz}) {
+ test_immd19.Accept(clean_slate_code, aligned4, aligned4);
+ test_immd19.Reject(clean_slate_code, aligned4, misaligned4);
+ test_immd19.Reject(clean_slate_code, misaligned4, aligned4);
+ test_immd19.Reject(clean_slate_code, misaligned4, misaligned4);
+ // Signed (19 + 2)-bit range, 4-byte aligned: [-0x00100000, 0x000FFFFC].
+ test_immd19.Accept(clean_slate_code, {0x10300000},
+ {pcAArch64(0x10200000), pcAArch64(0x103FFFFC)});
+ test_immd19.Reject(clean_slate_code, {0x10300000},
+ {pcAArch64(0x10200000 - 4), pcAArch64(0x103FFFFC + 4)});
+ }
+
+ // Immd26 tests.
+ ArmTranslatorWriteReadTest<AArch64Rel32Translator::AddrTraits_Immd26>
+ test_immd26;
+ for (uint32_t clean_slate_code : {kCleanSlate64B, kCleanSlate64BL}) {
+ test_immd26.Accept(clean_slate_code, aligned4, aligned4);
+ test_immd26.Reject(clean_slate_code, aligned4, misaligned4);
+ test_immd26.Reject(clean_slate_code, misaligned4, aligned4);
+ test_immd26.Reject(clean_slate_code, misaligned4, misaligned4);
+ // Signed (26 + 2)-bit range, 4-byte aligned: [-0x08000000, 0x07FFFFFC].
+ test_immd26.Accept(clean_slate_code, {0x30000000},
+ {pcAArch64(0x28000000), pcAArch64(0x37FFFFFC)});
+ test_immd26.Reject(clean_slate_code, {0x30000000},
+ {pcAArch64(0x28000000 - 4), pcAArch64(0x37FFFFFC + 4)});
+ }
+}
+
+// Typical usage in |target_rva| extraction.
+TEST(AArch64Rel32Translator, Main) {
+ // 00103050: 02 01 02 14 B 00183458
+ rva_t instr_rva = 0x00103050U;
+ AArch64Rel32Translator translator;
+ std::vector<uint8_t> bytes = {0x02, 0x01, 0x02, 0x14};
+ MutableBufferView region(&bytes[0], bytes.size());
+ uint32_t code = translator.FetchCode32(region, 0U);
+ EXPECT_EQ(0x14020102U, code);
+
+ // |code| <-> |disp|.
+ arm_disp_t disp = 0;
+ EXPECT_TRUE(translator.DecodeImmd26(code, &disp));
+ EXPECT_EQ(+0x00080408, disp);
+
+ uint32_t code_from_disp = kCleanSlate64B;
+ EXPECT_TRUE(translator.EncodeImmd26(disp, &code_from_disp));
+ EXPECT_EQ(code, code_from_disp);
+
+ // |code| <-> |target_rva|.
+ rva_t target_rva = kInvalidRva;
+ EXPECT_TRUE(translator.ReadImmd26(instr_rva, code, &target_rva));
+ // 0x00103050 + 0 + 0x00080408.
+ EXPECT_EQ(0x00183458U, target_rva);
+
+ uint32_t code_from_rva = kCleanSlate64B;
+ EXPECT_TRUE(translator.WriteImmd26(instr_rva, target_rva, &code_from_rva));
+ EXPECT_EQ(code, code_from_rva);
+}
+
+} // namespace zucchini
diff --git a/binary_data_histogram.cc b/binary_data_histogram.cc
new file mode 100644
index 0000000..7f6ece8
--- /dev/null
+++ b/binary_data_histogram.cc
@@ -0,0 +1,91 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/binary_data_histogram.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "base/check_op.h"
+#include "base/format_macros.h"
+#include "base/strings/stringprintf.h"
+
+namespace zucchini {
+
+/******** OutlierDetector ********/
+
+OutlierDetector::OutlierDetector() = default;
+
+OutlierDetector::~OutlierDetector() = default;
+
+// For BinaryDataHistogram, |sample| is typically in interval [0, 1].
+void OutlierDetector::Add(double sample) {
+ ++n_;
+ sum_ += sample;
+ sum_of_squares_ += sample * sample;
+}
+
+void OutlierDetector::Prepare() {
+ if (n_ > 0) {
+ mean_ = sum_ / n_;
+ standard_deviation_ = ::sqrt((sum_of_squares_ - sum_ * mean_) /
+ std::max(static_cast<size_t>(1), n_ - 1));
+ }
+}
+
+std::string OutlierDetector::RenderStats() {
+ return base::StringPrintf("Mean = %.5f, StdDev = %.5f over %" PRIuS
+ " samples",
+ mean_, standard_deviation_, n_);
+}
+
+// Constants are chosen for BinaryDataHistogram, where |sample| is typically in
+// [0, 1].
+int OutlierDetector::DecideOutlier(double sample) {
+ // Lower bound to avoid divide-by-zero and penalizing tight clusters.
+ constexpr double kMinTolerance = 0.1;
+ // Number of standard deviations away from mean for value to become outlier.
+ constexpr double kSigmaBound = 1.9;
+ if (n_ <= 1)
+ return 0;
+ double tolerance = std::max(kMinTolerance, standard_deviation_);
+ double num_sigma = (sample - mean_) / tolerance;
+ return num_sigma > kSigmaBound ? 1 : num_sigma < -kSigmaBound ? -1 : 0;
+}
+
+/******** BinaryDataHistogram ********/
+
+BinaryDataHistogram::BinaryDataHistogram() = default;
+
+BinaryDataHistogram::~BinaryDataHistogram() = default;
+
+bool BinaryDataHistogram::Compute(ConstBufferView region) {
+ DCHECK(!histogram_);
+ // Binary data with size < 2 are invalid.
+ if (region.size() < sizeof(uint16_t))
+ return false;
+ DCHECK_LE(region.size(),
+ static_cast<size_t>(std::numeric_limits<int32_t>::max()));
+
+ histogram_ = std::make_unique<int32_t[]>(kNumBins);
+ size_ = region.size();
+ // Number of 2-byte intervals fully contained in |region|.
+ size_t bound = size_ - sizeof(uint16_t) + 1;
+ for (size_t i = 0; i < bound; ++i)
+ ++histogram_[region.read<uint16_t>(i)];
+ return true;
+}
+
+double BinaryDataHistogram::Distance(const BinaryDataHistogram& other) const {
+ DCHECK(IsValid() && other.IsValid());
+ // Compute Manhattan (L1) distance between respective histograms.
+ double total_diff = 0;
+ for (int i = 0; i < kNumBins; ++i)
+ total_diff += std::abs(histogram_[i] - other.histogram_[i]);
+ // Normalize by total size, so result lies in [0, 1].
+ return total_diff / (size_ + other.size_);
+}
+
+} // namespace zucchini
diff --git a/binary_data_histogram.h b/binary_data_histogram.h
new file mode 100644
index 0000000..201f90a
--- /dev/null
+++ b/binary_data_histogram.h
@@ -0,0 +1,90 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_
+#define COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// A class to detect outliers in a list of doubles using Chauvenet's criterion:
+// Compute mean and standard deviation of observations, then determine whether
+// a query value lies beyond a fixed number of standard deviations (sigmas) from
+// the mean. The purpose of this test is to reduce the chance of false-positive
+// ensemble matches.
+class OutlierDetector {
+ public:
+ OutlierDetector();
+ OutlierDetector(const OutlierDetector&) = delete;
+ const OutlierDetector& operator=(const OutlierDetector&) = delete;
+ ~OutlierDetector();
+
+ // Incorporates |sample| into mean and standard deviation.
+ void Add(double sample);
+
+ // Prepares basic statistics for DecideOutlier() calls. Should be called after
+ // all samples have been added.
+ void Prepare();
+
+ // Renders current statistics as strings for logging.
+ std::string RenderStats();
+
+ // Heuristically decides whether |sample| is an outlier. Returns 1 if |sample|
+ // is "too high", 0 if |sample| is "normal", and -1 if |sample| is "too low".
+ // Must be called after Prepare().
+ int DecideOutlier(double sample);
+
+ private:
+ size_t n_ = 0;
+ double sum_ = 0;
+ double sum_of_squares_ = 0;
+ double mean_ = 0;
+ double standard_deviation_ = 0;
+};
+
+// A class to compute similarity score between binary data. The heuristic here
+// preprocesses input data to a size-65536 histogram, counting the frequency of
+// consecutive 2-byte sequences. Therefore data with lengths < 2 are considered
+// invalid -- but this is okay for Zucchini's use case.
+class BinaryDataHistogram {
+ public:
+ BinaryDataHistogram();
+ BinaryDataHistogram(const BinaryDataHistogram&) = delete;
+ const BinaryDataHistogram& operator=(const BinaryDataHistogram&) = delete;
+ ~BinaryDataHistogram();
+
+ // Attempts to compute the histogram, returns true iff successful.
+ bool Compute(ConstBufferView region);
+
+ bool IsValid() const { return static_cast<bool>(histogram_); }
+
+ // Returns distance to another histogram (heuristics). If two binaries are
+ // identical then their histogram distance is 0. However, the converse is not
+ // true in general. For example, "aba" and "bab" are different, but their
+ // histogram distance is 0 (both histograms are {"ab": 1, "ba": 1}).
+ double Distance(const BinaryDataHistogram& other) const;
+
+ private:
+ enum { kNumBins = 1 << (sizeof(uint16_t) * 8) };
+ static_assert(kNumBins == 65536, "Incorrect constant computation.");
+
+ // Size, in bytes, of the data over which the histogram was computed.
+ size_t size_ = 0;
+
+ // 2^16 buckets holding counts of all 2-byte sequences in the data. The counts
+ // are stored as signed values to simplify computing the distance between two
+ // histograms.
+ std::unique_ptr<int32_t[]> histogram_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_
diff --git a/binary_data_histogram_unittest.cc b/binary_data_histogram_unittest.cc
new file mode 100644
index 0000000..ca71010
--- /dev/null
+++ b/binary_data_histogram_unittest.cc
@@ -0,0 +1,132 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/binary_data_histogram.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+TEST(OutlierDetectorTest, Basic) {
+ auto make_detector = [](const std::vector<double>& values) {
+ auto detector = std::make_unique<OutlierDetector>();
+ for (double v : values)
+ detector->Add(v);
+ detector->Prepare();
+ return detector;
+ };
+
+ std::unique_ptr<OutlierDetector> detector;
+ // No data: Should at least not cause error.
+ detector = make_detector({});
+ EXPECT_EQ(0, detector->DecideOutlier(0.0));
+ // Single point: Trivially inert.
+ detector = make_detector({0.5});
+ EXPECT_EQ(0, detector->DecideOutlier(0.1));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.9));
+ // Two identical points: StdDev is 0, so falls back to built-in tolerance.
+ detector = make_detector({0.5, 0.5});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.499));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.501));
+ EXPECT_EQ(1, detector->DecideOutlier(0.7));
+ // Two separate points: Outliner test is pretty lax.
+ detector = make_detector({0.4, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.2));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.7));
+ EXPECT_EQ(1, detector->DecideOutlier(0.8));
+ // Sharpen distribution by clustering toward norm: Now test is stricter.
+ detector = make_detector({0.4, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.4));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.6));
+ EXPECT_EQ(1, detector->DecideOutlier(0.7));
+ // Shift numbers around: Mean is 0.3, and data order scrambled.
+ detector = make_detector({0.28, 0.2, 0.31, 0.4, 0.29, 0.32, 0.27, 0.30});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.0));
+ EXPECT_EQ(-1, detector->DecideOutlier(0.1));
+ EXPECT_EQ(0, detector->DecideOutlier(0.2));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.4));
+ EXPECT_EQ(1, detector->DecideOutlier(0.5));
+ EXPECT_EQ(1, detector->DecideOutlier(1.0));
+ // Typical usage: Potential outlier would be part of original input data!
+ detector = make_detector({0.3, 0.29, 0.31, 0.0, 0.3, 0.32, 0.3, 0.29, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.0));
+ EXPECT_EQ(0, detector->DecideOutlier(0.28));
+ EXPECT_EQ(0, detector->DecideOutlier(0.29));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.31));
+ EXPECT_EQ(0, detector->DecideOutlier(0.32));
+ EXPECT_EQ(1, detector->DecideOutlier(0.6));
+}
+
+TEST(BinaryDataHistogramTest, Basic) {
+ constexpr double kUninitScore = -1;
+
+ constexpr uint8_t kTestData[] = {2, 137, 42, 0, 0, 0, 7, 11, 1, 11, 255};
+ const size_t n = sizeof(kTestData);
+ ConstBufferView region(kTestData, n);
+
+ std::vector<BinaryDataHistogram> prefix_histograms(n + 1); // Short to long.
+ std::vector<BinaryDataHistogram> suffix_histograms(n + 1); // Long to short.
+
+ for (size_t i = 0; i <= n; ++i) {
+ ConstBufferView prefix(region.begin(), i);
+ ConstBufferView suffix(region.begin() + i, n - i);
+ // If regions are smaller than 2 bytes then it is invalid. Else valid.
+ EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].Compute(prefix));
+ EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].Compute(suffix));
+ // IsValid() returns the same results.
+ EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].IsValid());
+ EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].IsValid());
+ }
+
+ // Full-prefix = full-suffix = full data.
+ EXPECT_EQ(0.0, prefix_histograms[n].Distance(suffix_histograms[0]));
+ EXPECT_EQ(0.0, suffix_histograms[0].Distance(prefix_histograms[n]));
+
+ // Testing heuristics without overreliance on implementation details.
+
+ // Strict prefixes, in increasing size. Compare against full data.
+ double prev_prefix_score = kUninitScore;
+ for (size_t i = 2; i < n; ++i) {
+ double score = prefix_histograms[i].Distance(prefix_histograms[n]);
+ // Positivity.
+ EXPECT_GT(score, 0.0);
+ // Symmetry.
+ EXPECT_EQ(score, prefix_histograms[n].Distance(prefix_histograms[i]));
+ // Distance should decrease as prefix gets nearer to full data.
+ if (prev_prefix_score != kUninitScore)
+ EXPECT_LT(score, prev_prefix_score);
+ prev_prefix_score = score;
+ }
+
+ // Strict suffixes, in decreasing size. Compare against full data.
+ double prev_suffix_score = -1;
+ for (size_t i = 1; i <= n - 2; ++i) {
+ double score = suffix_histograms[i].Distance(suffix_histograms[0]);
+ // Positivity.
+ EXPECT_GT(score, 0.0);
+ // Symmetry.
+ EXPECT_EQ(score, suffix_histograms[0].Distance(suffix_histograms[i]));
+ // Distance should increase as suffix gets farther from full data.
+ if (prev_suffix_score != kUninitScore)
+ EXPECT_GT(score, prev_suffix_score);
+ prev_suffix_score = score;
+ }
+}
+
+} // namespace zucchini
diff --git a/buffer_sink.cc b/buffer_sink.cc
new file mode 100644
index 0000000..5b89e3a
--- /dev/null
+++ b/buffer_sink.cc
@@ -0,0 +1,11 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_sink.h"
+
+namespace zucchini {
+
+BufferSink::BufferSink(MutableBufferView buffer) : MutableBufferView(buffer) {}
+
+} // namespace zucchini
diff --git a/buffer_sink.h b/buffer_sink.h
new file mode 100644
index 0000000..24798af
--- /dev/null
+++ b/buffer_sink.h
@@ -0,0 +1,68 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_BUFFER_SINK_H_
+#define COMPONENTS_ZUCCHINI_BUFFER_SINK_H_
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <iterator>
+
+#include "base/check_op.h"
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// BufferSink acts like an output stream with convenience methods to serialize
+// data into a contiguous sequence of raw data. The underlying MutableBufferView
+// emulates a cursor to track current write position, and guards against buffer
+// overrun. Where applicable, BufferSink should be passed by pointer to maintain
+// cursor progress across writes.
+class BufferSink : public MutableBufferView {
+ public:
+ using iterator = MutableBufferView::iterator;
+
+ using MutableBufferView::MutableBufferView;
+ BufferSink() = default;
+ explicit BufferSink(MutableBufferView buffer);
+ BufferSink(const BufferSink&) = default;
+ BufferSink& operator=(BufferSink&&) = default;
+
+ // If sufficient space is available, writes the binary representation of
+ // |value| starting at the cursor, while advancing the cursor beyond the
+ // written region, and returns true. Otherwise returns false.
+ template <class T>
+ bool PutValue(const T& value) {
+ DCHECK_NE(begin(), nullptr);
+ if (Remaining() < sizeof(T))
+ return false;
+ *reinterpret_cast<T*>(begin()) = value;
+ remove_prefix(sizeof(T));
+ return true;
+ }
+
+ // If sufficient space is available, writes the raw bytes [|first|, |last|)
+ // starting at the cursor, while advancing the cursor beyond the written
+ // region, and returns true. Otherwise returns false.
+ template <class It>
+ bool PutRange(It first, It last) {
+ static_assert(sizeof(typename std::iterator_traits<It>::value_type) ==
+ sizeof(uint8_t),
+ "value_type should fit in uint8_t");
+ DCHECK_NE(begin(), nullptr);
+ DCHECK(last >= first);
+ if (Remaining() < size_type(last - first))
+ return false;
+ std::copy(first, last, begin());
+ remove_prefix(last - first);
+ return true;
+ }
+
+ size_type Remaining() const { return size(); }
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_BUFFER_SINK_H_
diff --git a/buffer_sink_unittest.cc b/buffer_sink_unittest.cc
new file mode 100644
index 0000000..33b788e
--- /dev/null
+++ b/buffer_sink_unittest.cc
@@ -0,0 +1,71 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_sink.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+constexpr uint8_t kUninit = 0xFF;
+
+class BufferSinkTest : public testing::Test {
+ protected:
+ BufferSinkTest()
+ : buffer_(10, kUninit), sink_(buffer_.data(), buffer_.size()) {}
+
+ std::vector<uint8_t> buffer_;
+ BufferSink sink_;
+};
+
+TEST_F(BufferSinkTest, PutValue) {
+ EXPECT_EQ(size_t(10), sink_.Remaining());
+
+ EXPECT_TRUE(sink_.PutValue(uint32_t(0x76543210)));
+ EXPECT_EQ(size_t(6), sink_.Remaining());
+
+ EXPECT_TRUE(sink_.PutValue(uint32_t(0xFEDCBA98)));
+ EXPECT_EQ(size_t(2), sink_.Remaining());
+
+ EXPECT_FALSE(sink_.PutValue(uint32_t(0x00)));
+ EXPECT_EQ(size_t(2), sink_.Remaining());
+
+ EXPECT_TRUE(sink_.PutValue(uint16_t(0x0010)));
+ EXPECT_EQ(size_t(0), sink_.Remaining());
+
+ // Assuming little-endian architecture.
+ EXPECT_EQ(std::vector<uint8_t>(
+ {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE, 0x10, 0x00}),
+ buffer_);
+}
+
+TEST_F(BufferSinkTest, PutRange) {
+ std::vector<uint8_t> range = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA,
+ 0xDC, 0xFE, 0x10, 0x00, 0x42};
+
+ EXPECT_EQ(size_t(10), sink_.Remaining());
+ EXPECT_FALSE(sink_.PutRange(range.begin(), range.end()));
+ EXPECT_EQ(size_t(10), sink_.Remaining());
+
+ EXPECT_TRUE(sink_.PutRange(range.begin(), range.begin() + 8));
+ EXPECT_EQ(size_t(2), sink_.Remaining());
+ EXPECT_EQ(std::vector<uint8_t>({0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC,
+ 0xFE, kUninit, kUninit}),
+ buffer_);
+
+ EXPECT_FALSE(sink_.PutRange(range.begin(), range.begin() + 4));
+ EXPECT_EQ(size_t(2), sink_.Remaining());
+
+ // range is not written
+ EXPECT_EQ(std::vector<uint8_t>({0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC,
+ 0xFE, kUninit, kUninit}),
+ buffer_);
+}
+
+} // namespace zucchini
diff --git a/buffer_source.cc b/buffer_source.cc
new file mode 100644
index 0000000..d72d329
--- /dev/null
+++ b/buffer_source.cc
@@ -0,0 +1,105 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_source.h"
+
+#include <algorithm>
+
+#include "components/zucchini/algorithm.h"
+
+namespace zucchini {
+
+BufferSource::BufferSource(ConstBufferView buffer) : ConstBufferView(buffer) {}
+
+BufferSource& BufferSource::Skip(size_type n) {
+ remove_prefix(std::min(n, Remaining()));
+ return *this;
+}
+
+bool BufferSource::CheckNextBytes(std::initializer_list<uint8_t> bytes) const {
+ if (Remaining() < bytes.size())
+ return false;
+ return std::mismatch(bytes.begin(), bytes.end(), begin()).first ==
+ bytes.end();
+}
+
+bool BufferSource::ConsumeBytes(std::initializer_list<uint8_t> bytes) {
+ if (!CheckNextBytes(bytes))
+ return false;
+ remove_prefix(bytes.size());
+ return true;
+}
+
+bool BufferSource::GetRegion(size_type count, ConstBufferView* buffer) {
+ DCHECK_NE(begin(), nullptr);
+ if (Remaining() < count)
+ return false;
+ *buffer = ConstBufferView(begin(), count);
+ remove_prefix(count);
+ return true;
+}
+
+// [0aaaaaaa] => 00000000'00000000'00000000'0aaaaaaa
+// [1aaaaaaa 0bbbbbbb] => 00000000'00000000'00bbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 0ccccccc] => 00000000'000ccccc'ccbbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 1ccccccc 0ddddddd] => 0000dddd'dddccccc'ccbbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 1ccccccc 1ddddddd 0???eeee]
+// => eeeedddd'dddccccc'ccbbbbbb'baaaaaaa
+// Note that "???" is discarded. Meanwhile, 1???eeee is invalid.
+bool BufferSource::GetUleb128(uint32_t* ret) {
+ int shift_lim =
+ static_cast<int>(std::min<size_type>(kMaxLeb128Size, size())) * 7;
+ const_iterator cur = cbegin();
+ uint32_t value = 0U;
+ for (int shift = 0; shift < shift_lim; shift += 7, ++cur) {
+ uint32_t b = *cur;
+ // When |shift == 28|, |(b & 0x7F) << shift| discards the "???" bits.
+ value |= static_cast<uint32_t>(b & 0x7F) << shift;
+ if (!(b & 0x80)) {
+ *ret = value;
+ seek(cur + 1);
+ return true;
+ }
+ }
+ return false;
+}
+
+// [0Saaaaaa] => SSSSSSSS'SSSSSSSS'SSSSSSSS'SSaaaaaa
+// [1aaaaaaa 0Sbbbbbb] => SSSSSSSS'SSSSSSSS'SSSbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 0Scccccc] => SSSSSSSS'SSSScccc'ccbbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 1ccccccc 0Sdddddd] => SSSSSddd'dddccccc'ccbbbbbb'baaaaaaa
+// [1aaaaaaa 1bbbbbbb 1ccccccc 1ddddddd 0???Seee]
+// => Seeedddd'dddccccc'ccbbbbbb'baaaaaaa
+// Note that "???" is discarded. Meanwhile, 1???eeee is invalid.
+bool BufferSource::GetSleb128(int32_t* ret) {
+ int shift_lim =
+ static_cast<int>(std::min<size_type>(kMaxLeb128Size, size())) * 7;
+ const_iterator cur = cbegin();
+ int32_t value = 0;
+ for (int shift = 0; shift < shift_lim; shift += 7, ++cur) {
+ uint32_t b = *cur;
+ // When |shift == 28|, |(b & 0x7F) << shift| discards the "???" bits.
+ value |= static_cast<int32_t>(static_cast<uint32_t>(b & 0x7F) << shift);
+ if (!(b & 0x80)) {
+ *ret = (shift == 28) ? value : SignExtend(shift + 6, value);
+ seek(cur + 1);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool BufferSource::SkipLeb128() {
+ int lim = static_cast<int>(std::min<size_type>(kMaxLeb128Size, size()));
+ const_iterator cur = cbegin();
+ for (int i = 0; i < lim; ++i, ++cur) {
+ if (!(*cur & 0x80)) {
+ seek(cur + 1);
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace zucchini
diff --git a/buffer_source.h b/buffer_source.h
new file mode 100644
index 0000000..7426d4e
--- /dev/null
+++ b/buffer_source.h
@@ -0,0 +1,141 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_
+#define COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <initializer_list>
+#include <type_traits>
+
+#include "base/check_op.h"
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// BufferSource acts like an input stream with convenience methods to parse data
+// from a contiguous sequence of raw data. The underlying ConstBufferView
+// emulates a cursor to track current read position, and guards against buffer
+// overrun. Where applicable, BufferSource should be passed by pointer to
+// maintain cursor progress across reads.
+class BufferSource : public ConstBufferView {
+ public:
+ // LEB128 info: http://dwarfstd.org/doc/dwarf-2.0.0.pdf , Section 7.6.
+ enum : size_t { kMaxLeb128Size = 5 };
+
+ static BufferSource FromRange(const_iterator first, const_iterator last) {
+ return BufferSource(ConstBufferView::FromRange(first, last));
+ }
+
+ using ConstBufferView::ConstBufferView;
+ BufferSource() = default;
+ explicit BufferSource(ConstBufferView buffer);
+ BufferSource(const BufferSource&) = default;
+ BufferSource& operator=(BufferSource&&) = default;
+
+ // Moves the cursor forward by |n| bytes, or to the end if data is exhausted.
+ // Returns a reference to *this, to allow chaining, e.g.:
+ // if (!buffer_source.Skip(1024).GetValue<uint32_t>(&value)) {
+ // ... // Handle error.
+ // }
+ // Notice that Skip() defers error handling to GetValue().
+ BufferSource& Skip(size_type n);
+
+ // Returns true if |value| matches data starting at the cursor when
+ // reinterpreted as the integral type |T|.
+ template <class T>
+ bool CheckNextValue(const T& value) const {
+ static_assert(std::is_integral<T>::value,
+ "Value type must be an integral type");
+ DCHECK_NE(begin(), nullptr);
+ if (Remaining() < sizeof(T))
+ return false;
+ return value == *reinterpret_cast<const T*>(begin());
+ }
+
+ // Returns true if the next bytes.size() bytes at the cursor match those in
+ // |bytes|.
+ bool CheckNextBytes(std::initializer_list<uint8_t> bytes) const;
+
+ // Same as CheckNextBytes(), but moves the cursor by bytes.size() if read is
+ // successfull.
+ bool ConsumeBytes(std::initializer_list<uint8_t> bytes);
+
+ // Tries to reinterpret data as type |T|, starting at the cursor and to write
+ // the result into |value|, while moving the cursor forward by sizeof(T).
+ // Returns true if sufficient data is available, and false otherwise.
+ template <class T>
+ bool GetValue(T* value) {
+ static_assert(std::is_standard_layout<T>::value,
+ "Value type must be a standard layout type");
+
+ DCHECK_NE(begin(), nullptr);
+ if (Remaining() < sizeof(T))
+ return false;
+ *value = *reinterpret_cast<const T*>(begin());
+ remove_prefix(sizeof(T));
+ return true;
+ }
+
+ // Tries to reinterpret data as type |T| at the cursor and to return a
+ // reinterpreted pointer of type |T| pointing into the underlying data, while
+ // moving the cursor forward by sizeof(T). Returns nullptr if insufficient
+ // data is available.
+ template <class T>
+ const T* GetPointer() {
+ static_assert(std::is_standard_layout<T>::value,
+ "Value type must be a standard layout type");
+
+ DCHECK_NE(begin(), nullptr);
+ if (Remaining() < sizeof(T))
+ return nullptr;
+ const T* ptr = reinterpret_cast<const T*>(begin());
+ remove_prefix(sizeof(T));
+ return ptr;
+ }
+
+ // Tries to reinterpret data as an array of type |T| with |count| elements,
+ // starting at the cursor, and to return a reinterpreted pointer of type |T|
+ // pointing into the underlying data, while advancing the cursor beyond the
+ // array. Returns nullptr if insufficient data is available.
+ template <class T>
+ const T* GetArray(size_t count) {
+ static_assert(std::is_standard_layout<T>::value,
+ "Value type must be a standard layout type");
+
+ if (Remaining() / sizeof(T) < count)
+ return nullptr;
+ const T* array = reinterpret_cast<const T*>(begin());
+ remove_prefix(count * sizeof(T));
+ return array;
+ }
+
+ // If sufficient data is available, assigns |buffer| to point to a region of
+ // |size| bytes starting at the cursor, while advancing the cursor beyond the
+ // region, and returns true. Otherwise returns false.
+ bool GetRegion(size_type size, ConstBufferView* buffer);
+
+ // Reads an Unsigned Little Endian Base 128 (uleb128) int at |first_|. If
+ // successful, writes the result to |value|, advances |first_|, and returns
+ // true. Otherwise returns false.
+ bool GetUleb128(uint32_t* value);
+
+ // Reads a Signed Little Endian Base 128 (sleb128) int at |first_|. If
+ // successful, writes the result to |value|, advances |first_|, and returns
+ // true. Otherwise returns false.
+ bool GetSleb128(int32_t* value);
+
+ // Reads uleb128 / sleb128 at |first_| but discards the result. If successful,
+ // advances |first_| and returns true. Otherwise returns false.
+ bool SkipLeb128();
+
+ // Returns the number of bytes remaining from cursor until end.
+ size_type Remaining() const { return size(); }
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_
diff --git a/buffer_source_unittest.cc b/buffer_source_unittest.cc
new file mode 100644
index 0000000..8cb8b3e
--- /dev/null
+++ b/buffer_source_unittest.cc
@@ -0,0 +1,347 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_source.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "components/zucchini/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+using vec = std::vector<uint8_t>;
+
+class BufferSourceTest : public testing::Test {
+ protected:
+ std::vector<uint8_t> bytes_ = ParseHexString("10 32 54 76 98 BA DC FE 10 00");
+
+ BufferSource source_ = {bytes_.data(), bytes_.size()};
+};
+
+TEST_F(BufferSourceTest, Skip) {
+ EXPECT_EQ(bytes_.size(), source_.Remaining());
+ source_.Skip(2);
+ EXPECT_EQ(bytes_.size() - 2, source_.Remaining());
+ source_.Skip(10); // Skipping past end just moves cursor to end.
+ EXPECT_EQ(size_t(0), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, CheckNextBytes) {
+ EXPECT_TRUE(source_.CheckNextBytes({0x10, 0x32, 0x54, 0x76}));
+ source_.Skip(4);
+ EXPECT_TRUE(source_.CheckNextBytes({0x98, 0xBA, 0xDC, 0xFE}));
+
+ // Cursor has not advanced, so check fails.
+ EXPECT_FALSE(source_.CheckNextBytes({0x10, 0x00}));
+
+ source_.Skip(4);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ // Goes beyond end by 2 bytes.
+ EXPECT_FALSE(source_.CheckNextBytes({0x10, 0x00, 0x00, 0x00}));
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, ConsumeBytes) {
+ EXPECT_FALSE(source_.ConsumeBytes({0x10, 0x00}));
+ EXPECT_EQ(bytes_.size(), source_.Remaining());
+ EXPECT_TRUE(source_.ConsumeBytes({0x10, 0x32, 0x54, 0x76}));
+ EXPECT_EQ(size_t(6), source_.Remaining());
+ EXPECT_TRUE(source_.ConsumeBytes({0x98, 0xBA, 0xDC, 0xFE}));
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ // Goes beyond end by 2 bytes.
+ EXPECT_FALSE(source_.ConsumeBytes({0x10, 0x00, 0x00, 0x00}));
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, CheckNextValue) {
+ EXPECT_TRUE(source_.CheckNextValue(uint32_t(0x76543210)));
+ EXPECT_FALSE(source_.CheckNextValue(uint32_t(0x0)));
+ EXPECT_TRUE(source_.CheckNextValue(uint64_t(0xFEDCBA9876543210)));
+ EXPECT_FALSE(source_.CheckNextValue(uint64_t(0x0)));
+
+ source_.Skip(8);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ // Goes beyond end by 2 bytes.
+ EXPECT_FALSE(source_.CheckNextValue(uint32_t(0x1000)));
+}
+
+// Supported by MSVC, g++, and clang++.
+// Ensures no gaps in packing.
+#pragma pack(push, 1)
+struct ValueType {
+ uint32_t a;
+ uint16_t b;
+};
+#pragma pack(pop)
+
+TEST_F(BufferSourceTest, GetValueIntegral) {
+ uint32_t value = 0;
+ EXPECT_TRUE(source_.GetValue(&value));
+ EXPECT_EQ(uint32_t(0x76543210), value);
+ EXPECT_EQ(size_t(6), source_.Remaining());
+
+ EXPECT_TRUE(source_.GetValue(&value));
+ EXPECT_EQ(uint32_t(0xFEDCBA98), value);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ EXPECT_FALSE(source_.GetValue(&value));
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetValueAggregate) {
+ ValueType value = {};
+ EXPECT_TRUE(source_.GetValue(&value));
+ EXPECT_EQ(uint32_t(0x76543210), value.a);
+ EXPECT_EQ(uint32_t(0xBA98), value.b);
+ EXPECT_EQ(size_t(4), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetRegion) {
+ ConstBufferView region;
+ EXPECT_TRUE(source_.GetRegion(0, &region));
+ EXPECT_EQ(bytes_.size(), source_.Remaining());
+ EXPECT_TRUE(region.empty());
+
+ EXPECT_TRUE(source_.GetRegion(2, &region));
+ EXPECT_EQ(size_t(2), region.size());
+ EXPECT_EQ(vec({0x10, 0x32}), vec(region.begin(), region.end()));
+ EXPECT_EQ(size_t(8), source_.Remaining());
+
+ EXPECT_FALSE(source_.GetRegion(bytes_.size(), &region));
+ EXPECT_EQ(size_t(8), source_.Remaining());
+ // |region| is left untouched.
+ EXPECT_EQ(vec({0x10, 0x32}), vec(region.begin(), region.end()));
+ EXPECT_EQ(size_t(2), region.size());
+}
+
+TEST_F(BufferSourceTest, GetPointerIntegral) {
+ const uint32_t* ptr = source_.GetPointer<uint32_t>();
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0x76543210), *ptr);
+ EXPECT_EQ(size_t(6), source_.Remaining());
+
+ ptr = source_.GetPointer<uint32_t>();
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0xFEDCBA98), *ptr);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+
+ EXPECT_EQ(nullptr, source_.GetPointer<uint32_t>());
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetPointerAggregate) {
+ const ValueType* ptr = source_.GetPointer<ValueType>();
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0x76543210), ptr->a);
+ EXPECT_EQ(uint32_t(0xBA98), ptr->b);
+ EXPECT_EQ(size_t(4), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetArrayIntegral) {
+ EXPECT_EQ(nullptr, source_.GetArray<uint32_t>(3));
+
+ const uint32_t* ptr = source_.GetArray<uint32_t>(2);
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0x76543210), ptr[0]);
+ EXPECT_EQ(uint32_t(0xFEDCBA98), ptr[1]);
+ EXPECT_EQ(size_t(2), source_.Remaining());
+}
+
+TEST_F(BufferSourceTest, GetArrayAggregate) {
+ const ValueType* ptr = source_.GetArray<ValueType>(2);
+ EXPECT_EQ(nullptr, ptr);
+
+ ptr = source_.GetArray<ValueType>(1);
+
+ EXPECT_NE(nullptr, ptr);
+ EXPECT_EQ(uint32_t(0x76543210), ptr[0].a);
+ EXPECT_EQ(uint32_t(0xBA98), ptr[0].b);
+ EXPECT_EQ(size_t(4), source_.Remaining());
+}
+
+// Exhaustive decode cases for unsigned LEB128: canonical and redundant
+// (over-long) encodings, truncated input, the 5-byte limit for uint32_t, and
+// discarding of payload bits beyond bit 31 in the final byte.
+TEST_F(BufferSourceTest, GetUleb128) {
+  using size_type = BufferSource::size_type;
+  // Result = {success, value, bytes_consumed}.
+  using Result = std::tuple<bool, uint32_t, size_type>;
+
+  constexpr uint32_t kUnInit = 0xCCCCCCCC;  // Arbitrary value.
+  constexpr Result kBad{false, kUnInit, 0U};
+
+  // Decodes |hex_string| as ULEB128 and reports {success, value, consumed}.
+  auto run = [](const std::string hex_string) -> Result {
+    std::vector<uint8_t> bytes = ParseHexString(hex_string);
+    BufferSource source(ConstBufferView{bytes.data(), bytes.size()});
+    BufferSource::iterator base = source.begin();
+    // Initialize |value| to |kUnInit| to ensure no write on failure.
+    uint32_t value = kUnInit;
+    bool success = source.GetUleb128(&value);
+    return {success, value, source.begin() - base};
+  };
+
+  // Shorthand for an expected successful decode.
+  auto good = [](uint32_t value, size_type bytes_consumed) -> Result {
+    return Result{true, value, bytes_consumed};
+  };
+
+  EXPECT_EQ(good(0x0U, 1U), run("00"));
+  EXPECT_EQ(good(0x20U, 1U), run("20"));
+  EXPECT_EQ(good(0x42U, 1U), run("42"));
+  EXPECT_EQ(good(0x7FU, 1U), run("7F"));
+  EXPECT_EQ(kBad, run("80"));  // Out of data.
+  EXPECT_EQ(good(0x0U, 2U), run("80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x80U, 2U), run("80 01"));
+  EXPECT_EQ(good(0x7FU, 2U), run("FF 00"));  // Redundant (unsigned).
+  EXPECT_EQ(good(0x3FFFU, 2U), run("FF 7F"));
+  EXPECT_EQ(good(0x0U, 1U), run("00 80"));  // Only reads byte 0.
+  EXPECT_EQ(kBad, run("80 80"));  // Out of data.
+  EXPECT_EQ(kBad, run("F1 88"));  // Out of data.
+  EXPECT_EQ(good(0x0U, 3U), run("80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x4000U, 3U), run("80 80 01"));
+  EXPECT_EQ(good(0x00100000U, 3U), run("80 80 40"));
+  EXPECT_EQ(good(0x001FFFFFU, 3U), run("FF FF 7F"));
+  EXPECT_EQ(good(0x0U, 1U), run("00 00 80"));  // Only reads byte 0.
+  EXPECT_EQ(kBad, run("80 80 80"));  // Out of data.
+  EXPECT_EQ(kBad, run("AB CD EF"));  // Out of data.
+  EXPECT_EQ(good(0x0U, 4U), run("80 80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x00100000U, 4U), run("80 80 C0 00"));
+  EXPECT_EQ(good(0x00200000U, 4U), run("80 80 80 01"));
+  EXPECT_EQ(good(0x08000000U, 4U), run("80 80 80 40"));
+  EXPECT_EQ(good(0x001FC07FU, 4U), run("FF 80 FF 00"));
+  EXPECT_EQ(good(0x0U, 5U), run("80 80 80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x10000000U, 5U), run("80 80 80 80 01"));
+  EXPECT_EQ(good(0x10204081U, 5U), run("81 81 81 81 01"));
+  EXPECT_EQ(good(0x7FFFFFFFU, 5U), run("FF FF FF FF 07"));
+  EXPECT_EQ(good(0x80000000U, 5U), run("80 80 80 80 08"));
+  EXPECT_EQ(good(0xFFFFFFFFU, 5U), run("FF FF FF FF 0F"));
+  EXPECT_EQ(kBad, run("FF FF FF FF 80"));  // Too long / out of data.
+  EXPECT_EQ(good(0x0FFFFFFFU, 5U), run("FF FF FF FF 10"));  // "1" discarded.
+  EXPECT_EQ(good(0x00000000U, 5U), run("80 80 80 80 20"));  // "2" discarded.
+  EXPECT_EQ(good(0xA54A952AU, 5U), run("AA AA AA AA 7A"));  // "7" discarded.
+  EXPECT_EQ(kBad, run("FF FF FF FF FF 00"));  // Too long.
+}
+
+// Exhaustive decode cases for signed LEB128: sign extension from the final
+// byte's payload, redundant encodings, truncated input, and the 5-byte limit.
+TEST_F(BufferSourceTest, GetSleb128) {
+  using size_type = BufferSource::size_type;
+  // Result = {success, value, bytes_consumed}.
+  using Result = std::tuple<bool, int32_t, size_type>;
+
+  constexpr int32_t kUnInit = 0xCCCCCCCC;  // Arbitrary value.
+  constexpr Result kBad{false, kUnInit, 0U};
+
+  // Decodes |hex_string| as SLEB128 and reports {success, value, consumed}.
+  auto run = [](const std::string hex_string) -> Result {
+    std::vector<uint8_t> bytes = ParseHexString(hex_string);
+    BufferSource source(ConstBufferView{bytes.data(), bytes.size()});
+    BufferSource::iterator base = source.begin();
+    // Initialize |value| to |kUnInit| to ensure no write on failure.
+    int32_t value = kUnInit;
+    bool success = source.GetSleb128(&value);
+    return {success, value, source.begin() - base};
+  };
+
+  // Shorthand for an expected successful decode.
+  auto good = [](int32_t value, size_type bytes_consumed) -> Result {
+    return Result{true, value, bytes_consumed};
+  };
+
+  EXPECT_EQ(good(0x0, 1U), run("00"));
+  EXPECT_EQ(good(0x20U, 1U), run("20"));
+  EXPECT_EQ(good(-0x3E, 1U), run("42"));
+  EXPECT_EQ(good(-0x1, 1U), run("7F"));
+  EXPECT_EQ(kBad, run("80"));  // Out of data.
+  EXPECT_EQ(good(0x0, 2U), run("80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x80, 2U), run("80 01"));
+  EXPECT_EQ(good(0x7F, 2U), run("FF 00"));  // Not redundant (sign bit differs).
+  EXPECT_EQ(good(-0x1, 2U), run("FF 7F"));  // Redundant code.
+  EXPECT_EQ(good(0x0, 1U), run("00 80"));  // Only reads byte 0.
+  EXPECT_EQ(kBad, run("80 80"));  // Out of data.
+  EXPECT_EQ(kBad, run("F1 88"));  // Out of data.
+  EXPECT_EQ(good(0x0, 3U), run("80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x4000, 3U), run("80 80 01"));
+  EXPECT_EQ(good(-0x100000, 3U), run("80 80 40"));
+  EXPECT_EQ(good(-0x1, 3U), run("FF FF 7F"));  // Redundant code.
+  EXPECT_EQ(good(0x0, 1U), run("00 00 80"));  // Only reads byte 0.
+  EXPECT_EQ(kBad, run("80 80 80"));  // Out of data.
+  EXPECT_EQ(kBad, run("AB CD EF"));  // Out of data.
+  EXPECT_EQ(good(0x0, 4U), run("80 80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x00100000, 4U), run("80 80 C0 00"));
+  EXPECT_EQ(good(0x00200000, 4U), run("80 80 80 01"));
+  EXPECT_EQ(good(-static_cast<int32_t>(0x08000000), 4U), run("80 80 80 40"));
+  EXPECT_EQ(good(0x001FC07F, 4U), run("FF 80 FF 00"));
+  EXPECT_EQ(good(0x0, 5U), run("80 80 80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(0x10000000, 5U), run("80 80 80 80 01"));
+  EXPECT_EQ(good(0x10204081, 5U), run("81 81 81 81 01"));
+  EXPECT_EQ(good(0x7FFFFFFF, 5U), run("FF FF FF FF 07"));
+  EXPECT_EQ(good(-static_cast<int32_t>(0x80000000), 5U), run("80 80 80 80 08"));
+  EXPECT_EQ(good(-0x1, 5U), run("FF FF FF FF 0F"));  // Redundant code.
+  EXPECT_EQ(kBad, run("FF FF FF FF 80"));  // Too long / out of data.
+  EXPECT_EQ(good(0x0FFFFFFF, 5U), run("FF FF FF FF 10"));  // "1" discarded.
+  EXPECT_EQ(good(0x00000000, 5U), run("80 80 80 80 20"));  // "2" discarded.
+  EXPECT_EQ(good(-0x5AB56AD6, 5U), run("AA AA AA AA 7A"));  // "7" discarded.
+  EXPECT_EQ(kBad, run("FF FF FF FF FF 00"));  // Too long.
+}
+
+// SkipLeb128() advances past one LEB128-encoded value without decoding it;
+// the accepted/rejected byte sequences mirror the GetUleb128 cases above.
+TEST_F(BufferSourceTest, SkipLeb128) {
+  using size_type = BufferSource::size_type;
+  // Result = {success, bytes_consumed}.
+  using Result = std::tuple<bool, size_type>;
+
+  constexpr Result kBad{false, 0U};
+
+  // Skips one LEB128 value in |hex_string|; reports {success, consumed}.
+  auto run = [](const std::string hex_string) -> Result {
+    std::vector<uint8_t> bytes = ParseHexString(hex_string);
+    BufferSource source(ConstBufferView{bytes.data(), bytes.size()});
+    BufferSource::iterator base = source.begin();
+    bool success = source.SkipLeb128();
+    return {success, source.begin() - base};
+  };
+
+  // Shorthand for an expected successful skip.
+  auto good = [](size_type bytes_consumed) -> Result {
+    return Result{true, bytes_consumed};
+  };
+
+  EXPECT_EQ(good(1U), run("00"));
+  EXPECT_EQ(good(1U), run("20"));
+  EXPECT_EQ(good(1U), run("42"));
+  EXPECT_EQ(good(1U), run("7F"));
+  EXPECT_EQ(kBad, run("80"));  // Out of data.
+  EXPECT_EQ(good(2U), run("80 00"));  // Redundant code.
+  EXPECT_EQ(good(2U), run("80 01"));
+  EXPECT_EQ(good(2U), run("FF 00"));  // Redundant (unsigned).
+  EXPECT_EQ(good(2U), run("FF 7F"));
+  EXPECT_EQ(good(1U), run("00 80"));  // Only reads byte 0.
+  EXPECT_EQ(kBad, run("80 80"));  // Out of data.
+  EXPECT_EQ(kBad, run("F1 88"));  // Out of data.
+  EXPECT_EQ(good(3U), run("80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(3U), run("80 80 01"));
+  EXPECT_EQ(good(3U), run("80 80 40"));
+  EXPECT_EQ(good(3U), run("FF FF 7F"));
+  EXPECT_EQ(good(1U), run("00 00 80"));  // Only reads byte 0.
+  EXPECT_EQ(kBad, run("80 80 80"));  // Out of data.
+  EXPECT_EQ(kBad, run("AB CD EF"));  // Out of data.
+  EXPECT_EQ(good(4U), run("80 80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(4U), run("80 80 C0 00"));
+  EXPECT_EQ(good(4U), run("80 80 80 01"));
+  EXPECT_EQ(good(4U), run("80 80 80 40"));
+  EXPECT_EQ(good(4U), run("FF 80 FF 00"));
+  EXPECT_EQ(good(5U), run("80 80 80 80 00"));  // Redundant code.
+  EXPECT_EQ(good(5U), run("80 80 80 80 01"));
+  EXPECT_EQ(good(5U), run("81 81 81 81 01"));
+  EXPECT_EQ(good(5U), run("FF FF FF FF 07"));
+  EXPECT_EQ(good(5U), run("80 80 80 80 08"));
+  EXPECT_EQ(good(5U), run("FF FF FF FF 0F"));
+  EXPECT_EQ(kBad, run("FF FF FF FF 80"));  // Too long / out of data.
+  EXPECT_EQ(good(5U), run("FF FF FF FF 10"));  // "1" discarded.
+  EXPECT_EQ(good(5U), run("80 80 80 80 20"));  // "2" discarded.
+  EXPECT_EQ(good(5U), run("AA AA AA AA 7A"));  // "7" discarded.
+  EXPECT_EQ(kBad, run("FF FF FF FF FF 00"));  // Too long.
+}
+
+} // namespace zucchini
diff --git a/buffer_view.h b/buffer_view.h
new file mode 100644
index 0000000..661e3c3
--- /dev/null
+++ b/buffer_view.h
@@ -0,0 +1,217 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_
+#define COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <type_traits>
+
+#include "base/check_op.h"
+#include "components/zucchini/algorithm.h"
+
+namespace zucchini {
+
+// Describes a region within a buffer, with starting offset and size.
+// Describes a region within a buffer, with starting offset and size.
+struct BufferRegion {
+  // The region data are stored as |offset| and |size|, but often it is useful
+  // to represent it as an interval [lo(), hi()) = [offset, offset + size).
+  size_t lo() const { return offset; }
+  size_t hi() const { return offset + size; }
+
+  // Returns whether the Region fits in |[0, container_size)|. Special case:
+  // a size-0 region starting at |container_size| fits.
+  // Written as subtraction-then-compare so |offset + size| never overflows.
+  bool FitsIn(size_t container_size) const {
+    return offset <= container_size && container_size - offset >= size;
+  }
+
+  // Returns |v| clipped to the inclusive range |[lo(), hi()]|.
+  size_t InclusiveClamp(size_t v) const {
+    return zucchini::InclusiveClamp(v, lo(), hi());
+  }
+
+  // Region data use size_t to match BufferViewBase::size_type, to make it
+  // convenient to index into buffer view.
+  size_t offset;
+  size_t size;
+};
+
+namespace internal {
+
+// TODO(huangs): Rename to BasicBufferView.
+// BufferViewBase should not be used directly; it is an implementation used for
+// both BufferView and MutableBufferView.
+// TODO(huangs): Rename to BasicBufferView.
+// BufferViewBase should not be used directly; it is an implementation used for
+// both BufferView and MutableBufferView. |T| is the (possibly const-qualified)
+// byte type; the view is a non-owning [first_, last_) pointer pair.
+template <class T>
+class BufferViewBase {
+ public:
+  using value_type = T;
+  using reference = T&;
+  using pointer = T*;
+  using iterator = T*;
+  using const_iterator = typename std::add_const<T>::type*;
+  using size_type = std::size_t;
+  using difference_type = std::ptrdiff_t;
+
+  // Creates a view over [|first|, |last|). DCHECKs that the range is ordered.
+  static BufferViewBase FromRange(iterator first, iterator last) {
+    DCHECK_GE(last, first);
+    BufferViewBase ret;
+    ret.first_ = first;
+    ret.last_ = last;
+    return ret;
+  }
+
+  BufferViewBase() = default;
+
+  // Creates a view over |size| bytes starting at |first|.
+  BufferViewBase(iterator first, size_type size)
+      : first_(first), last_(first_ + size) {
+    DCHECK_GE(last_, first_);
+  }
+
+  // Implicit conversion between compatible views (e.g. mutable -> const).
+  template <class U>
+  BufferViewBase(const BufferViewBase<U>& that)
+      : first_(that.begin()), last_(that.end()) {}
+
+  template <class U>
+  BufferViewBase(BufferViewBase<U>&& that)
+      : first_(that.begin()), last_(that.end()) {}
+
+  BufferViewBase(const BufferViewBase&) = default;
+  BufferViewBase& operator=(const BufferViewBase&) = default;
+
+  // Iterators
+
+  iterator begin() const { return first_; }
+  iterator end() const { return last_; }
+  const_iterator cbegin() const { return begin(); }
+  const_iterator cend() const { return end(); }
+
+  // Capacity
+
+  bool empty() const { return first_ == last_; }
+  size_type size() const { return last_ - first_; }
+
+  // Returns whether the buffer is large enough to cover |region|.
+  bool covers(const BufferRegion& region) const {
+    return region.FitsIn(size());
+  }
+
+  // Returns whether the buffer is large enough to cover an array starting at
+  // |offset| with |num| elements, each taking |elt_size| bytes.
+  // NOTE(review): this could be a const member function - confirm and fix
+  // upstream.
+  bool covers_array(size_t offset, size_t num, size_t elt_size) {
+    DCHECK_GT(elt_size, 0U);
+    // Use subtraction and division to avoid overflow.
+    return offset <= size() && (size() - offset) / elt_size >= num;
+  }
+
+  // Element access
+
+  // Returns the raw value at specified location |pos|.
+  // If |pos| is not within the range of the buffer, the process is terminated.
+  reference operator[](size_type pos) const {
+    CHECK_LT(pos, size());
+    return first_[pos];
+  }
+
+  // Returns a sub-buffer described by |region|.
+  BufferViewBase operator[](BufferRegion region) const {
+    DCHECK_LE(region.offset, size());
+    DCHECK_LE(region.size, size() - region.offset);
+    return {begin() + region.offset, region.size};
+  }
+
+  // Returns a const reference to an object of type U whose raw storage starts
+  // at location |pos|. CHECKs that sizeof(U) bytes are available.
+  template <class U>
+  const U& read(size_type pos) const {
+    // TODO(huangs): Use can_access<U>(pos) after fixing can_access().
+    CHECK_LE(sizeof(U), size());
+    CHECK_LE(pos, size() - sizeof(U));
+    return *reinterpret_cast<const U*>(begin() + pos);
+  }
+
+  // Writes |value| as raw bytes of type U at location |pos|. CHECKs that
+  // sizeof(U) bytes are available.
+  template <class U>
+  void write(size_type pos, const U& value) {
+    // TODO(huangs): Use can_access<U>(pos) after fixing can_access().
+    CHECK_LE(sizeof(U), size());
+    CHECK_LE(pos, size() - sizeof(U));
+    *reinterpret_cast<U*>(begin() + pos) = value;
+  }
+
+  // Returns a mutable reference to an object type U whose raw storage starts
+  // at location |pos|.
+  template <class U>
+  U& modify(size_type pos) {
+    // TODO(huangs): Use can_access<U>(pos) after fixing can_access().
+    CHECK_LE(sizeof(U), size());
+    CHECK_LE(pos, size() - sizeof(U));
+    return *reinterpret_cast<U*>(begin() + pos);
+  }
+
+  // Returns whether an object of type U can be read/written at |pos| without
+  // running off the end of the buffer.
+  template <class U>
+  bool can_access(size_type pos) const {
+    return pos < size() && size() - pos >= sizeof(U);
+  }
+
+  // Returns a BufferRegion describing the full view, with offset = 0. If the
+  // BufferViewBase is derived from another, this does *not* return the
+  // original region used for its definition (hence "local").
+  BufferRegion local_region() const { return BufferRegion{0, size()}; }
+
+  // Returns whether |other| has identical size and byte content.
+  bool equals(BufferViewBase other) const {
+    return size() == other.size() && std::equal(begin(), end(), other.begin());
+  }
+
+  // Modifiers
+
+  // Truncates the view to |new_size| bytes; must not grow it.
+  void shrink(size_type new_size) {
+    DCHECK_LE(first_ + new_size, last_);
+    last_ = first_ + new_size;
+  }
+
+  // Moves the start of the view forward by n bytes.
+  void remove_prefix(size_type n) {
+    DCHECK_LE(n, size());
+    first_ += n;
+  }
+
+  // Moves the start of the view to |it|, which is in range [begin(), end()).
+  void seek(iterator it) {
+    DCHECK_GE(it, begin());
+    DCHECK_LE(it, end());
+    first_ = it;
+  }
+
+  // Given |origin| that contains |*this|, minimally increase |first_| (possibly
+  // by 0) so that |first_ <= last_|, and |first_ - origin.first_| is a multiple
+  // of |alignment|. On success, updates |first_| and returns true. Otherwise
+  // returns false.
+  bool AlignOn(BufferViewBase origin, size_type alignment) {
+    DCHECK_GT(alignment, 0U);
+    DCHECK_LE(origin.first_, first_);
+    DCHECK_GE(origin.last_, last_);
+    size_type aligned_size =
+        AlignCeil(static_cast<size_type>(first_ - origin.first_), alignment);
+    if (aligned_size > static_cast<size_type>(last_ - origin.first_))
+      return false;
+    first_ = origin.first_ + aligned_size;
+    return true;
+  }
+
+ private:
+  iterator first_ = nullptr;
+  iterator last_ = nullptr;
+};
+
+} // namespace internal
+
+// Classes to encapsulate a contiguous sequence of raw data, without owning the
+// encapsulated memory regions. These are intended to be used as value types.
+
+using ConstBufferView = internal::BufferViewBase<const uint8_t>;
+using MutableBufferView = internal::BufferViewBase<uint8_t>;
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_
diff --git a/buffer_view_unittest.cc b/buffer_view_unittest.cc
new file mode 100644
index 0000000..30170d7
--- /dev/null
+++ b/buffer_view_unittest.cc
@@ -0,0 +1,298 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/buffer_view.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iterator>
+#include <type_traits>
+#include <vector>
+
+#include "base/test/gtest_util.h"
+#include "components/zucchini/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// Fixture providing a 10-byte scratch buffer shared by the tests below.
+class BufferViewTest : public testing::Test {
+ protected:
+  // Some tests might modify this.
+  std::vector<uint8_t> bytes_ = ParseHexString("10 32 54 76 98 BA DC FE 10 00");
+};
+
+// size() reports the constructed length for every length in [0, 10].
+TEST_F(BufferViewTest, Size) {
+  for (size_t len = 0; len <= bytes_.size(); ++len) {
+    EXPECT_EQ(len, ConstBufferView(bytes_.data(), len).size());
+    EXPECT_EQ(len, MutableBufferView(bytes_.data(), len).size());
+  }
+}
+
+// empty() is true only for zero-length views.
+TEST_F(BufferViewTest, Empty) {
+  // Empty view.
+  EXPECT_TRUE(ConstBufferView(bytes_.data(), 0).empty());
+  EXPECT_TRUE(MutableBufferView(bytes_.data(), 0).empty());
+
+  for (size_t len = 1; len <= bytes_.size(); ++len) {
+    EXPECT_FALSE(ConstBufferView(bytes_.data(), len).empty());
+    EXPECT_FALSE(MutableBufferView(bytes_.data(), len).empty());
+  }
+}
+
+// FromRange() builds a view over [first, last); a reversed range DCHECKs.
+TEST_F(BufferViewTest, FromRange) {
+  constexpr size_t kSize = 10;
+  uint8_t raw_data[kSize] = {0x10, 0x32, 0x54, 0x76, 0x98,
+                             0xBA, 0xDC, 0xFE, 0x10, 0x00};
+  ConstBufferView buffer =
+      ConstBufferView::FromRange(std::begin(raw_data), std::end(raw_data));
+  EXPECT_EQ(bytes_.size(), buffer.size());
+  EXPECT_EQ(std::begin(raw_data), buffer.begin());
+
+  MutableBufferView mutable_buffer =
+      MutableBufferView::FromRange(std::begin(raw_data), std::end(raw_data));
+  EXPECT_EQ(bytes_.size(), mutable_buffer.size());
+  EXPECT_EQ(std::begin(raw_data), mutable_buffer.begin());
+
+  EXPECT_DCHECK_DEATH(
+      ConstBufferView::FromRange(std::end(raw_data), std::begin(raw_data)));
+
+  EXPECT_DCHECK_DEATH(MutableBufferView::FromRange(std::begin(raw_data) + 1,
+                                                   std::begin(raw_data)));
+}
+
+// operator[](pos): ConstBufferView yields non-assignable values;
+// MutableBufferView aliases and can modify the underlying bytes.
+TEST_F(BufferViewTest, Subscript) {
+  ConstBufferView view(bytes_.data(), bytes_.size());
+
+  EXPECT_EQ(0x10, view[0]);
+  static_assert(!std::is_assignable<decltype(view[0]), uint8_t>::value,
+                "BufferView values should not be mutable.");
+
+  MutableBufferView mutable_view(bytes_.data(), bytes_.size());
+
+  EXPECT_EQ(bytes_.data(), &mutable_view[0]);
+  mutable_view[0] = 42;
+  EXPECT_EQ(42, mutable_view[0]);
+}
+
+// operator[](BufferRegion) yields a sub-view aliasing the parent's storage.
+TEST_F(BufferViewTest, SubRegion) {
+  ConstBufferView view(bytes_.data(), bytes_.size());
+
+  ConstBufferView sub_view = view[{2, 4}];
+  EXPECT_EQ(view.begin() + 2, sub_view.begin());
+  EXPECT_EQ(size_t(4), sub_view.size());
+}
+
+// shrink() may keep or reduce size; attempting to grow back DCHECKs.
+TEST_F(BufferViewTest, Shrink) {
+  ConstBufferView buffer(bytes_.data(), bytes_.size());
+
+  buffer.shrink(bytes_.size());
+  EXPECT_EQ(bytes_.size(), buffer.size());
+  buffer.shrink(2);
+  EXPECT_EQ(size_t(2), buffer.size());
+  EXPECT_DCHECK_DEATH(buffer.shrink(bytes_.size()));
+}
+
+// read<U>() returns little-endian-interpreted values at a position, and kills
+// the process (CHECK) when sizeof(U) bytes would run off the end.
+TEST_F(BufferViewTest, Read) {
+  ConstBufferView buffer(bytes_.data(), bytes_.size());
+
+  EXPECT_EQ(0x10U, buffer.read<uint8_t>(0));
+  EXPECT_EQ(0x54U, buffer.read<uint8_t>(2));
+
+  EXPECT_EQ(0x3210U, buffer.read<uint16_t>(0));
+  EXPECT_EQ(0x7654U, buffer.read<uint16_t>(2));
+
+  EXPECT_EQ(0x76543210U, buffer.read<uint32_t>(0));
+  EXPECT_EQ(0xBA987654U, buffer.read<uint32_t>(2));
+
+  EXPECT_EQ(0xFEDCBA9876543210ULL, buffer.read<uint64_t>(0));
+
+  EXPECT_EQ(0x00, buffer.read<uint8_t>(9));
+  EXPECT_DEATH(buffer.read<uint8_t>(10), "");
+
+  EXPECT_EQ(0x0010FEDCU, buffer.read<uint32_t>(6));
+  EXPECT_DEATH(buffer.read<uint32_t>(7), "");
+}
+
+// write<U>() stores values at a position, CHECK-failing on out-of-bounds.
+TEST_F(BufferViewTest, Write) {
+  MutableBufferView buffer(bytes_.data(), bytes_.size());
+
+  buffer.write<uint32_t>(0, 0x01234567);
+  buffer.write<uint32_t>(4, 0x89ABCDEF);
+  EXPECT_EQ(ParseHexString("67 45 23 01 EF CD AB 89 10 00"),
+            std::vector<uint8_t>(buffer.begin(), buffer.end()));
+
+  buffer.write<uint8_t>(9, 0xFF);
+  EXPECT_DEATH(buffer.write<uint8_t>(10, 0xFF), "");
+
+  buffer.write<uint32_t>(6, 0xFFFFFFFF);
+  EXPECT_DEATH(buffer.write<uint32_t>(7, 0xFFFFFFFF), "");
+}
+
+// modify<U>() returns a mutable in-place reference; bounds violations CHECK.
+TEST_F(BufferViewTest, Modify) {
+  struct TestStruct {
+    uint32_t a;
+    uint32_t b;
+  };
+
+  MutableBufferView buffer(bytes_.data(), bytes_.size());
+
+  buffer.modify<TestStruct>(0).a = 0x01234567;
+  buffer.modify<TestStruct>(0).b = 0x89ABCDEF;
+  EXPECT_EQ(ParseHexString("67 45 23 01 EF CD AB 89 10 00"),
+            std::vector<uint8_t>(buffer.begin(), buffer.end()));
+
+  buffer.modify<uint8_t>(9);
+  EXPECT_DEATH(buffer.modify<uint8_t>(10), "");
+
+  buffer.modify<uint32_t>(6);
+  EXPECT_DEATH(buffer.modify<uint32_t>(7), "");
+}
+
+// can_access<U>(pos): true iff sizeof(U) bytes fit at |pos|, including the
+// overflow-prone huge-offset cases.
+TEST_F(BufferViewTest, CanAccess) {
+  MutableBufferView buffer(bytes_.data(), bytes_.size());
+  EXPECT_TRUE(buffer.can_access<uint32_t>(0));
+  EXPECT_TRUE(buffer.can_access<uint32_t>(6));
+  EXPECT_FALSE(buffer.can_access<uint32_t>(7));
+  EXPECT_FALSE(buffer.can_access<uint32_t>(10));
+  EXPECT_FALSE(buffer.can_access<uint32_t>(0xFFFFFFFFU));
+
+  EXPECT_TRUE(buffer.can_access<uint8_t>(0));
+  EXPECT_TRUE(buffer.can_access<uint8_t>(7));
+  EXPECT_TRUE(buffer.can_access<uint8_t>(9));
+  EXPECT_FALSE(buffer.can_access<uint8_t>(10));
+  EXPECT_FALSE(buffer.can_access<uint8_t>(0xFFFFFFFF));
+}
+
+// local_region() covers the whole view with offset 0.
+TEST_F(BufferViewTest, LocalRegion) {
+  ConstBufferView view(bytes_.data(), bytes_.size());
+
+  BufferRegion region = view.local_region();
+  EXPECT_EQ(0U, region.offset);
+  EXPECT_EQ(bytes_.size(), region.size);
+}
+
+// covers(): boundary cases at both ends, including the size-0 region at
+// offset == size() (fits) and size_t(-1) overflow-probing inputs.
+TEST_F(BufferViewTest, Covers) {
+  EXPECT_TRUE(ConstBufferView().covers({0, 0}));
+  EXPECT_FALSE(ConstBufferView().covers({0, 1}));
+
+  ConstBufferView view(bytes_.data(), bytes_.size());
+
+  EXPECT_TRUE(view.covers({0, 0}));
+  EXPECT_TRUE(view.covers({0, 1}));
+  EXPECT_TRUE(view.covers({0, bytes_.size()}));
+  EXPECT_FALSE(view.covers({0, bytes_.size() + 1}));
+  EXPECT_FALSE(view.covers({1, bytes_.size()}));
+
+  EXPECT_TRUE(view.covers({bytes_.size() - 1, 0}));
+  EXPECT_TRUE(view.covers({bytes_.size() - 1, 1}));
+  EXPECT_FALSE(view.covers({bytes_.size() - 1, 2}));
+  EXPECT_TRUE(view.covers({bytes_.size(), 0}));
+  EXPECT_FALSE(view.covers({bytes_.size(), 1}));
+  EXPECT_FALSE(view.covers({bytes_.size() + 1, 0}));
+  EXPECT_FALSE(view.covers({bytes_.size() + 1, 1}));
+
+  EXPECT_FALSE(view.covers({1, size_t(-1)}));
+  EXPECT_FALSE(view.covers({size_t(-1), 1}));
+  EXPECT_FALSE(view.covers({size_t(-1), size_t(-1)}));
+}
+
+// covers_array(offset, num, elt_size): fitting and non-fitting arrays,
+// zero-count arrays (always fit), and overflow-probing sizes.
+TEST_F(BufferViewTest, CoversArray) {
+  ConstBufferView view(bytes_.data(), bytes_.size());
+
+  for (uint32_t i = 1; i <= bytes_.size(); ++i) {
+    EXPECT_TRUE(view.covers_array(0, 1, i));
+    EXPECT_TRUE(view.covers_array(0, i, 1));
+    EXPECT_TRUE(view.covers_array(0, i, bytes_.size() / i));
+    EXPECT_TRUE(view.covers_array(0, bytes_.size() / i, i));
+    if (i < bytes_.size()) {
+      EXPECT_TRUE(view.covers_array(i, 1, bytes_.size() - i));
+      EXPECT_TRUE(view.covers_array(i, bytes_.size() - i, 1));
+    }
+    EXPECT_TRUE(view.covers_array(bytes_.size() - (bytes_.size() / i) * i, 1,
+                                  bytes_.size() / i));
+  }
+
+  EXPECT_TRUE(view.covers_array(0, 0, bytes_.size()));
+  EXPECT_TRUE(view.covers_array(bytes_.size() - 1, 0, bytes_.size()));
+  EXPECT_TRUE(view.covers_array(bytes_.size(), 0, bytes_.size()));
+  EXPECT_TRUE(view.covers_array(0, 0, 0x10000));
+  EXPECT_TRUE(view.covers_array(bytes_.size() - 1, 0, 0x10000));
+  EXPECT_TRUE(view.covers_array(bytes_.size(), 0, 0x10000));
+
+  EXPECT_FALSE(view.covers_array(0, 1, bytes_.size() + 1));
+  EXPECT_FALSE(view.covers_array(0, 2, bytes_.size()));
+  // NOTE(review): "+ 11" looks like a typo for "+ 1"; both yield false here,
+  // so behavior is unaffected - confirm against upstream.
+  EXPECT_FALSE(view.covers_array(0, bytes_.size() + 11, 1));
+  EXPECT_FALSE(view.covers_array(0, bytes_.size(), 2));
+  EXPECT_FALSE(view.covers_array(1, bytes_.size(), 1));
+
+  EXPECT_FALSE(view.covers_array(bytes_.size(), 1, 1));
+  EXPECT_TRUE(view.covers_array(bytes_.size(), 0, 1));
+  EXPECT_FALSE(view.covers_array(0, 0x10000, 0x10000));
+}
+
+// equals(): byte-wise comparison of (sub-)views, including empty sub-views at
+// different offsets and equal content at different positions.
+TEST_F(BufferViewTest, Equals) {
+  // Almost identical to |bytes_|, except at 2 places:  v  v
+  std::vector<uint8_t> bytes2 = ParseHexString("10 32 54 76 98 AB CD FE 10 00");
+  ConstBufferView view1(bytes_.data(), bytes_.size());
+  ConstBufferView view2(&bytes2[0], bytes2.size());
+
+  EXPECT_TRUE(view1.equals(view1));
+  EXPECT_TRUE(view2.equals(view2));
+  EXPECT_FALSE(view1.equals(view2));
+  EXPECT_FALSE(view2.equals(view1));
+
+  EXPECT_TRUE((view1[{0, 0}]).equals(view2[{0, 0}]));
+  EXPECT_TRUE((view1[{0, 0}]).equals(view2[{5, 0}]));
+  EXPECT_TRUE((view1[{0, 5}]).equals(view2[{0, 5}]));
+  EXPECT_FALSE((view1[{0, 6}]).equals(view2[{0, 6}]));
+  EXPECT_FALSE((view1[{0, 7}]).equals(view1[{0, 6}]));
+  EXPECT_TRUE((view1[{5, 3}]).equals(view1[{5, 3}]));
+  EXPECT_FALSE((view1[{5, 1}]).equals(view1[{5, 3}]));
+  EXPECT_TRUE((view2[{0, 1}]).equals(view2[{8, 1}]));
+  EXPECT_FALSE((view2[{1, 1}]).equals(view2[{8, 1}]));
+}
+
+// AlignOn(): advances the view start to the next multiple of |alignment|
+// relative to |image|; fails (no movement) when the aligned position would
+// pass the end of the view.
+TEST_F(BufferViewTest, AlignOn) {
+  using size_type = ConstBufferView::size_type;
+  ConstBufferView image(bytes_.data(), bytes_.size());
+  ConstBufferView view = image;
+  ASSERT_EQ(10U, view.size());
+
+  // Current offset of |view| within |image|.
+  auto get_pos = [&image, &view]() -> size_type {
+    EXPECT_TRUE(view.begin() >= image.begin());  // Iterator compare.
+    return static_cast<size_type>(view.begin() - image.begin());
+  };
+
+  EXPECT_EQ(0U, get_pos());
+  view.remove_prefix(1U);
+  EXPECT_EQ(1U, get_pos());
+  view.remove_prefix(4U);
+  EXPECT_EQ(5U, get_pos());
+
+  // Align.
+  EXPECT_TRUE(view.AlignOn(image, 1U));  // Trivial case.
+  EXPECT_EQ(5U, get_pos());
+
+  EXPECT_TRUE(view.AlignOn(image, 2U));
+  EXPECT_EQ(6U, get_pos());
+  EXPECT_TRUE(view.AlignOn(image, 2U));
+  EXPECT_EQ(6U, get_pos());
+
+  EXPECT_TRUE(view.AlignOn(image, 4U));
+  EXPECT_EQ(8U, get_pos());
+  EXPECT_TRUE(view.AlignOn(image, 2U));
+  EXPECT_EQ(8U, get_pos());
+
+  view.remove_prefix(1U);
+  EXPECT_EQ(9U, get_pos());
+
+  // Pos is at 9, align to 4 would yield 12, but size is 10, so this fails.
+  EXPECT_FALSE(view.AlignOn(image, 4U));
+  EXPECT_EQ(9U, get_pos());
+  EXPECT_TRUE(view.AlignOn(image, 2U));
+  EXPECT_EQ(10U, get_pos());
+}
+
+} // namespace zucchini
diff --git a/crc32.cc b/crc32.cc
new file mode 100644
index 0000000..1c45dfe
--- /dev/null
+++ b/crc32.cc
@@ -0,0 +1,43 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/crc32.h"
+
+#include <array>
+
+#include "base/check_op.h"
+
+namespace zucchini {
+
+namespace {
+
+// Builds the 256-entry lookup table for the reflected CRC-32 polynomial
+// 0xEDB88320, one entry per possible input byte.
+std::array<uint32_t, 256> MakeCrc32Table() {
+  constexpr uint32_t kCrc32Poly = 0xEDB88320;
+
+  std::array<uint32_t, 256> crc32Table;
+  for (uint32_t i = 0; i < 256; ++i) {
+    uint32_t r = i;
+    for (int j = 0; j < 8; ++j)
+      // Branchless: XOR in the polynomial only when the low bit of |r| is set
+      // ((r & 1) - 1 is 0 when set, all-ones when clear; ~ inverts the mask).
+      r = (r >> 1) ^ (kCrc32Poly & ~((r & 1) - 1));
+    crc32Table[i] = r;
+  }
+  return crc32Table;
+}
+
+} // namespace
+
+// Minimalistic CRC-32 implementation for Zucchini usage. Adapted from LZMA SDK
+// (found at third_party/lzma_sdk/7zCrc.c), which is public domain.
+// Computes the table-driven CRC-32 of [|first|, |last|), with the standard
+// initial value and final XOR of 0xFFFFFFFF. DCHECKs that the range is
+// ordered. The table is built once on first call (thread-safe static init).
+uint32_t CalculateCrc32(const uint8_t* first, const uint8_t* last) {
+  DCHECK_GE(last, first);
+
+  static const std::array<uint32_t, 256> kCrc32Table = MakeCrc32Table();
+
+  uint32_t ret = 0xFFFFFFFF;
+  for (; first != last; ++first)
+    ret = kCrc32Table[(ret ^ *first) & 0xFF] ^ (ret >> 8);
+  return ret ^ 0xFFFFFFFF;
+}
+
+} // namespace zucchini
diff --git a/crc32.h b/crc32.h
new file mode 100644
index 0000000..c729f5b
--- /dev/null
+++ b/crc32.h
@@ -0,0 +1,17 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_CRC32_H_
+#define COMPONENTS_ZUCCHINI_CRC32_H_
+
+#include <stdint.h>
+
+namespace zucchini {
+
+// Calculates CRC-32 of the given range [|first|, |last|).
+uint32_t CalculateCrc32(const uint8_t* first, const uint8_t* last);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_CRC32_H_
diff --git a/crc32_unittest.cc b/crc32_unittest.cc
new file mode 100644
index 0000000..5ec85a8
--- /dev/null
+++ b/crc32_unittest.cc
@@ -0,0 +1,47 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/crc32.h"
+
+#include <stdint.h>
+
+#include <iterator>
+
+#include "base/test/gtest_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+constexpr uint8_t bytes[] = {0x10, 0x32, 0x54, 0x76, 0x98,
+ 0xBA, 0xDC, 0xFE, 0x10, 0x00};
+
+// Spot-checks CalculateCrc32 against known digests for empty, single-byte,
+// and whole-buffer ranges, plus the DCHECK on a reversed range.
+TEST(Crc32Test, All) {
+  // Results can be verified with any CRC-32 calculator found online.
+
+  // Empty region.
+  EXPECT_EQ(0x00000000U, CalculateCrc32(std::begin(bytes), std::begin(bytes)));
+
+  // Single byte.
+  EXPECT_EQ(0xCFB5FFE9U,
+            CalculateCrc32(std::begin(bytes), std::begin(bytes) + 1));
+
+  // Same byte (0x10) appearing at different location.
+  EXPECT_EQ(0xCFB5FFE9U,
+            CalculateCrc32(std::begin(bytes) + 8, std::begin(bytes) + 9));
+
+  // Single byte of 0.
+  EXPECT_EQ(0xD202EF8DU,
+            CalculateCrc32(std::begin(bytes) + 9, std::end(bytes)));
+
+  // Whole region.
+  EXPECT_EQ(0xA86FD7D6U, CalculateCrc32(std::begin(bytes), std::end(bytes)));
+
+  // Whole region excluding 0 at end.
+  EXPECT_EQ(0x0762F38BU,
+            CalculateCrc32(std::begin(bytes), std::begin(bytes) + 9));
+
+  EXPECT_DCHECK_DEATH(CalculateCrc32(std::begin(bytes) + 1, std::begin(bytes)));
+}
+
+} // namespace zucchini
diff --git a/disassembler.cc b/disassembler.cc
new file mode 100644
index 0000000..4a210ac
--- /dev/null
+++ b/disassembler.cc
@@ -0,0 +1,52 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler.h"
+
+#include "base/check_op.h"
+
+namespace zucchini {
+
+/******** EmptyReferenceReader ********/
+
+// Always signals end-of-stream: an EmptyReferenceReader yields no references.
+absl::optional<Reference> EmptyReferenceReader::GetNext() {
+  return absl::nullopt;
+}
+
+/******** EmptyReferenceWriter ********/
+
+// Discards |reference|: an EmptyReferenceWriter performs no writes.
+void EmptyReferenceWriter::PutNext(Reference /* reference */) {}
+
+/******** ReferenceGroup ********/
+
+// Creates a reader over [|lower|, |upper|) by invoking this group's
+// member-pointer factory on |disasm|. DCHECKs the range is ordered and within
+// the disassembled image.
+std::unique_ptr<ReferenceReader> ReferenceGroup::GetReader(
+    offset_t lower,
+    offset_t upper,
+    Disassembler* disasm) const {
+  DCHECK_LE(lower, upper);
+  DCHECK_LE(upper, disasm->size());
+  return (disasm->*reader_factory_)(lower, upper);
+}
+
+// Convenience overload: creates a reader spanning the entire image.
+std::unique_ptr<ReferenceReader> ReferenceGroup::GetReader(
+    Disassembler* disasm) const {
+  return (disasm->*reader_factory_)(0, static_cast<offset_t>(disasm->size()));
+}
+
+// Creates a writer for |image| via the group's writer factory. DCHECKs that
+// |image| aliases the exact buffer |disasm| was parsed from.
+std::unique_ptr<ReferenceWriter> ReferenceGroup::GetWriter(
+    MutableBufferView image,
+    Disassembler* disasm) const {
+  DCHECK_EQ(image.begin(), disasm->image().begin());
+  DCHECK_EQ(image.size(), disasm->size());
+  return (disasm->*writer_factory_)(image);
+}
+
+/******** Disassembler ********/
+
+// Stores the architecture-specific equivalence-iteration count for matching.
+Disassembler::Disassembler(int num_equivalence_iterations)
+    : num_equivalence_iterations_(num_equivalence_iterations) {}
+
+Disassembler::~Disassembler() = default;
+
+} // namespace zucchini
diff --git a/disassembler.h b/disassembler.h
new file mode 100644
index 0000000..48ee0fb
--- /dev/null
+++ b/disassembler.h
@@ -0,0 +1,154 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
+
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A vacuous ReferenceReader that produces no references.
+class EmptyReferenceReader : public ReferenceReader {
+ public:
+  // Always returns absl::nullopt.
+  absl::optional<Reference> GetNext() override;
+};
+
+// A vacuous EmptyReferenceWriter that does not write.
+class EmptyReferenceWriter : public ReferenceWriter {
+ public:
+  // Ignores |reference|; intentionally a no-op.
+  void PutNext(Reference reference) override;
+};
+
+// Disassembler needs to be declared before ReferenceGroup because the latter
+// contains member pointers based on the former, and we use a compiler flag,
+// -fcomplete-member-pointers, which enforces that member pointer base types are
+// complete. This flag helps prevent us from running into problems in the
+// Microsoft C++ ABI (see https://crbug.com/847724).
+
+class ReferenceGroup;
+
+// A Disassembler is used to encapsulate architecture specific operations, to:
+// - Describe types of references found in the architecture using traits.
+// - Extract references contained in an image file.
+// - Correct target for some references.
+class Disassembler {
+ public:
+  // Attempts to parse |image| and create an architecture-specific
+  // Disassembler, as determined by DIS, which is inherited from Disassembler.
+  // Returns an instance of DIS if successful, and null otherwise.
+  template <class DIS>
+  static std::unique_ptr<DIS> Make(ConstBufferView image) {
+    auto disasm = std::make_unique<DIS>();
+    if (!disasm->Parse(image))
+      return nullptr;
+    return disasm;
+  }
+
+  // Not copyable: a Disassembler owns parse state tied to one image.
+  Disassembler(const Disassembler&) = delete;
+  const Disassembler& operator=(const Disassembler&) = delete;
+  virtual ~Disassembler();
+
+  // Returns the type of executable handled by the Disassembler.
+  virtual ExecutableType GetExeType() const = 0;
+
+  // Returns a more detailed description of the executable type.
+  virtual std::string GetExeTypeString() const = 0;
+
+  // Creates and returns a vector that contains all groups of references.
+  // Groups must be aggregated by pool.
+  virtual std::vector<ReferenceGroup> MakeReferenceGroups() const = 0;
+
+  // Accessors for the parsed image and its size in bytes.
+  ConstBufferView image() const { return image_; }
+  size_t size() const { return image_.size(); }
+
+  int num_equivalence_iterations() const { return num_equivalence_iterations_; }
+
+ protected:
+  explicit Disassembler(int num_equivalence_iterations);
+
+  // Parses |image| and initializes internal states. Returns true on success.
+  // This must be called once and before any other operation.
+  virtual bool Parse(ConstBufferView image) = 0;
+
+  // Raw image data. After Parse(), a Disassembler should shrink this to contain
+  // only the portion containing the executable file it recognizes.
+  ConstBufferView image_;
+
+  // The number of iterations to run for equivalence map generation. This should
+  // roughly be the max length of reference indirection chains.
+  int num_equivalence_iterations_;
+};
+
+// A ReferenceGroup is associated with a specific |type| and has convenience
+// methods to obtain readers and writers for that type. A ReferenceGroup does
+// not store references; it is a lightweight class that communicates with the
+// disassembler to operate on them.
+class ReferenceGroup {
+ public:
+  // Member function pointer used to obtain a ReferenceReader.
+  using ReaderFactory = std::unique_ptr<ReferenceReader> (
+      Disassembler::*)(offset_t lower, offset_t upper);
+
+  // Member function pointer used to obtain a ReferenceWriter.
+  using WriterFactory = std::unique_ptr<ReferenceWriter> (Disassembler::*)(
+      MutableBufferView image);
+
+  // RefinedGeneratorFactory and RefinedReceptorFactory don't have to be
+  // identical to GeneratorFactory and ReceptorFactory, but they must be
+  // convertible. As a result, they can be pointer to member function of a
+  // derived Disassembler.
+  template <class RefinedReaderFactory, class RefinedWriterFactory>
+  ReferenceGroup(ReferenceTypeTraits traits,
+                 RefinedReaderFactory reader_factory,
+                 RefinedWriterFactory writer_factory)
+      : traits_(traits),
+        reader_factory_(static_cast<ReaderFactory>(reader_factory)),
+        writer_factory_(static_cast<WriterFactory>(writer_factory)) {}
+
+  // Returns a reader for all references in the binary.
+  // Invalidates any other writer or reader previously obtained for |disasm|.
+  std::unique_ptr<ReferenceReader> GetReader(Disassembler* disasm) const;
+
+  // Returns a reader for references whose bytes are entirely contained in
+  // |[lower, upper)|.
+  // Invalidates any other writer or reader previously obtained for |disasm|.
+  std::unique_ptr<ReferenceReader> GetReader(offset_t lower,
+                                             offset_t upper,
+                                             Disassembler* disasm) const;
+
+  // Returns a writer for references in |image|, assuming that |image| was the
+  // same one initially parsed by |disasm|.
+  // Invalidates any other writer or reader previously obtained for |disasm|.
+  std::unique_ptr<ReferenceWriter> GetWriter(MutableBufferView image,
+                                             Disassembler* disasm) const;
+
+  // Returns traits describing the reference type.
+  const ReferenceTypeTraits& traits() const { return traits_; }
+
+  // Shorthand for traits().width.
+  offset_t width() const { return traits().width; }
+
+  // Shorthand for traits().type_tag.
+  TypeTag type_tag() const { return traits().type_tag; }
+
+  // Shorthand for traits().pool_tag.
+  PoolTag pool_tag() const { return traits().pool_tag; }
+
+ private:
+  // Traits of the reference type this group handles.
+  ReferenceTypeTraits traits_;
+  // Factories invoked on a Disassembler to create readers / writers.
+  ReaderFactory reader_factory_ = nullptr;
+  WriterFactory writer_factory_ = nullptr;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
diff --git a/disassembler_dex.cc b/disassembler_dex.cc
new file mode 100644
index 0000000..5b25c50
--- /dev/null
+++ b/disassembler_dex.cc
@@ -0,0 +1,1670 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_dex.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cmath>
+#include <iterator>
+#include <set>
+#include <utility>
+
+#include "base/bind.h"
+#include "base/callback.h"
+#include "base/logging.h"
+#include "base/numerics/checked_math.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/strings/stringprintf.h"
+#include "components/zucchini/buffer_source.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/io_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+namespace {
+
+// A DEX item specified by an offset, if absent, has a sentinel value of 0 since
+// 0 is never a valid item offset (it points to magic at start of DEX).
+constexpr offset_t kDexSentinelOffset = 0U;
+
+// A DEX item specified by an index, if absent, has a sentinel value of
+// NO_INDEX = 0xFFFFFFFF. This is represented as an offset_t for uniformity.
+constexpr offset_t kDexSentinelIndexAsOffset = 0xFFFFFFFFU;
+
+static_assert(kDexSentinelIndexAsOffset != kInvalidOffset,
+ "Sentinel should not be confused with invalid offset.");
+
+// Size of a Dalvik instruction unit. Need to cast to signed int because
+// sizeof() gives size_t, which dominates when operated on ptrdiff_t, then
+// wreaks havoc for base::checked_cast<int16_t>().
+constexpr int kInstrUnitSize = static_cast<int>(sizeof(uint16_t));
+
+// Checks if |offset| is byte aligned to 32 bits or 4 bytes. Many DEX
+// structures (e.g., code items, map list entries) require this alignment.
+bool Is32BitAligned(offset_t offset) {
+  return offset % 4 == 0;
+}
+
+// Returns a lower bound for the size of an item of type |type_item_code|.
+// Sizes come directly from the dex:: struct definitions of each item.
+// - For fixed-length items (e.g., kTypeFieldIdItem) this is the exact size.
+// - For variant-length items (e.g., kTypeCodeItem), returns a value that is
+//   known to be less than the item length (e.g., header size).
+// - For items not handled by this function, returns 1 for sanity check.
+size_t GetItemBaseSize(uint16_t type_item_code) {
+  switch (type_item_code) {
+    case dex::kTypeStringIdItem:
+      return sizeof(dex::StringIdItem);
+    case dex::kTypeTypeIdItem:
+      return sizeof(dex::TypeIdItem);
+    case dex::kTypeProtoIdItem:
+      return sizeof(dex::ProtoIdItem);
+    case dex::kTypeFieldIdItem:
+      return sizeof(dex::FieldIdItem);
+    case dex::kTypeMethodIdItem:
+      return sizeof(dex::MethodIdItem);
+    case dex::kTypeClassDefItem:
+      return sizeof(dex::ClassDefItem);
+    // No need to handle dex::kTypeMapList.
+    case dex::kTypeTypeList:
+      return sizeof(uint32_t);  // Variable-length.
+    case dex::kTypeAnnotationSetRefList:
+      return sizeof(uint32_t);  // Variable-length.
+    case dex::kTypeAnnotationSetItem:
+      return sizeof(uint32_t);  // Variable-length.
+    case dex::kTypeCodeItem:
+      return sizeof(dex::CodeItem);  // Variable-length.
+    case dex::kTypeAnnotationsDirectoryItem:
+      return sizeof(dex::AnnotationsDirectoryItem);  // Variable-length.
+    default:
+      return 1U;  // Unhandled item. For sanity check assume size >= 1.
+  }
+}
+
+/******** CodeItemParser ********/
+
+// A parser to extract successive code items from a DEX image whose header has
+// been parsed.
+class CodeItemParser {
+ public:
+  using size_type = BufferSource::size_type;
+
+  explicit CodeItemParser(ConstBufferView image) : image_(image) {}
+
+  // Initializes the parser, returns true on success and false on error.
+  bool Init(const dex::MapItem& code_map_item) {
+    // Sanity check to quickly fail if |code_map_item.offset| or
+    // |code_map_item.size| is too large. This is a heuristic because code item
+    // sizes need to be parsed (sizeof(dex::CodeItem) is a lower bound).
+    if (!image_.covers_array(code_map_item.offset, code_map_item.size,
+                             sizeof(dex::CodeItem))) {
+      return false;
+    }
+    source_ = std::move(BufferSource(image_).Skip(code_map_item.offset));
+    return true;
+  }
+
+  // Extracts the header of the next code item, and skips the variable-length
+  // data. Returns the offset of the code item if successful. Otherwise returns
+  // kInvalidOffset, and thereafter the parser becomes invalid. For reference,
+  // here's a pseudo-struct of a complete code item:
+  //
+  // struct code_item {
+  //   // 4-byte aligned here.
+  //   // 16-byte header defined (dex::CodeItem).
+  //   uint16_t registers_size;
+  //   uint16_t ins_size;
+  //   uint16_t outs_size;
+  //   uint16_t tries_size;
+  //   uint32_t debug_info_off;
+  //   uint32_t insns_size;
+  //
+  //   // Variable-length data follow.
+  //   uint16_t insns[insns_size];  // Instruction bytes.
+  //   uint16_t padding[(tries_size > 0 && insns_size % 2 == 1) ? 1 : 0];
+  //
+  //   if (tries_size > 0) {
+  //     // 4-byte aligned here.
+  //     struct try_item {  // dex::TryItem.
+  //       uint32_t start_addr;
+  //       uint16_t insn_count;
+  //       uint16_t handler_off;
+  //     } tries[tries_size];
+  //
+  //     struct encoded_catch_handler_list {
+  //       uleb128 handlers_size;
+  //       struct encoded_catch_handler {
+  //         sleb128 encoded_catch_handler_size;
+  //         struct encoded_type_addr_pair {
+  //           uleb128 type_idx;
+  //           uleb128 addr;
+  //         } handlers[abs(encoded_catch_handler_size)];
+  //         if (encoded_catch_handler_size <= 0) {
+  //           uleb128 catch_all_addr;
+  //         }
+  //       } handlers_list[handlers_size];
+  //     } handlers_group;  // Confusingly called "handlers" in DEX doc.
+  //   }
+  //
+  //   // Padding to 4-bytes align next code_item *only if more exist*.
+  // }
+  offset_t GetNext() {
+    // Read header CodeItem.
+    if (!source_.AlignOn(image_, 4U))
+      return kInvalidOffset;
+    const offset_t code_item_offset =
+        base::checked_cast<offset_t>(source_.begin() - image_.begin());
+    const auto* code_item = source_.GetPointer<const dex::CodeItem>();
+    if (!code_item)
+      return kInvalidOffset;
+    DCHECK(Is32BitAligned(code_item_offset));
+
+    // TODO(huangs): Fail if |code_item->insns_size == 0| (Constraint A1).
+    // Skip instruction bytes.
+    if (!source_.GetArray<uint16_t>(code_item->insns_size))
+      return kInvalidOffset;
+    // Skip padding if present.
+    if (code_item->tries_size > 0 && !source_.AlignOn(image_, 4U))
+      return kInvalidOffset;
+
+    // Skip tries[] and handlers_group to arrive at the next code item. Parsing
+    // is nontrivial due to use of uleb128 / sleb128.
+    if (code_item->tries_size > 0) {
+      // Skip (try_item) tries[].
+      if (!source_.GetArray<dex::TryItem>(code_item->tries_size))
+        return kInvalidOffset;
+
+      // Skip handlers_group.
+      uint32_t handlers_size = 0;
+      if (!source_.GetUleb128(&handlers_size))
+        return kInvalidOffset;
+      // Sanity check to quickly reject excessively large |handlers_size|.
+      if (source_.Remaining() < static_cast<size_type>(handlers_size))
+        return kInvalidOffset;
+
+      // Skip (encoded_catch_handler) handlers_list[].
+      for (uint32_t k = 0; k < handlers_size; ++k) {
+        int32_t encoded_catch_handler_size = 0;
+        if (!source_.GetSleb128(&encoded_catch_handler_size))
+          return kInvalidOffset;
+        const size_type abs_size = std::abs(encoded_catch_handler_size);
+        if (source_.Remaining() < abs_size)  // Sanity check.
+          return kInvalidOffset;
+        // Skip (encoded_type_addr_pair) handlers[].
+        for (size_type j = 0; j < abs_size; ++j) {
+          if (!source_.SkipLeb128() || !source_.SkipLeb128())
+            return kInvalidOffset;
+        }
+        // Skip catch_all_addr.
+        if (encoded_catch_handler_size <= 0) {
+          if (!source_.SkipLeb128())
+            return kInvalidOffset;
+        }
+      }
+    }
+    // Success! |code_item->insns_size| is validated, but its content is still
+    // considered unsafe and requires validation.
+    return code_item_offset;
+  }
+
+  // Given |code_item_offset| that points to the start of a valid code item in
+  // |image|, returns |insns| bytes as ConstBufferView.
+  static ConstBufferView GetCodeItemInsns(ConstBufferView image,
+                                          offset_t code_item_offset) {
+    BufferSource source(BufferSource(image).Skip(code_item_offset));
+    const auto* code_item = source.GetPointer<const dex::CodeItem>();
+    DCHECK(code_item);
+    BufferRegion insns{0, code_item->insns_size * kInstrUnitSize};
+    DCHECK(source.covers(insns));
+    return source[insns];
+  }
+
+ private:
+  ConstBufferView image_;  // The full DEX image.
+  BufferSource source_;    // Cursor into |image_|, past parsed code items.
+};
+
+/******** InstructionParser ********/
+
+// A class that successively reads |code_item| for Dalvik instructions, which
+// are found at |insns|, spanning |insns_size| uint16_t "units". These units
+// store instructions followed by optional non-instruction "payload". Finding
+// payload boundary requires parsing: On finding an instruction that uses (and
+// points to) payload, the boundary is updated.
+class InstructionParser {
+ public:
+  // Result of a successful ReadNext(): where the instruction lives, and its
+  // static description from the opcode table.
+  struct Value {
+    offset_t instr_offset;
+    const dex::Instruction* instr = nullptr;  // null for unknown instructions.
+  };
+
+  // Returns pointer to DEX Instruction data for |opcode|, or null if |opcode|
+  // is unknown. An internal initialize-on-first-use table is used for fast
+  // lookup.
+  const dex::Instruction* FindDalvikInstruction(uint8_t opcode) {
+    static bool is_init = false;
+    static const dex::Instruction* instruction_table[256];
+    if (!is_init) {
+      is_init = true;
+      std::fill(std::begin(instruction_table), std::end(instruction_table),
+                nullptr);
+      // Each entry covers |variant| consecutive opcodes.
+      for (const dex::Instruction& instr : dex::kByteCode) {
+        std::fill(instruction_table + instr.opcode,
+                  instruction_table + instr.opcode + instr.variant, &instr);
+      }
+    }
+    return instruction_table[opcode];
+  }
+
+  InstructionParser() = default;
+
+  InstructionParser(ConstBufferView image, offset_t base_offset)
+      : image_begin_(image.begin()),
+        insns_(CodeItemParser::GetCodeItemInsns(image, base_offset)),
+        payload_boundary_(insns_.end()) {}
+
+  // Reads the next instruction. On success, makes the data read available via
+  // value() and returns true. Otherwise (done or found error) returns false.
+  bool ReadNext() {
+    // Do not scan past payload boundary.
+    if (insns_.begin() >= payload_boundary_)
+      return false;
+
+    const offset_t instr_offset =
+        base::checked_cast<offset_t>(insns_.begin() - image_begin_);
+    const uint8_t op = insns_.read<uint8_t>(0);
+    const dex::Instruction* instr = FindDalvikInstruction(op);
+
+    // Stop on finding unknown instructions. ODEX files might trigger this.
+    if (!instr) {
+      LOG(WARNING) << "Unknown Dalvik instruction detected at "
+                   << AsHex<8>(instr_offset) << ".";
+      return false;
+    }
+
+    const int instr_length_units = instr->layout;
+    const size_t instr_length_bytes = instr_length_units * kInstrUnitSize;
+    if (insns_.size() < instr_length_bytes)
+      return false;
+
+    // Handle instructions with variable-length data payload (31t).
+    if (instr->opcode == 0x26 ||  // fill-array-data
+        instr->opcode == 0x2B ||  // packed-switch
+        instr->opcode == 0x2C) {  // sparse-switch
+      const int32_t unsafe_payload_rel_units = insns_.read<int32_t>(2);
+      // Payload must be in current code item, after current instruction.
+      if (unsafe_payload_rel_units < instr_length_units ||
+          static_cast<uint32_t>(unsafe_payload_rel_units) >=
+              insns_.size() / kInstrUnitSize) {
+        LOG(WARNING) << "Invalid payload found.";
+        return false;
+      }
+      // Update boundary between instructions and payload.
+      const ConstBufferView::const_iterator payload_it =
+          insns_.begin() + unsafe_payload_rel_units * kInstrUnitSize;
+      payload_boundary_ = std::min(payload_boundary_, payload_it);
+    }
+
+    insns_.remove_prefix(instr_length_bytes);
+    value_ = {instr_offset, instr};
+    return true;
+  }
+
+  const Value& value() const { return value_; }
+
+ private:
+  ConstBufferView::const_iterator image_begin_;    // Start of whole image.
+  ConstBufferView insns_;                          // Unread instruction bytes.
+  ConstBufferView::const_iterator payload_boundary_;  // Earliest payload seen.
+  Value value_;
+};
+
+/******** InstructionReferenceReader ********/
+
+// A class to visit |code_items|, parse instructions, and emit embedded
+// References of a type determined by |filter_| and |mapper_|. Only References
+// located in |[lo, hi)| are emitted. |lo| and |hi| are assumed to never
+// straddle the body of a Reference.
+class InstructionReferenceReader : public ReferenceReader {
+ public:
+  // A function that takes a parsed Dalvik instruction and decides whether it
+  // contains a specific type of Reference. If true, then returns the Reference
+  // location. Otherwise returns kInvalidOffset.
+  using Filter =
+      base::RepeatingCallback<offset_t(const InstructionParser::Value&)>;
+  // A function that takes Reference location from |filter_| to extract the
+  // stored target. If valid, returns it. Otherwise returns kInvalidOffset.
+  using Mapper = base::RepeatingCallback<offset_t(offset_t)>;
+
+  InstructionReferenceReader(ConstBufferView image,
+                             offset_t lo,
+                             offset_t hi,
+                             const std::vector<offset_t>& code_item_offsets,
+                             Filter&& filter,
+                             Mapper&& mapper)
+      : image_(image),
+        lo_(lo),
+        hi_(hi),
+        end_it_(code_item_offsets.end()),
+        filter_(std::move(filter)),
+        mapper_(std::move(mapper)) {
+    const auto begin_it = code_item_offsets.begin();
+    // Use binary search to find the code item that contains |lo_|.
+    auto comp = [](offset_t test_offset, offset_t code_item_offset) {
+      return test_offset < code_item_offset;
+    };
+    cur_it_ = std::upper_bound(begin_it, end_it_, lo_, comp);
+    // Step back to the code item that starts at or before |lo_|.
+    if (cur_it_ != begin_it)
+      --cur_it_;
+    parser_ = InstructionParser(image_, *cur_it_);
+  }
+
+  // ReferenceReader:
+  absl::optional<Reference> GetNext() override {
+    while (true) {
+      // Drain instructions in the current code item.
+      while (parser_.ReadNext()) {
+        const auto& v = parser_.value();
+        DCHECK_NE(v.instr, nullptr);
+        if (v.instr_offset >= hi_)
+          return absl::nullopt;
+        const offset_t location = filter_.Run(v);
+        if (location == kInvalidOffset || location < lo_)
+          continue;
+        // The general check is |location + reference_width > hi_|. However, by
+        // assumption |hi_| and |lo_| do not straddle the body of a Reference.
+        // So |reference_width| is unneeded.
+        if (location >= hi_)
+          return absl::nullopt;
+        offset_t target = mapper_.Run(location);
+        if (target != kInvalidOffset)
+          return Reference{location, target};
+        else
+          LOG(WARNING) << "Invalid target at " << AsHex<8>(location) << ".";
+      }
+      // Advance to the next code item, if any.
+      ++cur_it_;
+      if (cur_it_ == end_it_)
+        return absl::nullopt;
+      parser_ = InstructionParser(image_, *cur_it_);
+    }
+  }
+
+ private:
+  const ConstBufferView image_;
+  const offset_t lo_;
+  const offset_t hi_;
+  const std::vector<offset_t>::const_iterator end_it_;
+  const Filter filter_;
+  const Mapper mapper_;
+  std::vector<offset_t>::const_iterator cur_it_;  // Current code item.
+  InstructionParser parser_;
+};
+
+/******** ItemReferenceReader ********/
+
+// A class to visit fixed-size item elements (determined by |item_size|) and
+// emit a "member variable of interest" (MVI, determined by |rel_location| and
+// |mapper|) as Reference. Only MVIs lying in |[lo, hi)| are emitted. |lo| and
+// |hi| are assumed to never straddle the body of a Reference.
+class ItemReferenceReader : public ReferenceReader {
+ public:
+  // A function that takes an MVI's location and emit its target offset.
+  using Mapper = base::RepeatingCallback<offset_t(offset_t)>;
+
+  // |item_size| is the size of a fixed-size item. |rel_location| is the
+  // relative location of MVI from the start of the item containing it.
+  ItemReferenceReader(offset_t lo,
+                      offset_t hi,
+                      const dex::MapItem& map_item,
+                      size_t item_size,
+                      size_t rel_location,
+                      Mapper&& mapper)
+      : hi_(hi),
+        item_base_offset_(base::checked_cast<offset_t>(map_item.offset)),
+        num_items_(base::checked_cast<uint32_t>(map_item.size)),
+        item_size_(base::checked_cast<uint32_t>(item_size)),
+        rel_location_(base::checked_cast<uint32_t>(rel_location)),
+        mapper_(std::move(mapper)) {
+    static_assert(sizeof(decltype(map_item.offset)) <= sizeof(offset_t),
+                  "map_item.offset too large.");
+    static_assert(sizeof(decltype(map_item.size)) <= sizeof(offset_t),
+                  "map_item.size too large.");
+    // Position |cur_idx_| at the first item whose MVI is at or after |lo|.
+    if (!item_base_offset_) {
+      // Empty item: Assign |cur_idx| to |num_items_| to skip everything.
+      cur_idx_ = num_items_;
+    } else if (lo < item_base_offset_) {
+      cur_idx_ = 0;
+    } else if (lo < OffsetOfIndex(num_items_)) {
+      cur_idx_ = (lo - item_base_offset_) / item_size_;
+      // Fine-tune: Advance if |lo| lies beyond the MVI.
+      if (lo > OffsetOfIndex(cur_idx_) + rel_location_)
+        ++cur_idx_;
+    } else {
+      cur_idx_ = num_items_;
+    }
+  }
+
+  // ReferenceReader:
+  absl::optional<Reference> GetNext() override {
+    while (cur_idx_ < num_items_) {
+      const offset_t item_offset = OffsetOfIndex(cur_idx_);
+      const offset_t location = item_offset + rel_location_;
+      // The general check is |location + reference_width > hi_|. However, by
+      // assumption |hi_| and |lo_| do not straddle the body of a Reference. So
+      // |reference_width| is unneeded.
+      if (location >= hi_)
+        break;
+      const offset_t target = mapper_.Run(location);
+
+      // kDexSentinelOffset (0) may appear for the following:
+      // - ProtoIdItem: parameters_off.
+      // - ClassDefItem: interfaces_off, annotations_off, class_data_off,
+      //   static_values_off.
+      // - AnnotationsDirectoryItem: class_annotations_off.
+      // - AnnotationSetRefItem: annotations_off.
+      // kDexSentinelIndexAsOffset (0xFFFFFFFF) may appear for the following:
+      // - ClassDefItem: superclass_idx, source_file_idx.
+      if (target == kDexSentinelOffset || target == kDexSentinelIndexAsOffset) {
+        ++cur_idx_;
+        continue;
+      }
+
+      if (target == kInvalidOffset) {
+        LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << ".";
+        break;
+      }
+      ++cur_idx_;
+      return Reference{location, target};
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  // Returns the offset of the item at |idx|; |idx| may be |num_items_| to
+  // denote the end of the item array.
+  offset_t OffsetOfIndex(uint32_t idx) {
+    return base::checked_cast<uint32_t>(item_base_offset_ + idx * item_size_);
+  }
+
+  const offset_t hi_;
+  const offset_t item_base_offset_;
+  const uint32_t num_items_;
+  const uint32_t item_size_;
+  const uint32_t rel_location_;
+  const Mapper mapper_;
+  offset_t cur_idx_ = 0;
+};
+
+// Parses a flattened jagged list of lists of items that looks like:
+// NTTT|NTT|NTTTT|N|NTT...
+// where |N| is an uint32_t representing the number of items in each sub-list,
+// and "T" is a fixed-size item (|item_width|) of type "T". On success, stores
+// the offset of each |T| into |item_offsets|, and returns true. Otherwise
+// (e.g., on finding any structural problem) returns false.
+bool ParseItemOffsets(ConstBufferView image,
+                      const dex::MapItem& map_item,
+                      size_t item_width,
+                      std::vector<offset_t>* item_offsets) {
+  // Sanity check: |image| should at least fit |map_item.size| copies of "N".
+  if (!image.covers_array(map_item.offset, map_item.size, sizeof(uint32_t)))
+    return false;
+  BufferSource source = std::move(BufferSource(image).Skip(map_item.offset));
+  item_offsets->clear();
+  for (uint32_t i = 0; i < map_item.size; ++i) {
+    // Each sub-list's |N| is 4-byte aligned.
+    if (!source.AlignOn(image, 4U))
+      return false;
+    uint32_t unsafe_size;
+    if (!source.GetValue<uint32_t>(&unsafe_size))
+      return false;
+    DCHECK(Is32BitAligned(
+        base::checked_cast<offset_t>(source.begin() - image.begin())));
+    // Reject sub-lists that would run past the end of |source|.
+    if (!source.covers_array(0, unsafe_size, item_width))
+      return false;
+    for (uint32_t j = 0; j < unsafe_size; ++j) {
+      item_offsets->push_back(
+          base::checked_cast<offset_t>(source.begin() - image.begin()));
+      source.Skip(item_width);
+    }
+  }
+  return true;
+}
+
+// Parses AnnotationDirectoryItems of the format (using RegEx) "(AF*M*P*)*",
+// where:
+//   A = AnnotationsDirectoryItem (contains class annotation),
+//   F = FieldAnnotation,
+//   M = MethodAnnotation,
+//   P = ParameterAnnotation.
+// On success, stores the offsets of each class, field, method and parameter
+// annotation for each item into |*_annotation_offsets|. Otherwise on finding
+// structural issues returns false.
+bool ParseAnnotationsDirectoryItems(
+    ConstBufferView image,
+    const dex::MapItem& annotations_directory_map_item,
+    std::vector<offset_t>* annotations_directory_item_offsets,
+    std::vector<offset_t>* field_annotation_offsets,
+    std::vector<offset_t>* method_annotation_offsets,
+    std::vector<offset_t>* parameter_annotation_offsets) {
+  // Sanity check: |image| should at least fit
+  // |annotations_directory_map_item.size| copies of "A".
+  if (!image.covers_array(annotations_directory_map_item.offset,
+                          annotations_directory_map_item.size,
+                          sizeof(dex::AnnotationsDirectoryItem))) {
+    return false;
+  }
+  BufferSource source = std::move(
+      BufferSource(image).Skip(annotations_directory_map_item.offset));
+  annotations_directory_item_offsets->clear();
+  field_annotation_offsets->clear();
+  method_annotation_offsets->clear();
+  parameter_annotation_offsets->clear();
+
+  // Helper to process sublists: records |unsafe_size| item offsets and skips
+  // their bytes; returns false if the sublist would overrun |source|.
+  auto parse_list = [&source, image](uint32_t unsafe_size, size_t item_width,
+                                     std::vector<offset_t>* item_offsets) {
+    DCHECK(Is32BitAligned(
+        base::checked_cast<offset_t>(source.begin() - image.begin())));
+    if (!source.covers_array(0, unsafe_size, item_width))
+      return false;
+    item_offsets->reserve(item_offsets->size() + unsafe_size);
+    for (uint32_t i = 0; i < unsafe_size; ++i) {
+      item_offsets->push_back(
+          base::checked_cast<offset_t>(source.begin() - image.begin()));
+      source.Skip(item_width);
+    }
+    return true;
+  };
+
+  annotations_directory_item_offsets->reserve(
+      annotations_directory_map_item.size);
+  for (uint32_t i = 0; i < annotations_directory_map_item.size; ++i) {
+    if (!source.AlignOn(image, 4U))
+      return false;
+    // Parse header.
+    annotations_directory_item_offsets->push_back(
+        base::checked_cast<offset_t>(source.begin() - image.begin()));
+    dex::AnnotationsDirectoryItem unsafe_annotations_directory_item;
+    if (!source.GetValue(&unsafe_annotations_directory_item))
+      return false;
+    // Parse sublists.
+    if (!(parse_list(unsafe_annotations_directory_item.fields_size,
+                     sizeof(dex::FieldAnnotation), field_annotation_offsets) &&
+          parse_list(unsafe_annotations_directory_item.annotated_methods_size,
+                     sizeof(dex::MethodAnnotation),
+                     method_annotation_offsets) &&
+          parse_list(
+              unsafe_annotations_directory_item.annotated_parameters_size,
+              sizeof(dex::ParameterAnnotation),
+              parameter_annotation_offsets))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/******** CachedItemListReferenceReader ********/
+
+// A class that takes sorted |item_offsets|, and emits all member variable of
+// interest (MVIs) that fall inside |[lo, hi)|. The MVI of each item has
+// location of |rel_location| from item offset, and has target extracted with
+// |mapper| (which performs validation). By the "atomicity assumption",
+// [|lo, hi)| never cut across an MVI.
+class CachedItemListReferenceReader : public ReferenceReader {
+ public:
+  // A function that takes an MVI's location and emit its target offset.
+  using Mapper = base::RepeatingCallback<offset_t(offset_t)>;
+
+  CachedItemListReferenceReader(offset_t lo,
+                                offset_t hi,
+                                uint32_t rel_location,
+                                const std::vector<offset_t>& item_offsets,
+                                Mapper&& mapper)
+      : hi_(hi),
+        rel_location_(rel_location),
+        end_it_(item_offsets.cend()),
+        // NOTE(review): |mapper| is copied; could be std::move(mapper).
+        mapper_(mapper) {
+    cur_it_ = std::upper_bound(item_offsets.cbegin(), item_offsets.cend(), lo);
+    // Adding |rel_location_| is necessary as references can be offset from the
+    // start of the item.
+    if (cur_it_ != item_offsets.begin() && *(cur_it_ - 1) + rel_location_ >= lo)
+      --cur_it_;
+  }
+  CachedItemListReferenceReader(const CachedItemListReferenceReader&) = delete;
+  const CachedItemListReferenceReader& operator=(
+      const CachedItemListReferenceReader&) = delete;
+
+  // ReferenceReader:
+  absl::optional<Reference> GetNext() override {
+    while (cur_it_ < end_it_) {
+      const offset_t location = *cur_it_ + rel_location_;
+      if (location >= hi_)  // Check is simplified by atomicity assumption.
+        break;
+      const offset_t target = mapper_.Run(location);
+      if (target == kInvalidOffset) {
+        LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << ".";
+        break;
+      }
+      ++cur_it_;
+
+      // kDexSentinelOffset is a sentinel for:
+      // - AnnotationsDirectoryItem: class_annotations_off
+      if (target == kDexSentinelOffset)
+        continue;
+      return Reference{location, target};
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  const offset_t hi_;
+  const uint32_t rel_location_;
+  const std::vector<offset_t>::const_iterator end_it_;
+  const Mapper mapper_;
+  std::vector<offset_t>::const_iterator cur_it_;  // Next item to emit.
+};
+
+// Reads an INT index at |location| in |image| and translates the index to the
+// offset of a fixed-size item specified by |target_map_item| and
+// |target_item_size|. Returns the target offset if valid, or kInvalidOffset
+// otherwise. This is compatible with
+// CachedItemListReferenceReader::Mapper,
+// InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper.
+template <typename INT>
+static offset_t ReadTargetIndex(ConstBufferView image,
+                                const dex::MapItem& target_map_item,
+                                size_t target_item_size,
+                                offset_t location) {
+  static_assert(sizeof(INT) <= sizeof(offset_t),
+                "INT may not fit into offset_t.");
+  const offset_t unsafe_idx = image.read<INT>(location);
+  // kDexSentinelIndexAsOffset (0xFFFFFFFF) is a sentinel for
+  // - ClassDefItem: superclass_idx, source_file_idx.
+  if (unsafe_idx == kDexSentinelIndexAsOffset)
+    return unsafe_idx;
+  if (unsafe_idx >= target_map_item.size)
+    return kInvalidOffset;
+  return target_map_item.offset +
+         base::checked_cast<offset_t>(unsafe_idx * target_item_size);
+}
+
+// Reads uint32_t value in |image| at (valid) |location| and checks whether it
+// is a safe offset of a fixed-size item. Returns the target offset (possibly a
+// sentinel) if valid, or kInvalidOffset otherwise. This is compatible with
+// CachedItemListReferenceReader::Mapper,
+// InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper.
+static offset_t ReadTargetOffset32(ConstBufferView image, offset_t location) {
+  const offset_t unsafe_target =
+      static_cast<offset_t>(image.read<uint32_t>(location));
+  // Skip and don't validate kDexSentinelOffset as it is indicative of an
+  // empty reference.
+  if (unsafe_target == kDexSentinelOffset)
+    return unsafe_target;
+
+  // TODO(huangs): Check that |unsafe_target| is within the correct data
+  // section.
+  if (unsafe_target >= image.size())
+    return kInvalidOffset;
+  return unsafe_target;
+}
+
+/******** ReferenceWriterAdaptor ********/
+
+// A ReferenceWriter that adapts a callback that performs type-specific
+// Reference writes.
+class ReferenceWriterAdaptor : public ReferenceWriter {
+ public:
+ using Writer = base::RepeatingCallback<void(Reference, MutableBufferView)>;
+
+ // Takes ownership of |writer|. |image| is the destination buffer handed to
+ // |writer| on every PutNext() call.
+ ReferenceWriterAdaptor(MutableBufferView image, Writer&& writer)
+ : image_(image), writer_(std::move(writer)) {}
+
+ // ReferenceWriter:
+ void PutNext(Reference ref) override { writer_.Run(ref, image_); }
+
+ private:
+ MutableBufferView image_;
+ Writer writer_;
+};
+
+// Helper that's compatible with ReferenceWriterAdaptor::Writer.
+// Given that |ref.target| points to the start of a fixed size DEX item (e.g.,
+// FieldIdItem), translates |ref.target| to item index, and writes the result to
+// |ref.location| as |INT|.
+template <typename INT>
+static void WriteTargetIndex(const dex::MapItem& target_map_item,
+ size_t target_item_size,
+ Reference ref,
+ MutableBufferView image) {
+ // If |ref.target| < |target_map_item.offset|, the unsigned subtraction
+ // wraps to a huge value that the bound check below rejects.
+ const size_t unsafe_idx =
+ (ref.target - target_map_item.offset) / target_item_size;
+ // Verify that index is within bound.
+ if (unsafe_idx >= target_map_item.size) {
+ LOG(ERROR) << "Target index out of bounds at: " << AsHex<8>(ref.location)
+ << ".";
+ return;
+ }
+ // Verify that |ref.target| points to start of item.
+ DCHECK_EQ(ref.target, target_map_item.offset + unsafe_idx * target_item_size);
+ image.write<INT>(ref.location, base::checked_cast<INT>(unsafe_idx));
+}
+
+// Buffer for ReadDexHeader() to optionally return results.
+struct ReadDexHeaderResults {
+ BufferSource source; // Positioned just past the parsed HeaderItem.
+ const dex::HeaderItem* header; // Points into the image; non-owning.
+ int dex_version; // Parsed from magic, e.g., 35 for "dex\n035\0".
+};
+
+// Returns whether |image| points to a DEX file. If this is a possibility and
+// |opt_results| is not null, then uses it to pass extracted data to enable
+// further parsing.
+bool ReadDexHeader(ConstBufferView image, ReadDexHeaderResults* opt_results) {
+ // This part needs to be fairly efficient since it may be called many times.
+ BufferSource source(image);
+ const dex::HeaderItem* header = source.GetPointer<dex::HeaderItem>();
+ if (!header)
+ return false;
+ // Expect magic of the form "dex\n" + 3 version digits + '\0'.
+ if (header->magic[0] != 'd' || header->magic[1] != 'e' ||
+ header->magic[2] != 'x' || header->magic[3] != '\n' ||
+ header->magic[7] != '\0') {
+ return false;
+ }
+
+ // Magic matches: More detailed tests can be conducted.
+ int dex_version = 0;
+ for (int i = 4; i < 7; ++i) {
+ // NOTE(review): isdigit() on a negative value is UB — presumably
+ // dex::HeaderItem::magic is uint8_t so this is safe; confirm in dex.h.
+ if (!isdigit(header->magic[i]))
+ return false;
+ dex_version = dex_version * 10 + (header->magic[i] - '0');
+ }
+
+ // Only support DEX versions 35 and 37.
+ // TODO(huangs): Handle version 38.
+ if (dex_version != 35 && dex_version != 37)
+ return false;
+
+ // Sanity-check sizes: file must fit in |image| and be large enough to hold
+ // the header; the map list must not overlap the header.
+ if (header->file_size > image.size() ||
+ header->file_size < sizeof(dex::HeaderItem) ||
+ header->map_off < sizeof(dex::HeaderItem)) {
+ return false;
+ }
+
+ if (opt_results)
+ *opt_results = {source, header, dex_version};
+ return true;
+}
+
+} // namespace
+
+/******** DisassemblerDex ********/
+
+// The |4| is forwarded to the Disassembler base ctor — presumably
+// num_equivalence_iterations; confirm against disassembler.h.
+DisassemblerDex::DisassemblerDex() : Disassembler(4) {}
+
+// Defaulted: all members clean up via their own destructors.
+DisassemblerDex::~DisassemblerDex() = default;
+
+// static.
+// Cheap header-only check for DEX; does not perform full parsing.
+bool DisassemblerDex::QuickDetect(ConstBufferView image) {
+ return ReadDexHeader(image, nullptr);
+}
+
+// Identifies this disassembler's executable type to the zucchini core.
+ExecutableType DisassemblerDex::GetExeType() const {
+ return kExeTypeDex;
+}
+
+// Human-readable type string, e.g., "DEX (version 35)".
+std::string DisassemblerDex::GetExeTypeString() const {
+ return base::StringPrintf("DEX (version %d)", dex_version_);
+}
+
+// Enumerates every reference type this disassembler extracts, pairing each
+// with its reader/writer factory. Each entry is {width-in-bytes, type tag,
+// target pool}. Rebuilt on each call (nothing is cached here).
+std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const {
+ // Must follow DisassemblerDex::ReferenceType order.
+ return {
+ {{4, TypeTag(kTypeIdToDescriptorStringId), PoolTag(kStringId)},
+ &DisassemblerDex::MakeReadTypeIdToDescriptorStringId32,
+ &DisassemblerDex::MakeWriteStringId32},
+ {{4, TypeTag(kProtoIdToShortyStringId), PoolTag(kStringId)},
+ &DisassemblerDex::MakeReadProtoIdToShortyStringId32,
+ &DisassemblerDex::MakeWriteStringId32},
+ {{4, TypeTag(kFieldIdToNameStringId), PoolTag(kStringId)},
+ &DisassemblerDex::MakeReadFieldToNameStringId32,
+ &DisassemblerDex::MakeWriteStringId32},
+ {{4, TypeTag(kMethodIdToNameStringId), PoolTag(kStringId)},
+ &DisassemblerDex::MakeReadMethodIdToNameStringId32,
+ &DisassemblerDex::MakeWriteStringId32},
+ {{4, TypeTag(kClassDefToSourceFileStringId), PoolTag(kStringId)},
+ &DisassemblerDex::MakeReadClassDefToSourceFileStringId32,
+ &DisassemblerDex::MakeWriteStringId32},
+ {{2, TypeTag(kCodeToStringId16), PoolTag(kStringId)},
+ &DisassemblerDex::MakeReadCodeToStringId16,
+ &DisassemblerDex::MakeWriteStringId16},
+ {{4, TypeTag(kCodeToStringId32), PoolTag(kStringId)},
+ &DisassemblerDex::MakeReadCodeToStringId32,
+ &DisassemblerDex::MakeWriteStringId32},
+ {{4, TypeTag(kProtoIdToReturnTypeId), PoolTag(kTypeId)},
+ &DisassemblerDex::MakeReadProtoIdToReturnTypeId32,
+ &DisassemblerDex::MakeWriteTypeId32},
+ {{2, TypeTag(kFieldIdToClassTypeId), PoolTag(kTypeId)},
+ &DisassemblerDex::MakeReadFieldToClassTypeId16,
+ &DisassemblerDex::MakeWriteTypeId16},
+ {{2, TypeTag(kFieldIdToTypeId), PoolTag(kTypeId)},
+ &DisassemblerDex::MakeReadFieldToTypeId16,
+ &DisassemblerDex::MakeWriteTypeId16},
+ {{2, TypeTag(kMethodIdToClassTypeId), PoolTag(kTypeId)},
+ &DisassemblerDex::MakeReadMethodIdToClassTypeId16,
+ &DisassemblerDex::MakeWriteTypeId16},
+ {{4, TypeTag(kClassDefToClassTypeId), PoolTag(kTypeId)},
+ &DisassemblerDex::MakeReadClassDefToClassTypeId32,
+ &DisassemblerDex::MakeWriteTypeId32},
+ {{4, TypeTag(kClassDefToSuperClassTypeId), PoolTag(kTypeId)},
+ &DisassemblerDex::MakeReadClassDefToSuperClassTypeId32,
+ &DisassemblerDex::MakeWriteTypeId32},
+ {{2, TypeTag(kTypeListToTypeId), PoolTag(kTypeId)},
+ &DisassemblerDex::MakeReadTypeListToTypeId16,
+ &DisassemblerDex::MakeWriteTypeId16},
+ {{2, TypeTag(kCodeToTypeId), PoolTag(kTypeId)},
+ &DisassemblerDex::MakeReadCodeToTypeId16,
+ &DisassemblerDex::MakeWriteTypeId16},
+ {{2, TypeTag(kMethodIdToProtoId), PoolTag(kProtoId)},
+ &DisassemblerDex::MakeReadMethodIdToProtoId16,
+ &DisassemblerDex::MakeWriteProtoId16},
+ {{2, TypeTag(kCodeToFieldId), PoolTag(kFieldId)},
+ &DisassemblerDex::MakeReadCodeToFieldId16,
+ &DisassemblerDex::MakeWriteFieldId16},
+ {{4, TypeTag(kAnnotationsDirectoryToFieldId), PoolTag(kFieldId)},
+ &DisassemblerDex::MakeReadAnnotationsDirectoryToFieldId32,
+ &DisassemblerDex::MakeWriteFieldId32},
+ {{2, TypeTag(kCodeToMethodId), PoolTag(kMethodId)},
+ &DisassemblerDex::MakeReadCodeToMethodId16,
+ &DisassemblerDex::MakeWriteMethodId16},
+ {{4, TypeTag(kAnnotationsDirectoryToMethodId), PoolTag(kMethodId)},
+ &DisassemblerDex::MakeReadAnnotationsDirectoryToMethodId32,
+ &DisassemblerDex::MakeWriteMethodId32},
+ {{4, TypeTag(kAnnotationsDirectoryToParameterMethodId),
+ PoolTag(kMethodId)},
+ &DisassemblerDex::MakeReadAnnotationsDirectoryToParameterMethodId32,
+ &DisassemblerDex::MakeWriteMethodId32},
+ {{4, TypeTag(kProtoIdToParametersTypeList), PoolTag(kTypeList)},
+ &DisassemblerDex::MakeReadProtoIdToParametersTypeList,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kClassDefToInterfacesTypeList), PoolTag(kTypeList)},
+ &DisassemblerDex::MakeReadClassDefToInterfacesTypeList,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kAnnotationsDirectoryToParameterAnnotationSetRef),
+ PoolTag(kAnnotationSetRefList)},
+ &DisassemblerDex::
+ MakeReadAnnotationsDirectoryToParameterAnnotationSetRef,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kAnnotationSetRefListToAnnotationSet),
+ PoolTag(kAnnotionSet)},
+ &DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kAnnotationsDirectoryToClassAnnotationSet),
+ PoolTag(kAnnotionSet)},
+ &DisassemblerDex::MakeReadAnnotationsDirectoryToClassAnnotationSet,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kAnnotationsDirectoryToFieldAnnotationSet),
+ PoolTag(kAnnotionSet)},
+ &DisassemblerDex::MakeReadAnnotationsDirectoryToFieldAnnotationSet,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kAnnotationsDirectoryToMethodAnnotationSet),
+ PoolTag(kAnnotionSet)},
+ &DisassemblerDex::MakeReadAnnotationsDirectoryToMethodAnnotationSet,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kClassDefToClassData), PoolTag(kClassData)},
+ &DisassemblerDex::MakeReadClassDefToClassData,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{1, TypeTag(kCodeToRelCode8), PoolTag(kCode)},
+ &DisassemblerDex::MakeReadCodeToRelCode8,
+ &DisassemblerDex::MakeWriteRelCode8},
+ {{2, TypeTag(kCodeToRelCode16), PoolTag(kCode)},
+ &DisassemblerDex::MakeReadCodeToRelCode16,
+ &DisassemblerDex::MakeWriteRelCode16},
+ {{4, TypeTag(kCodeToRelCode32), PoolTag(kCode)},
+ &DisassemblerDex::MakeReadCodeToRelCode32,
+ &DisassemblerDex::MakeWriteRelCode32},
+ {{4, TypeTag(kStringIdToStringData), PoolTag(kStringData)},
+ &DisassemblerDex::MakeReadStringIdToStringData,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kAnnotationSetToAnnotation), PoolTag(kAnnotation)},
+ &DisassemblerDex::MakeReadAnnotationSetToAnnotation,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kClassDefToStaticValuesEncodedArray),
+ PoolTag(kEncodedArray)},
+ &DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray,
+ &DisassemblerDex::MakeWriteAbs32},
+ {{4, TypeTag(kClassDefToAnnotationDirectory),
+ PoolTag(kAnnotationsDirectory)},
+ &DisassemblerDex::MakeReadClassDefToAnnotationDirectory,
+ &DisassemblerDex::MakeWriteAbs32},
+ };
+}
+
+// Reader: string_ids[i].string_data_off -> string data (abs32 offset).
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadStringIdToStringData(
+ offset_t lo,
+ offset_t hi) {
+ // dex::StringIdItem::string_data_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, string_map_item_, sizeof(dex::StringIdItem),
+ offsetof(dex::StringIdItem, string_data_off), std::move(mapper));
+}
+
+// Reader: type_ids[i].descriptor_idx (32-bit index) -> string_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadTypeIdToDescriptorStringId32(offset_t lo,
+ offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::TypeIdItem::descriptor_idx)>, image_,
+ string_map_item_, sizeof(dex::StringIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, type_map_item_, sizeof(dex::TypeIdItem),
+ offsetof(dex::TypeIdItem, descriptor_idx), std::move(mapper));
+}
+
+// Reader: proto_ids[i].shorty_idx (32-bit index) -> string_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadProtoIdToShortyStringId32(offset_t lo, offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::ProtoIdItem::shorty_idx)>, image_,
+ string_map_item_, sizeof(dex::StringIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem),
+ offsetof(dex::ProtoIdItem, shorty_idx), std::move(mapper));
+}
+
+// Reader: proto_ids[i].return_type_idx (32-bit index) -> type_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadProtoIdToReturnTypeId32(offset_t lo, offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::ProtoIdItem::return_type_idx)>, image_,
+ type_map_item_, sizeof(dex::TypeIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem),
+ offsetof(dex::ProtoIdItem, return_type_idx), std::move(mapper));
+}
+
+// Reader: proto_ids[i].parameters_off (abs32 offset) -> type_list.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadProtoIdToParametersTypeList(offset_t lo, offset_t hi) {
+ // dex::ProtoIdItem::parameters_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem),
+ offsetof(dex::ProtoIdItem, parameters_off), std::move(mapper));
+}
+
+// Reader: field_ids[i].class_idx (16-bit index) -> type_ids item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToClassTypeId16(
+ offset_t lo,
+ offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::FieldIdItem::class_idx)>, image_,
+ type_map_item_, sizeof(dex::TypeIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, field_map_item_, sizeof(dex::FieldIdItem),
+ offsetof(dex::FieldIdItem, class_idx), std::move(mapper));
+}
+
+// Reader: field_ids[i].type_idx (16-bit index) -> type_ids item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToTypeId16(
+ offset_t lo,
+ offset_t hi) {
+ auto mapper =
+ base::BindRepeating(ReadTargetIndex<decltype(dex::FieldIdItem::type_idx)>,
+ image_, type_map_item_, sizeof(dex::TypeIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, field_map_item_, sizeof(dex::FieldIdItem),
+ offsetof(dex::FieldIdItem, type_idx), std::move(mapper));
+}
+
+// Reader: field_ids[i].name_idx (32-bit index) -> string_ids item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToNameStringId32(
+ offset_t lo,
+ offset_t hi) {
+ auto mapper =
+ base::BindRepeating(ReadTargetIndex<decltype(dex::FieldIdItem::name_idx)>,
+ image_, string_map_item_, sizeof(dex::StringIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, field_map_item_, sizeof(dex::FieldIdItem),
+ offsetof(dex::FieldIdItem, name_idx), std::move(mapper));
+}
+
+// Reader: method_ids[i].class_idx (16-bit index) -> type_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadMethodIdToClassTypeId16(offset_t lo, offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::MethodIdItem::class_idx)>, image_,
+ type_map_item_, sizeof(dex::TypeIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, method_map_item_, sizeof(dex::MethodIdItem),
+ offsetof(dex::MethodIdItem, class_idx), std::move(mapper));
+}
+
+// Reader: method_ids[i].proto_idx (16-bit index) -> proto_ids item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadMethodIdToProtoId16(
+ offset_t lo,
+ offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::MethodIdItem::proto_idx)>, image_,
+ proto_map_item_, sizeof(dex::ProtoIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, method_map_item_, sizeof(dex::MethodIdItem),
+ offsetof(dex::MethodIdItem, proto_idx), std::move(mapper));
+}
+
+// Reader: method_ids[i].name_idx (32-bit index) -> string_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadMethodIdToNameStringId32(offset_t lo, offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::MethodIdItem::name_idx)>, image_,
+ string_map_item_, sizeof(dex::StringIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, method_map_item_, sizeof(dex::MethodIdItem),
+ offsetof(dex::MethodIdItem, name_idx), std::move(mapper));
+}
+
+// Reader: class_defs[i].class_idx (32-bit index) -> type_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadClassDefToClassTypeId32(offset_t lo, offset_t hi) {
+ // Use the type of |class_idx| — the field actually read below. The previous
+ // decltype referenced |superclass_idx| (copy-paste from the sibling reader);
+ // both are uint32_t so behavior is unchanged, but this is now correct by
+ // construction if the struct ever changes.
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::ClassDefItem::class_idx)>, image_,
+ type_map_item_, sizeof(dex::TypeIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
+ offsetof(dex::ClassDefItem, class_idx), std::move(mapper));
+}
+
+// Reader: class_defs[i].superclass_idx (32-bit index, may be the 0xFFFFFFFF
+// "no superclass" sentinel) -> type_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadClassDefToSuperClassTypeId32(offset_t lo,
+ offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::ClassDefItem::superclass_idx)>, image_,
+ type_map_item_, sizeof(dex::TypeIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
+ offsetof(dex::ClassDefItem, superclass_idx), std::move(mapper));
+}
+
+// Reader: class_defs[i].interfaces_off (abs32 offset) -> type_list.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadClassDefToInterfacesTypeList(offset_t lo,
+ offset_t hi) {
+ // dex::ClassDefItem::interfaces_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
+ offsetof(dex::ClassDefItem, interfaces_off), std::move(mapper));
+}
+
+// Reader: class_defs[i].source_file_idx (32-bit index, may be the 0xFFFFFFFF
+// "no source file" sentinel) -> string_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadClassDefToSourceFileStringId32(offset_t lo,
+ offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::ClassDefItem::source_file_idx)>, image_,
+ string_map_item_, sizeof(dex::StringIdItem));
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
+ offsetof(dex::ClassDefItem, source_file_idx), std::move(mapper));
+}
+
+// Reader: class_defs[i].annotations_off (abs32 offset) -> annotations dir.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadClassDefToAnnotationDirectory(offset_t lo,
+ offset_t hi) {
+ // dex::ClassDefItem::annotations_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
+ offsetof(dex::ClassDefItem, annotations_off), std::move(mapper));
+}
+
+// Reader: class_defs[i].class_data_off (abs32 offset) -> class_data_item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadClassDefToClassData(
+ offset_t lo,
+ offset_t hi) {
+ // dex::ClassDefItem::class_data_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
+ offsetof(dex::ClassDefItem, class_data_off), std::move(mapper));
+}
+
+// Reader: class_defs[i].static_values_off (abs32 offset) -> encoded array.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray(offset_t lo,
+ offset_t hi) {
+ // dex::ClassDefItem::static_values_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<ItemReferenceReader>(
+ lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
+ offsetof(dex::ClassDefItem, static_values_off), std::move(mapper));
+}
+
+// Reader: type_list entries' type_idx (16-bit index) -> type_ids item.
+// Iterates pre-parsed type lists via cached |type_list_offsets_|.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadTypeListToTypeId16(
+ offset_t lo,
+ offset_t hi) {
+ auto mapper =
+ base::BindRepeating(ReadTargetIndex<decltype(dex::TypeItem::type_idx)>,
+ image_, type_map_item_, sizeof(dex::TypeIdItem));
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::TypeItem, type_idx), type_list_offsets_,
+ std::move(mapper));
+}
+
+// Reader: annotation_set entries' annotation_off (abs32) -> annotation item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationSetToAnnotation(offset_t lo, offset_t hi) {
+ // dex::AnnotationOffItem::annotation_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::AnnotationOffItem, annotation_off),
+ annotation_set_offsets_, std::move(mapper));
+}
+
+// Reader: annotation_set_ref_list entries' annotations_off (abs32) ->
+// annotation_set.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet(offset_t lo,
+ offset_t hi) {
+ // dex::AnnotationSetRefItem::annotations_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::AnnotationSetRefItem, annotations_off),
+ annotation_set_ref_list_offsets_, std::move(mapper));
+}
+
+// Reader: annotations_directory.class_annotations_off (abs32) ->
+// annotation_set.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationsDirectoryToClassAnnotationSet(offset_t lo,
+ offset_t hi) {
+ // dex::AnnotationsDirectoryItem::class_annotations_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::AnnotationsDirectoryItem, class_annotations_off),
+ annotations_directory_item_offsets_, std::move(mapper));
+}
+
+// Reader: field_annotation.field_idx (32-bit index) -> field_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationsDirectoryToFieldId32(offset_t lo,
+ offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::FieldAnnotation::field_idx)>, image_,
+ field_map_item_, sizeof(dex::FieldIdItem));
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::FieldAnnotation, field_idx),
+ annotations_directory_item_field_annotation_offsets_, std::move(mapper));
+}
+
+// Reader: field_annotation.annotations_off (abs32) -> annotation_set.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationsDirectoryToFieldAnnotationSet(offset_t lo,
+ offset_t hi) {
+ // dex::FieldAnnotation::annotations_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::FieldAnnotation, annotations_off),
+ annotations_directory_item_field_annotation_offsets_, std::move(mapper));
+}
+
+// Reader: method_annotation.method_idx (32-bit index) -> method_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationsDirectoryToMethodId32(offset_t lo,
+ offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::MethodAnnotation::method_idx)>, image_,
+ method_map_item_, sizeof(dex::MethodIdItem));
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::MethodAnnotation, method_idx),
+ annotations_directory_item_method_annotation_offsets_, std::move(mapper));
+}
+
+// Reader: method_annotation.annotations_off (abs32) -> annotation_set.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationsDirectoryToMethodAnnotationSet(
+ offset_t lo,
+ offset_t hi) {
+ // dex::MethodAnnotation::annotations_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::MethodAnnotation, annotations_off),
+ annotations_directory_item_method_annotation_offsets_, std::move(mapper));
+}
+
+// Reader: parameter_annotation.method_idx (32-bit index) -> method_ids item.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationsDirectoryToParameterMethodId32(
+ offset_t lo,
+ offset_t hi) {
+ auto mapper = base::BindRepeating(
+ ReadTargetIndex<decltype(dex::ParameterAnnotation::method_idx)>, image_,
+ method_map_item_, sizeof(dex::MethodIdItem));
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::ParameterAnnotation, method_idx),
+ annotations_directory_item_parameter_annotation_offsets_,
+ std::move(mapper));
+}
+
+// Reader: parameter_annotation.annotations_off (abs32) ->
+// annotation_set_ref_list.
+std::unique_ptr<ReferenceReader>
+DisassemblerDex::MakeReadAnnotationsDirectoryToParameterAnnotationSetRef(
+ offset_t lo,
+ offset_t hi) {
+ // dex::ParameterAnnotation::annotations_off mapper.
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
+ return std::make_unique<CachedItemListReferenceReader>(
+ lo, hi, offsetof(dex::ParameterAnnotation, annotations_off),
+ annotations_directory_item_parameter_annotation_offsets_,
+ std::move(mapper));
+}
+
+// Reader: const-string (0x1A) operand BBBB (16-bit index) -> string_ids item.
+// |filter| returns the operand's location within the instruction, or
+// kInvalidOffset for non-matching instructions.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId16(
+ offset_t lo,
+ offset_t hi) {
+ auto filter = base::BindRepeating(
+ [](const InstructionParser::Value& value) -> offset_t {
+ if (value.instr->format == dex::FormatId::c &&
+ (value.instr->opcode == 0x1A)) { // const-string
+ // BBBB from e.g., const-string vAA, string@BBBB.
+ return value.instr_offset + 2;
+ }
+ return kInvalidOffset;
+ });
+ auto mapper =
+ base::BindRepeating(ReadTargetIndex<uint16_t>, image_, string_map_item_,
+ sizeof(dex::StringIdItem));
+ return std::make_unique<InstructionReferenceReader>(
+ image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
+}
+
+// Reader: const-string/jumbo (0x1B) operand BBBBBBBB (32-bit index) ->
+// string_ids item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId32(
+ offset_t lo,
+ offset_t hi) {
+ auto filter = base::BindRepeating(
+ [](const InstructionParser::Value& value) -> offset_t {
+ if (value.instr->format == dex::FormatId::c &&
+ (value.instr->opcode == 0x1B)) { // const-string/jumbo
+ // BBBBBBBB from e.g., const-string/jumbo vAA, string@BBBBBBBB.
+ return value.instr_offset + 2;
+ }
+ return kInvalidOffset;
+ });
+ auto mapper =
+ base::BindRepeating(ReadTargetIndex<uint32_t>, image_, string_map_item_,
+ sizeof(dex::StringIdItem));
+ return std::make_unique<InstructionReferenceReader>(
+ image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
+}
+
+// Reader: type-referencing instructions' operand BBBB (16-bit index) ->
+// type_ids item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToTypeId16(
+ offset_t lo,
+ offset_t hi) {
+ auto filter = base::BindRepeating(
+ [](const InstructionParser::Value& value) -> offset_t {
+ if (value.instr->format == dex::FormatId::c &&
+ (value.instr->opcode == 0x1C || // const-class
+ value.instr->opcode == 0x1F || // check-cast
+ value.instr->opcode == 0x20 || // instance-of
+ value.instr->opcode == 0x22 || // new-instance
+ value.instr->opcode == 0x23 || // new-array
+ value.instr->opcode == 0x24 || // filled-new-array
+ value.instr->opcode == 0x25)) { // filled-new-array/range
+ // BBBB from e.g., const-class vAA, type@BBBB.
+ return value.instr_offset + 2;
+ }
+ return kInvalidOffset;
+ });
+ auto mapper = base::BindRepeating(ReadTargetIndex<uint16_t>, image_,
+ type_map_item_, sizeof(dex::TypeIdItem));
+ return std::make_unique<InstructionReferenceReader>(
+ image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
+}
+
+// Reader: field-access instructions' operand CCCC (16-bit index) ->
+// field_ids item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToFieldId16(
+ offset_t lo,
+ offset_t hi) {
+ auto filter = base::BindRepeating(
+ [](const InstructionParser::Value& value) -> offset_t {
+ if (value.instr->format == dex::FormatId::c &&
+ (value.instr->opcode == 0x52 || // iinstanceop (iget-*, iput-*)
+ value.instr->opcode == 0x60)) { // sstaticop (sget-*, sput-*)
+ // CCCC from e.g., iget vA, vB, field@CCCC.
+ return value.instr_offset + 2;
+ }
+ return kInvalidOffset;
+ });
+ auto mapper = base::BindRepeating(ReadTargetIndex<uint16_t>, image_,
+ field_map_item_, sizeof(dex::FieldIdItem));
+ return std::make_unique<InstructionReferenceReader>(
+ image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
+}
+
+// Reader: invoke instructions' operand BBBB (16-bit index) -> method_ids item.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToMethodId16(
+ offset_t lo,
+ offset_t hi) {
+ auto filter = base::BindRepeating(
+ [](const InstructionParser::Value& value) -> offset_t {
+ if (value.instr->format == dex::FormatId::c &&
+ (value.instr->opcode == 0x6E || // invoke-kind
+ value.instr->opcode == 0x74)) { // invoke-kind/range
+ // BBBB from e.g., invoke-virtual {vC, vD, vE, vF, vG}, meth@BBBB.
+ return value.instr_offset + 2;
+ }
+ return kInvalidOffset;
+ });
+ auto mapper =
+ base::BindRepeating(ReadTargetIndex<uint16_t>, image_, method_map_item_,
+ sizeof(dex::MethodIdItem));
+ return std::make_unique<InstructionReferenceReader>(
+ image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
+}
+
+// Reader: goto (0x28) 8-bit branch operand +AA -> absolute code offset.
+// The delta is in 16-bit code units relative to the instruction start.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode8(
+ offset_t lo,
+ offset_t hi) {
+ auto filter = base::BindRepeating(
+ [](const InstructionParser::Value& value) -> offset_t {
+ if (value.instr->format == dex::FormatId::t &&
+ value.instr->opcode == 0x28) { // goto
+ // +AA from e.g., goto +AA.
+ return value.instr_offset + 1;
+ }
+ return kInvalidOffset;
+ });
+ auto mapper = base::BindRepeating(
+ [](DisassemblerDex* dis, offset_t location) {
+ // Address is relative to the current instruction, which begins 1 unit
+ // before |location|. This needs to be subtracted out. Also, store as
+ // int32_t so |unsafe_delta - 1| won't underflow!
+ int32_t unsafe_delta = dis->image_.read<int8_t>(location);
+ offset_t unsafe_target = static_cast<offset_t>(
+ location + (unsafe_delta - 1) * kInstrUnitSize);
+ // TODO(huangs): Check that |unsafe_target| stays within code item.
+ return unsafe_target;
+ },
+ base::Unretained(this)); // Safe: |this| owns the returned reader's data.
+ return std::make_unique<InstructionReferenceReader>(
+ image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
+}
+
+// Reader: goto/16 and if-* 16-bit branch operand +AAAA -> absolute code
+// offset. The delta is in 16-bit code units relative to instruction start.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode16(
+ offset_t lo,
+ offset_t hi) {
+ auto filter = base::BindRepeating(
+ [](const InstructionParser::Value& value) -> offset_t {
+ if (value.instr->format == dex::FormatId::t &&
+ (value.instr->opcode == 0x29 || // goto/16
+ value.instr->opcode == 0x32 || // if-test
+ value.instr->opcode == 0x38)) { // if-testz
+ // +AAAA from e.g., goto/16 +AAAA.
+ return value.instr_offset + 2;
+ }
+ return kInvalidOffset;
+ });
+ auto mapper = base::BindRepeating(
+ [](DisassemblerDex* dis, offset_t location) {
+ // Address is relative to the current instruction, which begins 1 unit
+ // before |location|. This needs to be subtracted out. Also, store as
+ // int32_t so |unsafe_delta - 1| won't underflow!
+ int32_t unsafe_delta = dis->image_.read<int16_t>(location);
+ offset_t unsafe_target = static_cast<offset_t>(
+ location + (unsafe_delta - 1) * kInstrUnitSize);
+ // TODO(huangs): Check that |unsafe_target| stays within code item.
+ return unsafe_target;
+ },
+ base::Unretained(this)); // Safe: |this| owns the returned reader's data.
+ return std::make_unique<InstructionReferenceReader>(
+ image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
+}
+
+// Reader: 32-bit branch/payload operands (fill-array-data, goto/32,
+// packed-switch, sparse-switch) -> absolute code offset. Unlike the 8/16-bit
+// variants, the 32-bit delta can overflow offset_t, so the result is
+// range-checked via CheckedNumeric.
+std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode32(
+ offset_t lo,
+ offset_t hi) {
+ auto filter = base::BindRepeating(
+ [](const InstructionParser::Value& value) -> offset_t {
+ if (value.instr->format == dex::FormatId::t &&
+ (value.instr->opcode == 0x26 || // fill-array-data
+ value.instr->opcode == 0x2A || // goto/32
+ value.instr->opcode == 0x2B || // packed-switch
+ value.instr->opcode == 0x2C)) { // sparse-switch
+ // +BBBBBBBB from e.g., fill-array-data vAA, +BBBBBBBB.
+ // +AAAAAAAA from e.g., goto/32 +AAAAAAAA.
+ return value.instr_offset + 2;
+ }
+ return kInvalidOffset;
+ });
+ auto mapper = base::BindRepeating(
+ [](DisassemblerDex* dis, offset_t location) {
+ // Address is relative to the current instruction, which begins 1 unit
+ // before |location|. This needs to be subtracted out. Use int64_t to
+ // avoid underflow and overflow.
+ int64_t unsafe_delta = dis->image_.read<int32_t>(location);
+ int64_t unsafe_target = location + (unsafe_delta - 1) * kInstrUnitSize;
+
+ // TODO(huangs): Check that |unsafe_target| stays within code item.
+ offset_t checked_unsafe_target =
+ static_cast<offset_t>(base::CheckedNumeric<offset_t>(unsafe_target)
+ .ValueOrDefault(kInvalidOffset));
+ return checked_unsafe_target < kOffsetBound ? checked_unsafe_target
+ : kInvalidOffset;
+ },
+ base::Unretained(this)); // Safe: |this| owns the returned reader's data.
+ return std::make_unique<InstructionReferenceReader>(
+ image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
+}
+
+// Writer: target offset -> 16-bit string_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId16(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(
+ WriteTargetIndex<uint16_t>, string_map_item_, sizeof(dex::StringIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: target offset -> 32-bit string_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId32(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(
+ WriteTargetIndex<uint32_t>, string_map_item_, sizeof(dex::StringIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: target offset -> 16-bit type_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId16(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, type_map_item_,
+ sizeof(dex::TypeIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: target offset -> 32-bit type_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId32(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(WriteTargetIndex<uint32_t>, type_map_item_,
+ sizeof(dex::TypeIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: target offset -> 16-bit proto_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteProtoId16(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, proto_map_item_,
+ sizeof(dex::ProtoIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: target offset -> 16-bit field_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId16(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, field_map_item_,
+ sizeof(dex::FieldIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: target offset -> 32-bit field_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId32(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(WriteTargetIndex<uint32_t>, field_map_item_,
+ sizeof(dex::FieldIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: target offset -> 16-bit method_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId16(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(
+ WriteTargetIndex<uint16_t>, method_map_item_, sizeof(dex::MethodIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: target offset -> 32-bit method_ids index.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId32(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating(
+ WriteTargetIndex<uint32_t>, method_map_item_, sizeof(dex::MethodIdItem));
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+// Writer: absolute code offset -> 8-bit branch delta (in 16-bit code units,
+// relative to instruction start). Out-of-range deltas are logged and skipped.
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode8(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) {
+ ptrdiff_t unsafe_byte_diff =
+ static_cast<ptrdiff_t>(ref.target) - ref.location;
+ DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize);
+ // |delta| is relative to start of instruction, which is 1 unit before
+ // |ref.location|. The subtraction above removed too much, so +1 to fix.
+ base::CheckedNumeric<int8_t> delta((unsafe_byte_diff / kInstrUnitSize) + 1);
+ if (!delta.IsValid()) {
+ LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << ".";
+ return;
+ }
+ image.write<int8_t>(ref.location, delta.ValueOrDie());
+ });
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode16(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) {
+ ptrdiff_t unsafe_byte_diff =
+ static_cast<ptrdiff_t>(ref.target) - ref.location;
+ DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize);
+ // |delta| is relative to start of instruction, which is 1 unit before
+ // |ref.location|. The subtraction above removed too much, so +1 to fix.
+ base::CheckedNumeric<int16_t> delta((unsafe_byte_diff / kInstrUnitSize) +
+ 1);
+ if (!delta.IsValid()) {
+ LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << ".";
+ return;
+ }
+ image.write<int16_t>(ref.location, delta.ValueOrDie());
+ });
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode32(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) {
+ ptrdiff_t unsafe_byte_diff =
+ static_cast<ptrdiff_t>(ref.target) - ref.location;
+ DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize);
+ // |delta| is relative to start of instruction, which is 1 unit before
+ // |ref.location|. The subtraction above removed too much, so +1 to fix.
+ base::CheckedNumeric<int32_t> delta((unsafe_byte_diff / kInstrUnitSize) +
+ 1);
+ if (!delta.IsValid()) {
+ LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << ".";
+ return;
+ }
+ image.write<int32_t>(ref.location, delta.ValueOrDie());
+ });
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteAbs32(
+ MutableBufferView image) {
+ auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) {
+ image.write<uint32_t>(ref.location, ref.target);
+ });
+ return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
+}
+
// Disassembler interface. Stores a view of the whole |image| and delegates to
// ParseHeader(), which also shrinks |image_| to the DEX-declared file size.
bool DisassemblerDex::Parse(ConstBufferView image) {
  image_ = image;
  return ParseHeader();
}
+
// Reads and validates the DEX header and map list, copies the main map items,
// then parses offsets of variable-length items (type lists, annotations, code
// items). Returns true if the file is structurally sound. Note that reference
// data contained in parsed items still requires validation later.
bool DisassemblerDex::ParseHeader() {
  ReadDexHeaderResults results;
  if (!ReadDexHeader(image_, &results))
    return false;

  header_ = results.header;
  dex_version_ = results.dex_version;
  BufferSource source = results.source;

  // DEX header contains file size, so use it to resize |image_| right away.
  image_.shrink(header_->file_size);

  // Read map list. This is not a fixed-size array, so instead of reading
  // MapList directly, read |MapList::size| first, then visit elements in
  // |MapList::list|.
  static_assert(
      offsetof(dex::MapList, list) == sizeof(decltype(dex::MapList::size)),
      "MapList size error.");
  source = std::move(BufferSource(image_).Skip(header_->map_off));
  decltype(dex::MapList::size) list_size = 0;
  if (!source.GetValue(&list_size) || list_size > dex::kMaxItemListSize)
    return false;
  const auto* item_list = source.GetArray<const dex::MapItem>(list_size);
  if (!item_list)
    return false;

  // Read and validate map list, ensuring that required item types are present.
  // - GetItemBaseSize() should have an entry for each item.
  // - dex::kTypeCodeItem is actually not required; it's possible to have a DEX
  //   file with classes that have no code. However, this is unlikely to appear
  //   in application, so for simplicity we require DEX files to have code.
  std::set<uint16_t> required_item_types = {
      dex::kTypeStringIdItem, dex::kTypeTypeIdItem, dex::kTypeProtoIdItem,
      dex::kTypeFieldIdItem,  dex::kTypeMethodIdItem, dex::kTypeClassDefItem,
      dex::kTypeTypeList,     dex::kTypeCodeItem,
  };
  for (offset_t i = 0; i < list_size; ++i) {
    const dex::MapItem* item = &item_list[i];
    // Reject unreasonably large |item->size|.
    size_t item_size = GetItemBaseSize(item->type);
    // Confusing name: |item->size| is actually the number of items.
    if (!image_.covers_array(item->offset, item->size, item_size))
      return false;
    if (!map_item_map_.insert(std::make_pair(item->type, item)).second)
      return false;  // A given type must appear at most once.
    required_item_types.erase(item->type);
  }
  // TODO(huangs): Replace this with guards throughout file.
  if (!required_item_types.empty())
    return false;

  // Make local copies of main map items.
  string_map_item_ = *map_item_map_[dex::kTypeStringIdItem];
  type_map_item_ = *map_item_map_[dex::kTypeTypeIdItem];
  proto_map_item_ = *map_item_map_[dex::kTypeProtoIdItem];
  field_map_item_ = *map_item_map_[dex::kTypeFieldIdItem];
  method_map_item_ = *map_item_map_[dex::kTypeMethodIdItem];
  class_def_map_item_ = *map_item_map_[dex::kTypeClassDefItem];
  type_list_map_item_ = *map_item_map_[dex::kTypeTypeList];
  code_map_item_ = *map_item_map_[dex::kTypeCodeItem];

  // The following types are optional and may not be present in every DEX file.
  // Their map items stay zero-initialized if absent.
  if (map_item_map_.count(dex::kTypeAnnotationSetRefList)) {
    annotation_set_ref_list_map_item_ =
        *map_item_map_[dex::kTypeAnnotationSetRefList];
  }
  if (map_item_map_.count(dex::kTypeAnnotationSetItem))
    annotation_set_map_item_ = *map_item_map_[dex::kTypeAnnotationSetItem];
  if (map_item_map_.count(dex::kTypeAnnotationsDirectoryItem)) {
    annotations_directory_map_item_ =
        *map_item_map_[dex::kTypeAnnotationsDirectoryItem];
  }

  // Iteratively parse variable length lists, annotations directory items, and
  // code items blocks. Any failure would indicate invalid DEX. Success
  // indicates that no structural problem is found. However, contained
  // references data read from parsed items still require validation.
  if (!(ParseItemOffsets(image_, type_list_map_item_, sizeof(dex::TypeItem),
                         &type_list_offsets_) &&
        ParseItemOffsets(image_, annotation_set_ref_list_map_item_,
                         sizeof(dex::AnnotationSetRefItem),
                         &annotation_set_ref_list_offsets_) &&
        ParseItemOffsets(image_, annotation_set_map_item_,
                         sizeof(dex::AnnotationOffItem),
                         &annotation_set_offsets_) &&
        ParseAnnotationsDirectoryItems(
            image_, annotations_directory_map_item_,
            &annotations_directory_item_offsets_,
            &annotations_directory_item_field_annotation_offsets_,
            &annotations_directory_item_method_annotation_offsets_,
            &annotations_directory_item_parameter_annotation_offsets_))) {
    return false;
  }
  CodeItemParser code_item_parser(image_);
  if (!code_item_parser.Init(code_map_item_))
    return false;
  code_item_offsets_.resize(code_map_item_.size);
  for (size_t i = 0; i < code_map_item_.size; ++i) {
    const offset_t code_item_offset = code_item_parser.GetNext();
    if (code_item_offset == kInvalidOffset)
      return false;
    code_item_offsets_[i] = code_item_offset;
  }
  // DEX files are required to have parsable code items.
  return !code_item_offsets_.empty();
}
+
+} // namespace zucchini
diff --git a/disassembler_dex.h b/disassembler_dex.h
new file mode 100644
index 0000000..2038a3c
--- /dev/null
+++ b/disassembler_dex.h
@@ -0,0 +1,273 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_DEX_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_DEX_H_
+
+#include <stdint.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/type_dex.h"
+
+namespace zucchini {
+
+// For consistency, let "canonical order" of DEX data types be the order defined
+// in https://source.android.com/devices/tech/dalvik/dex-format "Type Codes"
+// section.
+
class DisassemblerDex : public Disassembler {
 public:
  // Pools follow canonical order.
  enum ReferencePool : uint8_t {
    kStringId,
    kTypeId,
    kProtoId,
    kFieldId,
    kMethodId,
    // kClassDef,  // Unused
    // kCallSiteId,  // Unused
    // kMethodHandle,  // Unused
    kTypeList,
    kAnnotationSetRefList,
    kAnnotionSet,  // (sic) Misspelling of "kAnnotationSet"; kept as-is since
                   // renaming would break existing users of this identifier.
    kClassData,
    kCode,
    kStringData,
    kAnnotation,
    kEncodedArray,
    kAnnotationsDirectory,
    // kCallSite,  // Unused
    kNumPools
  };

  // Types are grouped and ordered by target ReferencePool. This is required by
  // Zucchini-apply, which visits references by type order and sequentially
  // handles pools in the same order. Type-pool association is established in
  // MakeReferenceGroups(), and verified by a unit test.
  enum ReferenceType : uint8_t {
    kTypeIdToDescriptorStringId,  // kStringId
    kProtoIdToShortyStringId,
    kFieldIdToNameStringId,
    kMethodIdToNameStringId,
    kClassDefToSourceFileStringId,
    kCodeToStringId16,
    kCodeToStringId32,

    kProtoIdToReturnTypeId,  // kTypeId
    kFieldIdToClassTypeId,
    kFieldIdToTypeId,
    kMethodIdToClassTypeId,
    kClassDefToClassTypeId,
    kClassDefToSuperClassTypeId,
    kTypeListToTypeId,
    kCodeToTypeId,

    kMethodIdToProtoId,  // kProtoId

    kCodeToFieldId,  // kFieldId
    kAnnotationsDirectoryToFieldId,

    kCodeToMethodId,  // kMethodId
    kAnnotationsDirectoryToMethodId,
    kAnnotationsDirectoryToParameterMethodId,

    kProtoIdToParametersTypeList,  // kTypeList
    kClassDefToInterfacesTypeList,

    kAnnotationsDirectoryToParameterAnnotationSetRef,  // kAnnotationSetRef,

    kAnnotationSetRefListToAnnotationSet,  // kAnnotationSet,
    kAnnotationsDirectoryToClassAnnotationSet,
    kAnnotationsDirectoryToFieldAnnotationSet,
    kAnnotationsDirectoryToMethodAnnotationSet,

    kClassDefToClassData,  // kClassData

    kCodeToRelCode8,  // kCode
    kCodeToRelCode16,
    kCodeToRelCode32,

    kStringIdToStringData,  // kStringData

    kAnnotationSetToAnnotation,  // kAnnotation

    kClassDefToStaticValuesEncodedArray,  // kEncodedArrayItem

    kClassDefToAnnotationDirectory,  // kAnnotationsDirectory

    // Intentionally ignored references (never appeared in test corpus).
    // kMethodHandleToFieldId,
    // kMethodHandleToMethodId,
    // kCallSiteIdToCallSite,

    kNumTypes
  };

  DisassemblerDex();
  DisassemblerDex(const DisassemblerDex&) = delete;
  const DisassemblerDex& operator=(const DisassemblerDex&) = delete;
  ~DisassemblerDex() override;

  // Applies quick checks to determine if |image| *may* point to the start of an
  // executable. Returns true on success.
  static bool QuickDetect(ConstBufferView image);

  // Disassembler:
  ExecutableType GetExeType() const override;
  std::string GetExeTypeString() const override;
  std::vector<ReferenceGroup> MakeReferenceGroups() const override;

  // Functions that return reference readers. These follow canonical order of
  // *locations* (unlike targets for ReferenceType). This allows functions with
  // similar parsing logic to appear together.
  std::unique_ptr<ReferenceReader> MakeReadStringIdToStringData(offset_t lo,
                                                                offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadTypeIdToDescriptorStringId32(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadProtoIdToShortyStringId32(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadProtoIdToReturnTypeId32(offset_t lo,
                                                                   offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadProtoIdToParametersTypeList(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadFieldToClassTypeId16(offset_t lo,
                                                                offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadFieldToTypeId16(offset_t lo,
                                                           offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadFieldToNameStringId32(offset_t lo,
                                                                 offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadMethodIdToClassTypeId16(offset_t lo,
                                                                   offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadMethodIdToProtoId16(offset_t lo,
                                                               offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadMethodIdToNameStringId32(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadClassDefToClassTypeId32(offset_t lo,
                                                                   offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadClassDefToSuperClassTypeId32(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadClassDefToInterfacesTypeList(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadClassDefToSourceFileStringId32(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadClassDefToAnnotationDirectory(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadClassDefToClassData(offset_t lo,
                                                               offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadClassDefToStaticValuesEncodedArray(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadTypeListToTypeId16(offset_t lo,
                                                              offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadAnnotationSetToAnnotation(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadAnnotationSetRefListToAnnotationSet(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader>
  MakeReadAnnotationsDirectoryToClassAnnotationSet(offset_t lo, offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadAnnotationsDirectoryToFieldId32(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader>
  MakeReadAnnotationsDirectoryToFieldAnnotationSet(offset_t lo, offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadAnnotationsDirectoryToMethodId32(
      offset_t lo,
      offset_t hi);
  std::unique_ptr<ReferenceReader>
  MakeReadAnnotationsDirectoryToMethodAnnotationSet(offset_t lo, offset_t hi);
  std::unique_ptr<ReferenceReader>
  MakeReadAnnotationsDirectoryToParameterMethodId32(offset_t lo, offset_t hi);
  std::unique_ptr<ReferenceReader>
  MakeReadAnnotationsDirectoryToParameterAnnotationSetRef(offset_t lo,
                                                          offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadCodeToStringId16(offset_t lo,
                                                            offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadCodeToStringId32(offset_t lo,
                                                            offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadCodeToTypeId16(offset_t lo,
                                                          offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadCodeToFieldId16(offset_t lo,
                                                           offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadCodeToMethodId16(offset_t lo,
                                                            offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadCodeToRelCode8(offset_t lo,
                                                          offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadCodeToRelCode16(offset_t lo,
                                                           offset_t hi);
  std::unique_ptr<ReferenceReader> MakeReadCodeToRelCode32(offset_t lo,
                                                           offset_t hi);

  // Functions that return reference writers. Different readers may share a
  // common writer. Therefore these loosely follow canonical order of locations.
  std::unique_ptr<ReferenceWriter> MakeWriteStringId16(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteStringId32(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteTypeId16(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteTypeId32(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteProtoId16(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteFieldId16(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteFieldId32(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteMethodId16(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteMethodId32(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteRelCode8(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteRelCode16(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteRelCode32(MutableBufferView image);
  std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image);

 private:
  friend Disassembler;
  using MapItemMap = std::map<uint16_t, const dex::MapItem*>;

  // Disassembler:
  bool Parse(ConstBufferView image) override;

  // Reads and validates the DEX header and map list; parses offsets of
  // variable-length items. Called by Parse().
  bool ParseHeader();

  const dex::HeaderItem* header_ = nullptr;
  int dex_version_ = 0;
  // Maps each map-item type code to its entry in the DEX map list.
  MapItemMap map_item_map_ = {};
  dex::MapItem string_map_item_ = {};
  dex::MapItem type_map_item_ = {};
  dex::MapItem proto_map_item_ = {};
  dex::MapItem field_map_item_ = {};
  dex::MapItem method_map_item_ = {};
  dex::MapItem class_def_map_item_ = {};
  dex::MapItem type_list_map_item_ = {};
  dex::MapItem code_map_item_ = {};

  // Optionally supported (not all DEX files have these). Zero-initialized when
  // absent from the map list.
  dex::MapItem annotation_set_ref_list_map_item_ = {};
  dex::MapItem annotation_set_map_item_ = {};
  dex::MapItem annotations_directory_map_item_ = {};

  // Sorted list of offsets of parsed items in |image_|.
  std::vector<offset_t> code_item_offsets_;
  std::vector<offset_t> type_list_offsets_;
  std::vector<offset_t> annotation_set_ref_list_offsets_;
  std::vector<offset_t> annotation_set_offsets_;
  std::vector<offset_t> annotations_directory_item_offsets_;
  std::vector<offset_t> annotations_directory_item_field_annotation_offsets_;
  std::vector<offset_t> annotations_directory_item_method_annotation_offsets_;
  std::vector<offset_t>
      annotations_directory_item_parameter_annotation_offsets_;
};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_DEX_H_
diff --git a/disassembler_dex_unittest.cc b/disassembler_dex_unittest.cc
new file mode 100644
index 0000000..04fe6eb
--- /dev/null
+++ b/disassembler_dex_unittest.cc
@@ -0,0 +1,51 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_dex.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <set>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
// Returns the number of unique values in |v|.
template <typename T>
size_t CountDistinct(const std::vector<T>& v) {
  std::set<T> unique_values(v.begin(), v.end());
  return unique_values.size();
}
+
+} // namespace
+
+// Ensures that ReferenceGroups from DisassemblerDex::MakeReferenceGroups()
+// cover each non-sentinel element in ReferenceType in order, exactly once. Also
+// ensures that the ReferenceType elements are grouped by ReferencePool, and
+// listed in increasing order.
TEST(DisassemblerDexTest, ReferenceGroups) {
  std::vector<uint32_t> pool_list;
  std::vector<uint32_t> type_list;
  DisassemblerDex dis;
  for (ReferenceGroup group : dis.MakeReferenceGroups()) {
    pool_list.push_back(static_cast<uint32_t>(group.pool_tag().value()));
    type_list.push_back(static_cast<uint32_t>(group.type_tag().value()));
  }

  // Check ReferenceType coverage: every non-sentinel value appears exactly
  // once, in increasing order.
  constexpr size_t kNumTypes = DisassemblerDex::kNumTypes;
  EXPECT_EQ(kNumTypes, type_list.size());
  EXPECT_EQ(kNumTypes, CountDistinct(type_list));
  EXPECT_TRUE(std::is_sorted(type_list.begin(), type_list.end()));

  // Check that ReferenceType elements are grouped by ReferencePool. Note that
  // repeats can occur, and pools can be skipped.
  EXPECT_TRUE(std::is_sorted(pool_list.begin(), pool_list.end()));
}
+
+} // namespace zucchini
diff --git a/disassembler_elf.cc b/disassembler_elf.cc
new file mode 100644
index 0000000..94dc12a
--- /dev/null
+++ b/disassembler_elf.cc
@@ -0,0 +1,855 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_elf.h"
+
+#include <stddef.h>
+
+#include <utility>
+
+#include "base/logging.h"
+#include "base/numerics/checked_math.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/abs32_utils.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/arm_utils.h"
+#include "components/zucchini/buffer_source.h"
+
+namespace zucchini {
+
+namespace {
+
// Image base passed to abs32 extraction (see GetAbs32FromRelocSections()).
constexpr uint64_t kElfImageBase = 0;
// Upper bound applied to RVA and offset ranges during section validation, to
// reject values that could overflow subsequent arithmetic.
constexpr size_t kSizeBound = 0x7FFF0000;

// Threshold value for heuristics to detect THUMB2 code.
constexpr double kAArch32BitCondAlwaysDensityThreshold = 0.4;

// Bit fields for JudgeSection() return value.
enum SectionJudgement : int {
  // Bit: Section does not invalidate ELF, but may or may not be useful.
  SECTION_BIT_SAFE = 1 << 0,
  // Bit: Section useful for AddressTranslator, to map between offsets and RVAs.
  SECTION_BIT_USEFUL_FOR_ADDRESS_TRANSLATOR = 1 << 1,
  // Bit: Section useful for |offset_bound|, to estimate ELF size.
  SECTION_BIT_USEFUL_FOR_OFFSET_BOUND = 1 << 2,
  // Bit: Section potentially useful for pointer extraction.
  SECTION_BIT_MAYBE_USEFUL_FOR_POINTERS = 1 << 3,

  // The following are verdicts from combining bits, to improve semantics.
  // Default value: A section is malformed and invalidates ELF.
  SECTION_IS_MALFORMED = 0,
  // Section does not invalidate ELF, but is also not used for anything.
  SECTION_IS_USELESS = SECTION_BIT_SAFE,
};
+
// Decides how a section affects ELF parsing, and returns a bit field composed
// from SectionJudgement values.
template <class TRAITS>
int JudgeSection(size_t image_size, const typename TRAITS::Elf_Shdr* section) {
  // BufferRegion uses |size_t|, which can be 32-bit in some cases. For Elf64,
  // |sh_addr|, |sh_offset| and |sh_size| are 64-bit; this can result in
  // overflows in the subsequent validation steps.
  if (!base::IsValueInRangeForNumericType<size_t>(section->sh_addr) ||
      !base::IsValueInRangeForNumericType<size_t>(section->sh_offset) ||
      !base::IsValueInRangeForNumericType<size_t>(section->sh_size)) {
    return SECTION_IS_MALFORMED;
  }

  // Examine RVA range: Reject if numerical overflow may happen.
  if (!BufferRegion{static_cast<size_t>(section->sh_addr),
                    static_cast<size_t>(section->sh_size)}
           .FitsIn(kSizeBound))
    return SECTION_IS_MALFORMED;

  // Examine offset range: If section takes up |image| data then be stricter.
  size_t offset_bound =
      (section->sh_type == elf::SHT_NOBITS) ? kSizeBound : image_size;
  if (!BufferRegion{static_cast<size_t>(section->sh_offset),
                    static_cast<size_t>(section->sh_size)}
           .FitsIn(offset_bound))
    return SECTION_IS_MALFORMED;

  // Empty sections don't contribute to offset-RVA mapping. For consistency,
  // they should also not affect |offset_bound|.
  if (section->sh_size == 0)
    return SECTION_IS_USELESS;

  // Sections with |sh_addr == 0| are ignored because these tend to be
  // duplicates (which can cause problems for lookup) and uninteresting. For
  // consistency, they should also not affect |offset_bound|.
  if (section->sh_addr == 0)
    return SECTION_IS_USELESS;

  if (section->sh_type == elf::SHT_NOBITS) {
    // Special case for .tbss sections: These should be ignored because they may
    // have offset-RVA map that don't match other sections.
    if (section->sh_flags & elf::SHF_TLS)
      return SECTION_IS_USELESS;

    // Section is useful for offset-RVA translation, but does not affect
    // |offset_bound| since it can have large virtual size (e.g., .bss).
    return SECTION_BIT_SAFE | SECTION_BIT_USEFUL_FOR_ADDRESS_TRANSLATOR;
  }

  return SECTION_BIT_SAFE | SECTION_BIT_USEFUL_FOR_ADDRESS_TRANSLATOR |
         SECTION_BIT_USEFUL_FOR_OFFSET_BOUND |
         SECTION_BIT_MAYBE_USEFUL_FOR_POINTERS;
}
+
+// Determines whether |section| is a reloc section.
+template <class TRAITS>
+bool IsRelocSection(const typename TRAITS::Elf_Shdr& section) {
+ DCHECK_GT(section.sh_size, 0U);
+ if (section.sh_type == elf::SHT_REL) {
+ // Also validate |section.sh_entsize|, which gets used later.
+ return section.sh_entsize == sizeof(typename TRAITS::Elf_Rel);
+ }
+ if (section.sh_type == elf::SHT_RELA)
+ return section.sh_entsize == sizeof(typename TRAITS::Elf_Rela);
+ return false;
+}
+
+// Determines whether |section| is a section with executable code.
+template <class TRAITS>
+bool IsExecSection(const typename TRAITS::Elf_Shdr& section) {
+ DCHECK_GT(section.sh_size, 0U);
+ return section.sh_type == elf::SHT_PROGBITS &&
+ (section.sh_flags & elf::SHF_EXECINSTR) != 0;
+}
+
+} // namespace
+
/******** Elf32Traits ********/

// Out-of-line definitions for static constexpr members, required for ODR-use
// before C++17 (where such members became implicitly inline).
// static
constexpr Bitness Elf32Traits::kBitness;
constexpr elf::FileClass Elf32Traits::kIdentificationClass;

/******** Elf32IntelTraits ********/

// static
constexpr ExecutableType Elf32IntelTraits::kExeType;
const char Elf32IntelTraits::kExeTypeString[] = "ELF x86";
constexpr elf::MachineArchitecture Elf32IntelTraits::kMachineValue;
constexpr uint32_t Elf32IntelTraits::kRelType;

/******** ElfAArch32Traits ********/

// static
constexpr ExecutableType ElfAArch32Traits::kExeType;
const char ElfAArch32Traits::kExeTypeString[] = "ELF ARM";
constexpr elf::MachineArchitecture ElfAArch32Traits::kMachineValue;
constexpr uint32_t ElfAArch32Traits::kRelType;

/******** Elf64Traits ********/

// static
constexpr Bitness Elf64Traits::kBitness;
constexpr elf::FileClass Elf64Traits::kIdentificationClass;

/******** Elf64IntelTraits ********/

// static
constexpr ExecutableType Elf64IntelTraits::kExeType;
const char Elf64IntelTraits::kExeTypeString[] = "ELF x64";
constexpr elf::MachineArchitecture Elf64IntelTraits::kMachineValue;
constexpr uint32_t Elf64IntelTraits::kRelType;

/******** ElfAArch64Traits ********/

// static
constexpr ExecutableType ElfAArch64Traits::kExeType;
const char ElfAArch64Traits::kExeTypeString[] = "ELF ARM64";
constexpr elf::MachineArchitecture ElfAArch64Traits::kMachineValue;
constexpr uint32_t ElfAArch64Traits::kRelType;
+
+/******** DisassemblerElf ********/
+
+// static.
+template <class TRAITS>
+bool DisassemblerElf<TRAITS>::QuickDetect(ConstBufferView image) {
+ BufferSource source(image);
+
+ // Do not consume the bytes for the magic value, as they are part of the
+ // header.
+ if (!source.CheckNextBytes({0x7F, 'E', 'L', 'F'}))
+ return false;
+
+ auto* header = source.GetPointer<typename Traits::Elf_Ehdr>();
+ if (!header)
+ return false;
+
+ if (header->e_ident[elf::EI_CLASS] != Traits::kIdentificationClass)
+ return false;
+
+ if (header->e_ident[elf::EI_DATA] != 1) // Only ELFDATA2LSB is supported.
+ return false;
+
+ if (header->e_type != elf::ET_EXEC && header->e_type != elf::ET_DYN)
+ return false;
+
+ if (header->e_version != 1 || header->e_ident[elf::EI_VERSION] != 1)
+ return false;
+
+ if (header->e_machine != supported_architecture())
+ return false;
+
+ if (header->e_shentsize != sizeof(typename Traits::Elf_Shdr))
+ return false;
+
+ return true;
+}
+
template <class TRAITS>
DisassemblerElf<TRAITS>::~DisassemblerElf() = default;

// Disassembler interface: executable type tag for this traits instantiation.
template <class TRAITS>
ExecutableType DisassemblerElf<TRAITS>::GetExeType() const {
  return Traits::kExeType;
}

// Disassembler interface: human-readable executable type name.
template <class TRAITS>
std::string DisassemblerElf<TRAITS>::GetExeTypeString() const {
  return Traits::kExeTypeString;
}

// |num_equivalence_iterations_| = 2 for reloc -> abs32.
template <class TRAITS>
DisassemblerElf<TRAITS>::DisassemblerElf() : Disassembler(2) {}
+
+template <class TRAITS>
+bool DisassemblerElf<TRAITS>::Parse(ConstBufferView image) {
+ image_ = image;
+ if (!ParseHeader())
+ return false;
+ ParseSections();
+ return true;
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceReader> DisassemblerElf<TRAITS>::MakeReadRelocs(
+ offset_t lo,
+ offset_t hi) {
+ DCHECK_LE(lo, hi);
+ DCHECK_LE(hi, image_.size());
+
+ if (reloc_section_dims_.empty())
+ return std::make_unique<EmptyReferenceReader>();
+
+ return std::make_unique<RelocReaderElf>(
+ image_, Traits::kBitness, reloc_section_dims_,
+ supported_relocation_type(), lo, hi, translator_);
+}
+
// Returns a writer for reloc references over |image|.
template <class TRAITS>
std::unique_ptr<ReferenceWriter> DisassemblerElf<TRAITS>::MakeWriteRelocs(
    MutableBufferView image) {
  return std::make_unique<RelocWriterElf>(image, Traits::kBitness, translator_);
}
+
// Validates the ELF header, section table, and segment table; builds the
// offset<->RVA |translator_| from useful sections; and shrinks |image_| to
// the estimated end of file data (|offset_bound|).
template <class TRAITS>
bool DisassemblerElf<TRAITS>::ParseHeader() {
  BufferSource source(image_);
  // Ensure any offsets will fit within the |image_|'s bounds.
  if (!base::IsValueInRangeForNumericType<offset_t>(image_.size()))
    return false;

  // Ensures |header_| is valid later on.
  if (!QuickDetect(image_))
    return false;

  header_ = source.GetPointer<typename Traits::Elf_Ehdr>();

  sections_count_ = header_->e_shnum;
  source = std::move(BufferSource(image_).Skip(header_->e_shoff));
  sections_ = source.GetArray<typename Traits::Elf_Shdr>(sections_count_);
  if (!sections_)
    return false;
  offset_t section_table_end =
      base::checked_cast<offset_t>(source.begin() - image_.begin());

  segments_count_ = header_->e_phnum;
  source = std::move(BufferSource(image_).Skip(header_->e_phoff));
  segments_ = source.GetArray<typename Traits::Elf_Phdr>(segments_count_);
  if (!segments_)
    return false;
  offset_t segment_table_end =
      base::checked_cast<offset_t>(source.begin() - image_.begin());

  // Check string section -- even though we've stopped using them.
  elf::Elf32_Half string_section_id = header_->e_shstrndx;
  if (string_section_id >= sections_count_)
    return false;
  size_t section_names_size = sections_[string_section_id].sh_size;
  if (section_names_size > 0) {
    // If nonempty, then last byte of string section must be null.
    const char* section_names = nullptr;
    source = std::move(
        BufferSource(image_).Skip(sections_[string_section_id].sh_offset));
    section_names = source.GetArray<char>(section_names_size);
    if (!section_names || section_names[section_names_size - 1] != '\0')
      return false;
  }

  // Establish bound on encountered offsets.
  offset_t offset_bound = std::max(section_table_end, segment_table_end);

  // Visits |segments_| to get estimate on |offset_bound|.
  for (const typename Traits::Elf_Phdr* segment = segments_;
       segment != segments_ + segments_count_; ++segment) {
    // |image_.covers()| is a sufficient check except when size_t is 32 bit and
    // parsing ELF64. In such cases a value-in-range check is needed on the
    // segment. This fixes crbug/1035603.
    offset_t segment_end;
    base::CheckedNumeric<offset_t> checked_segment_end = segment->p_offset;
    checked_segment_end += segment->p_filesz;
    if (!checked_segment_end.AssignIfValid(&segment_end) ||
        !image_.covers({static_cast<size_t>(segment->p_offset),
                        static_cast<size_t>(segment->p_filesz)})) {
      return false;
    }
    offset_bound = std::max(offset_bound, segment_end);
  }

  // Visit and validate each section; add address translation data to |units|.
  std::vector<AddressTranslator::Unit> units;
  units.reserve(sections_count_);
  section_judgements_.reserve(sections_count_);

  for (int i = 0; i < sections_count_; ++i) {
    const typename Traits::Elf_Shdr* section = &sections_[i];
    int judgement = JudgeSection<Traits>(image_.size(), section);
    section_judgements_.push_back(judgement);
    // A single malformed section invalidates the whole ELF file.
    if ((judgement & SECTION_BIT_SAFE) == 0)
      return false;

    uint32_t sh_size = base::checked_cast<uint32_t>(section->sh_size);
    offset_t sh_offset = base::checked_cast<offset_t>(section->sh_offset);
    rva_t sh_addr = base::checked_cast<rva_t>(section->sh_addr);
    if ((judgement & SECTION_BIT_USEFUL_FOR_ADDRESS_TRANSLATOR) != 0) {
      // Store mappings between RVA and offset.
      units.push_back({sh_offset, sh_size, sh_addr, sh_size});
    }
    if ((judgement & SECTION_BIT_USEFUL_FOR_OFFSET_BOUND) != 0) {
      offset_t section_end = base::checked_cast<offset_t>(sh_offset + sh_size);
      offset_bound = std::max(offset_bound, section_end);
    }
  }

  // Initialize |translator_| for offset-RVA translations. Any inconsistency
  // (e.g., 2 offsets correspond to the same RVA) would invalidate the ELF file.
  if (translator_.Initialize(std::move(units)) != AddressTranslator::kSuccess)
    return false;

  DCHECK_LE(offset_bound, image_.size());
  image_.shrink(offset_bound);
  return true;
}
+
+template <class TRAITS>
+void DisassemblerElf<TRAITS>::ExtractInterestingSectionHeaders() {
+ DCHECK(reloc_section_dims_.empty());
+ DCHECK(exec_headers_.empty());
+ for (elf::Elf32_Half i = 0; i < sections_count_; ++i) {
+ const typename Traits::Elf_Shdr* section = sections_ + i;
+ if ((section_judgements_[i] & SECTION_BIT_MAYBE_USEFUL_FOR_POINTERS) != 0) {
+ if (IsRelocSection<Traits>(*section))
+ reloc_section_dims_.emplace_back(*section);
+ else if (IsExecSection<Traits>(*section))
+ exec_headers_.push_back(section);
+ }
+ }
+ auto comp = [](const typename Traits::Elf_Shdr* a,
+ const typename Traits::Elf_Shdr* b) {
+ return a->sh_offset < b->sh_offset;
+ };
+ std::sort(reloc_section_dims_.begin(), reloc_section_dims_.end());
+ std::sort(exec_headers_.begin(), exec_headers_.end(), comp);
+}
+
+template <class TRAITS>
+void DisassemblerElf<TRAITS>::GetAbs32FromRelocSections() {
+ constexpr int kAbs32Width = Traits::kVAWidth;
+ DCHECK(abs32_locations_.empty());
+
+ // Read reloc targets to get preliminary abs32 locations.
+ std::unique_ptr<ReferenceReader> relocs = MakeReadRelocs(0, offset_t(size()));
+ for (auto ref = relocs->GetNext(); ref.has_value(); ref = relocs->GetNext())
+ abs32_locations_.push_back(ref->target);
+
+ std::sort(abs32_locations_.begin(), abs32_locations_.end());
+
+ // Abs32 references must have targets translatable to offsets. Remove those
+ // that are unable to do so.
+ size_t num_untranslatable =
+ RemoveUntranslatableAbs32(image_, {Traits::kBitness, kElfImageBase},
+ translator_, &abs32_locations_);
+ LOG_IF(WARNING, num_untranslatable) << "Removed " << num_untranslatable
+ << " untranslatable abs32 references.";
+
+ // Abs32 reference bodies must not overlap. If found, simply remove them.
+ size_t num_overlapping =
+ RemoveOverlappingAbs32Locations(kAbs32Width, &abs32_locations_);
+ LOG_IF(WARNING, num_overlapping)
+ << "Removed " << num_overlapping
+ << " abs32 references with overlapping bodies.";
+
+ abs32_locations_.shrink_to_fit();
+}
+
+template <class TRAITS>
+void DisassemblerElf<TRAITS>::GetRel32FromCodeSections() {
+ for (const typename Traits::Elf_Shdr* section : exec_headers_)
+ ParseExecSection(*section);
+ PostProcessRel32();
+}
+
+template <class TRAITS>
+void DisassemblerElf<TRAITS>::ParseSections() {
+ ExtractInterestingSectionHeaders();
+ GetAbs32FromRelocSections();
+ GetRel32FromCodeSections();
+}
+
+/******** DisassemblerElfIntel ********/
+
+template <class TRAITS>
+DisassemblerElfIntel<TRAITS>::DisassemblerElfIntel() = default;
+
+template <class TRAITS>
+DisassemblerElfIntel<TRAITS>::~DisassemblerElfIntel() = default;
+
+template <class TRAITS>
+std::vector<ReferenceGroup> DisassemblerElfIntel<TRAITS>::MakeReferenceGroups()
+ const {
+ return {
+ {ReferenceTypeTraits{sizeof(TRAITS::Elf_Rel::r_offset), TypeTag(kReloc),
+ PoolTag(kReloc)},
+ &DisassemblerElfIntel<TRAITS>::MakeReadRelocs,
+ &DisassemblerElfIntel<TRAITS>::MakeWriteRelocs},
+ {ReferenceTypeTraits{Traits::kVAWidth, TypeTag(kAbs32), PoolTag(kAbs32)},
+ &DisassemblerElfIntel<TRAITS>::MakeReadAbs32,
+ &DisassemblerElfIntel<TRAITS>::MakeWriteAbs32},
+ // N.B.: Rel32 |width| is 4 bytes, even for x64.
+ {ReferenceTypeTraits{4, TypeTag(kRel32), PoolTag(kRel32)},
+ &DisassemblerElfIntel<TRAITS>::MakeReadRel32,
+ &DisassemblerElfIntel<TRAITS>::MakeWriteRel32}};
+}
+
+template <class TRAITS>
+void DisassemblerElfIntel<TRAITS>::ParseExecSection(
+ const typename TRAITS::Elf_Shdr& section) {
+ constexpr int kAbs32Width = Traits::kVAWidth;
+
+ // |this->| is needed to access protected members of templated base class. To
+ // reduce noise, use local references for these.
+ ConstBufferView& image_ = this->image_;
+ const AddressTranslator& translator_ = this->translator_;
+ auto& abs32_locations_ = this->abs32_locations_;
+
+ // Range of values was ensured in ParseHeader().
+ rva_t start_rva = base::checked_cast<rva_t>(section.sh_addr);
+ rva_t end_rva = base::checked_cast<rva_t>(start_rva + section.sh_size);
+
+ AddressTranslator::RvaToOffsetCache target_rva_checker(translator_);
+
+ ConstBufferView region(image_.begin() + section.sh_offset, section.sh_size);
+ Abs32GapFinder gap_finder(image_, region, abs32_locations_, kAbs32Width);
+ typename TRAITS::Rel32FinderUse rel_finder(image_, translator_);
+ // Iterate over gaps between abs32 references, to avoid collision.
+ while (gap_finder.FindNext()) {
+ rel_finder.SetRegion(gap_finder.GetGap());
+ while (rel_finder.FindNext()) {
+ auto rel32 = rel_finder.GetRel32();
+ if (target_rva_checker.IsValid(rel32.target_rva) &&
+ (rel32.can_point_outside_section ||
+ (start_rva <= rel32.target_rva && rel32.target_rva < end_rva))) {
+ rel_finder.Accept();
+ rel32_locations_.push_back(rel32.location);
+ }
+ }
+ }
+}
+
+template <class TRAITS>
+void DisassemblerElfIntel<TRAITS>::PostProcessRel32() {
+ rel32_locations_.shrink_to_fit();
+ std::sort(rel32_locations_.begin(), rel32_locations_.end());
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceReader> DisassemblerElfIntel<TRAITS>::MakeReadAbs32(
+ offset_t lo,
+ offset_t hi) {
+ // TODO(huangs): Don't use Abs32RvaExtractorWin32 here; use new class that
+ // caters to different ELF architectures.
+ Abs32RvaExtractorWin32 abs_rva_extractor(
+ this->image_, AbsoluteAddress(TRAITS::kBitness, kElfImageBase),
+ this->abs32_locations_, lo, hi);
+ return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor),
+ this->translator_);
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceWriter> DisassemblerElfIntel<TRAITS>::MakeWriteAbs32(
+ MutableBufferView image) {
+ return std::make_unique<Abs32WriterWin32>(
+ image, AbsoluteAddress(TRAITS::kBitness, kElfImageBase),
+ this->translator_);
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceReader> DisassemblerElfIntel<TRAITS>::MakeReadRel32(
+ offset_t lo,
+ offset_t hi) {
+ return std::make_unique<Rel32ReaderX86>(this->image_, lo, hi,
+ &rel32_locations_, this->translator_);
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceWriter> DisassemblerElfIntel<TRAITS>::MakeWriteRel32(
+ MutableBufferView image) {
+ return std::make_unique<Rel32WriterX86>(image, this->translator_);
+}
+
+// Explicit instantiation for supported classes.
+template class DisassemblerElfIntel<Elf32IntelTraits>;
+template class DisassemblerElfIntel<Elf64IntelTraits>;
+template bool DisassemblerElf<Elf32IntelTraits>::QuickDetect(
+ ConstBufferView image);
+template bool DisassemblerElf<Elf64IntelTraits>::QuickDetect(
+ ConstBufferView image);
+
+/******** DisassemblerElfArm ********/
+
+template <class Traits>
+DisassemblerElfArm<Traits>::DisassemblerElfArm() = default;
+
+template <class Traits>
+DisassemblerElfArm<Traits>::~DisassemblerElfArm() = default;
+
+template <class Traits>
+bool DisassemblerElfArm<Traits>::IsTargetOffsetInExecSection(
+ offset_t offset) const {
+ // Executable sections can appear in large numbers in .o files and in
+ // pathological cases. Since this function may be called for each reference
+ // candidate, linear search may be too slow (so use binary search).
+ return IsTargetOffsetInElfSectionList(this->exec_headers_, offset);
+}
+
+template <class Traits>
+void DisassemblerElfArm<Traits>::ParseExecSection(
+ const typename Traits::Elf_Shdr& section) {
+ ConstBufferView& image_ = this->image_;
+ const AddressTranslator& translator_ = this->translator_;
+ auto& abs32_locations_ = this->abs32_locations_;
+
+ ConstBufferView region(image_.begin() + section.sh_offset, section.sh_size);
+ Abs32GapFinder gap_finder(image_, region, abs32_locations_, Traits::kVAWidth);
+ std::unique_ptr<typename Traits::Rel32FinderUse> rel_finder =
+ MakeRel32Finder(section);
+ AddressTranslator::RvaToOffsetCache rva_to_offset(translator_);
+ while (gap_finder.FindNext()) {
+ rel_finder->SetRegion(gap_finder.GetGap());
+ while (rel_finder->FindNext()) {
+ auto rel32 = rel_finder->GetRel32();
+ offset_t target_offset = rva_to_offset.Convert(rel32.target_rva);
+ if (target_offset != kInvalidOffset) {
+        // For robustness, reject illegal offsets, which can arise from, e.g.,
+        // misidentifying ARM vs. THUMB2 mode, or even misidentifying data as
+ if (IsTargetOffsetInExecSection(target_offset)) {
+ rel_finder->Accept();
+ rel32_locations_table_[rel32.type].push_back(rel32.location);
+ }
+ }
+ }
+ }
+}
+
+template <class Traits>
+void DisassemblerElfArm<Traits>::PostProcessRel32() {
+ for (int type = 0; type < AArch32Rel32Translator::NUM_ADDR_TYPE; ++type) {
+ std::sort(rel32_locations_table_[type].begin(),
+ rel32_locations_table_[type].end());
+ rel32_locations_table_[type].shrink_to_fit();
+ }
+}
+
+template <class Traits>
+std::unique_ptr<ReferenceReader> DisassemblerElfArm<Traits>::MakeReadAbs32(
+ offset_t lo,
+ offset_t hi) {
+ // TODO(huangs): Reconcile the use of Win32-specific classes in ARM code!
+ Abs32RvaExtractorWin32 abs_rva_extractor(this->image_,
+ AbsoluteAddress(Traits::kBitness, 0),
+ this->abs32_locations_, lo, hi);
+ return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor),
+ this->translator_);
+}
+
+template <class Traits>
+std::unique_ptr<ReferenceWriter> DisassemblerElfArm<Traits>::MakeWriteAbs32(
+ MutableBufferView image) {
+ return std::make_unique<Abs32WriterWin32>(
+ image, AbsoluteAddress(Traits::kBitness, 0), this->translator_);
+}
+
+/******** DisassemblerElfAArch32 ********/
+
+DisassemblerElfAArch32::DisassemblerElfAArch32() = default;
+DisassemblerElfAArch32::~DisassemblerElfAArch32() = default;
+
+std::vector<ReferenceGroup> DisassemblerElfAArch32::MakeReferenceGroups()
+ const {
+ return {
+ {ReferenceTypeTraits{sizeof(Traits::Elf_Rel::r_offset),
+ TypeTag(AArch32ReferenceType::kReloc),
+ PoolTag(ArmReferencePool::kPoolReloc)},
+ &DisassemblerElfAArch32::MakeReadRelocs,
+ &DisassemblerElfAArch32::MakeWriteRelocs},
+ {ReferenceTypeTraits{Traits::kVAWidth,
+ TypeTag(AArch32ReferenceType::kAbs32),
+ PoolTag(ArmReferencePool::kPoolAbs32)},
+ &DisassemblerElfAArch32::MakeReadAbs32,
+ &DisassemblerElfAArch32::MakeWriteAbs32},
+ {ReferenceTypeTraits{4, TypeTag(AArch32ReferenceType::kRel32_A24),
+ PoolTag(ArmReferencePool::kPoolRel32)},
+ &DisassemblerElfAArch32::MakeReadRel32A24,
+ &DisassemblerElfAArch32::MakeWriteRel32A24},
+ {ReferenceTypeTraits{2, TypeTag(AArch32ReferenceType::kRel32_T8),
+ PoolTag(ArmReferencePool::kPoolRel32)},
+ &DisassemblerElfAArch32::MakeReadRel32T8,
+ &DisassemblerElfAArch32::MakeWriteRel32T8},
+ {ReferenceTypeTraits{2, TypeTag(AArch32ReferenceType::kRel32_T11),
+ PoolTag(ArmReferencePool::kPoolRel32)},
+ &DisassemblerElfAArch32::MakeReadRel32T11,
+ &DisassemblerElfAArch32::MakeWriteRel32T11},
+ {ReferenceTypeTraits{4, TypeTag(AArch32ReferenceType::kRel32_T20),
+ PoolTag(ArmReferencePool::kPoolRel32)},
+ &DisassemblerElfAArch32::MakeReadRel32T20,
+ &DisassemblerElfAArch32::MakeWriteRel32T20},
+ {ReferenceTypeTraits{4, TypeTag(AArch32ReferenceType::kRel32_T24),
+ PoolTag(ArmReferencePool::kPoolRel32)},
+ &DisassemblerElfAArch32::MakeReadRel32T24,
+ &DisassemblerElfAArch32::MakeWriteRel32T24},
+ };
+}
+
+std::unique_ptr<DisassemblerElfAArch32::Traits::Rel32FinderUse>
+DisassemblerElfAArch32::MakeRel32Finder(
+ const typename Traits::Elf_Shdr& section) {
+ return std::make_unique<Rel32FinderAArch32>(image_, translator_,
+ IsExecSectionThumb2(section));
+}
+
+bool DisassemblerElfAArch32::IsExecSectionThumb2(
+ const typename Traits::Elf_Shdr& section) const {
+ // ARM mode requires 4-byte alignment.
+ if (section.sh_addr % 4 != 0 || section.sh_size % 4 != 0)
+ return true;
+ const uint8_t* first = image_.begin() + section.sh_offset;
+ const uint8_t* end = first + section.sh_size;
+ // Each instruction in 32-bit ARM (little-endian) looks like
+ // ?? ?? ?? X?,
+ // where X specifies conditional execution. X = 0xE represents AL = "ALways
+ // execute", and tends to appear very often. We use this as our main indicator
+ // to discern 32-bit ARM mode from THUMB2 mode.
+ size_t num = 0;
+ size_t den = 0;
+ for (const uint8_t* cur = first; cur < end; cur += 4) {
+ // |cur[3]| is within bounds because |end - cur| is a multiple of 4.
+ uint8_t maybe_cond = cur[3] & 0xF0;
+ if (maybe_cond == 0xE0)
+ ++num;
+ ++den;
+ }
+
+ if (den > 0) {
+ LOG(INFO) << "Section scan: " << num << " / " << den << " => "
+ << base::StringPrintf("%.2f", num * 100.0 / den) << "%";
+ }
+ return num < den * kAArch32BitCondAlwaysDensityThreshold;
+}
+
+std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32A24(
+ offset_t lower,
+ offset_t upper) {
+ return std::make_unique<
+ Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_A24>>(
+ translator_, image_,
+ rel32_locations_table_[AArch32Rel32Translator::ADDR_A24], lower, upper);
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32A24(
+ MutableBufferView image) {
+ return std::make_unique<
+ Rel32WriterArm<AArch32Rel32Translator::AddrTraits_A24>>(translator_,
+ image);
+}
+
+std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32T8(
+ offset_t lower,
+ offset_t upper) {
+ return std::make_unique<
+ Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_T8>>(
+ translator_, image_,
+ rel32_locations_table_[AArch32Rel32Translator::ADDR_T8], lower, upper);
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32T8(
+ MutableBufferView image) {
+ return std::make_unique<
+ Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T8>>(translator_,
+ image);
+}
+
+std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32T11(
+ offset_t lower,
+ offset_t upper) {
+ return std::make_unique<
+ Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_T11>>(
+ translator_, image_,
+ rel32_locations_table_[AArch32Rel32Translator::ADDR_T11], lower, upper);
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32T11(
+ MutableBufferView image) {
+ return std::make_unique<
+ Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T11>>(translator_,
+ image);
+}
+
+std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32T20(
+ offset_t lower,
+ offset_t upper) {
+ return std::make_unique<
+ Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_T20>>(
+ translator_, image_,
+ rel32_locations_table_[AArch32Rel32Translator::ADDR_T20], lower, upper);
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32T20(
+ MutableBufferView image) {
+ return std::make_unique<
+ Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T20>>(translator_,
+ image);
+}
+
+std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32T24(
+ offset_t lower,
+ offset_t upper) {
+ return std::make_unique<
+ Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_T24>>(
+ translator_, image_,
+ rel32_locations_table_[AArch32Rel32Translator::ADDR_T24], lower, upper);
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32T24(
+ MutableBufferView image) {
+ return std::make_unique<
+ Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T24>>(translator_,
+ image);
+}
+
+/******** DisassemblerElfAArch64 ********/
+
+DisassemblerElfAArch64::DisassemblerElfAArch64() = default;
+
+DisassemblerElfAArch64::~DisassemblerElfAArch64() = default;
+
+std::vector<ReferenceGroup> DisassemblerElfAArch64::MakeReferenceGroups()
+ const {
+ return {
+ {ReferenceTypeTraits{sizeof(Traits::Elf_Rel::r_offset),
+ TypeTag(AArch64ReferenceType::kReloc),
+ PoolTag(ArmReferencePool::kPoolReloc)},
+ &DisassemblerElfAArch64::MakeReadRelocs,
+ &DisassemblerElfAArch64::MakeWriteRelocs},
+ {ReferenceTypeTraits{Traits::kVAWidth,
+ TypeTag(AArch64ReferenceType::kAbs32),
+ PoolTag(ArmReferencePool::kPoolAbs32)},
+ &DisassemblerElfAArch64::MakeReadAbs32,
+ &DisassemblerElfAArch64::MakeWriteAbs32},
+ {ReferenceTypeTraits{4, TypeTag(AArch64ReferenceType::kRel32_Immd14),
+ PoolTag(ArmReferencePool::kPoolRel32)},
+ &DisassemblerElfAArch64::MakeReadRel32Immd14,
+ &DisassemblerElfAArch64::MakeWriteRel32Immd14},
+ {ReferenceTypeTraits{4, TypeTag(AArch64ReferenceType::kRel32_Immd19),
+ PoolTag(ArmReferencePool::kPoolRel32)},
+ &DisassemblerElfAArch64::MakeReadRel32Immd19,
+ &DisassemblerElfAArch64::MakeWriteRel32Immd19},
+ {ReferenceTypeTraits{4, TypeTag(AArch64ReferenceType::kRel32_Immd26),
+ PoolTag(ArmReferencePool::kPoolRel32)},
+ &DisassemblerElfAArch64::MakeReadRel32Immd26,
+ &DisassemblerElfAArch64::MakeWriteRel32Immd26},
+ };
+}
+
+std::unique_ptr<DisassemblerElfAArch64::Traits::Rel32FinderUse>
+DisassemblerElfAArch64::MakeRel32Finder(
+ const typename Traits::Elf_Shdr& section) {
+ return std::make_unique<Rel32FinderAArch64>(image_, translator_);
+}
+
+std::unique_ptr<ReferenceReader> DisassemblerElfAArch64::MakeReadRel32Immd14(
+ offset_t lower,
+ offset_t upper) {
+ return std::make_unique<
+ Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd14>>(
+ translator_, this->image_,
+ rel32_locations_table_[AArch64Rel32Translator::ADDR_IMMD14], lower,
+ upper);
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerElfAArch64::MakeWriteRel32Immd14(
+ MutableBufferView image) {
+ return std::make_unique<
+ Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd14>>(translator_,
+ image);
+}
+
+std::unique_ptr<ReferenceReader> DisassemblerElfAArch64::MakeReadRel32Immd19(
+ offset_t lower,
+ offset_t upper) {
+ return std::make_unique<
+ Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd19>>(
+ translator_, this->image_,
+ rel32_locations_table_[AArch64Rel32Translator::ADDR_IMMD19], lower,
+ upper);
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerElfAArch64::MakeWriteRel32Immd19(
+ MutableBufferView image) {
+ return std::make_unique<
+ Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd19>>(translator_,
+ image);
+}
+
+std::unique_ptr<ReferenceReader> DisassemblerElfAArch64::MakeReadRel32Immd26(
+ offset_t lower,
+ offset_t upper) {
+ return std::make_unique<
+ Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd26>>(
+ translator_, this->image_,
+ rel32_locations_table_[AArch64Rel32Translator::ADDR_IMMD26], lower,
+ upper);
+}
+
+std::unique_ptr<ReferenceWriter> DisassemblerElfAArch64::MakeWriteRel32Immd26(
+ MutableBufferView image) {
+ return std::make_unique<
+ Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd26>>(translator_,
+ image);
+}
+
+// Explicit instantiation for supported classes.
+template class DisassemblerElfArm<ElfAArch32Traits>;
+template class DisassemblerElfArm<ElfAArch64Traits>;
+template bool DisassemblerElf<ElfAArch32Traits>::QuickDetect(
+ ConstBufferView image);
+template bool DisassemblerElf<ElfAArch64Traits>::QuickDetect(
+ ConstBufferView image);
+
+} // namespace zucchini
diff --git a/disassembler_elf.h b/disassembler_elf.h
new file mode 100644
index 0000000..0bd11a6
--- /dev/null
+++ b/disassembler_elf.h
@@ -0,0 +1,383 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_ELF_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_ELF_H_
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <deque>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/rel32_finder.h"
+#include "components/zucchini/rel32_utils.h"
+#include "components/zucchini/reloc_elf.h"
+#include "components/zucchini/type_elf.h"
+
+namespace zucchini {
+
+struct ArmReferencePool {
+ enum : uint8_t {
+ kPoolReloc,
+ kPoolAbs32,
+ kPoolRel32,
+ };
+};
+
+struct AArch32ReferenceType {
+ enum : uint8_t {
+ kReloc, // kPoolReloc
+
+ kAbs32, // kPoolAbs32
+
+ kRel32_A24, // kPoolRel32
+ kRel32_T8,
+ kRel32_T11,
+ kRel32_T20,
+ kRel32_T24,
+
+ kTypeCount
+ };
+};
+
+struct AArch64ReferenceType {
+ enum : uint8_t {
+ kReloc, // kPoolReloc
+
+ kAbs32, // kPoolAbs32
+
+ kRel32_Immd14, // kPoolRel32
+ kRel32_Immd19,
+ kRel32_Immd26,
+
+ kTypeCount
+ };
+};
+
+struct Elf32Traits {
+ static constexpr Bitness kBitness = kBit32;
+ static constexpr elf::FileClass kIdentificationClass = elf::ELFCLASS32;
+ using Elf_Shdr = elf::Elf32_Shdr;
+ using Elf_Phdr = elf::Elf32_Phdr;
+ using Elf_Ehdr = elf::Elf32_Ehdr;
+ using Elf_Rel = elf::Elf32_Rel;
+ using Elf_Rela = elf::Elf32_Rela;
+};
+
+// Architecture-specific definitions.
+
+struct Elf32IntelTraits : public Elf32Traits {
+ static constexpr ExecutableType kExeType = kExeTypeElfX86;
+ static const char kExeTypeString[];
+ static constexpr elf::MachineArchitecture kMachineValue = elf::EM_386;
+ static constexpr uint32_t kRelType = elf::R_386_RELATIVE;
+ enum : uint32_t { kVAWidth = 4 };
+ using Rel32FinderUse = Rel32FinderX86;
+};
+
+struct ElfAArch32Traits : public Elf32Traits {
+ static constexpr ExecutableType kExeType = kExeTypeElfAArch32;
+ static const char kExeTypeString[];
+ static constexpr elf::MachineArchitecture kMachineValue = elf::EM_ARM;
+ static constexpr uint32_t kRelType = elf::R_ARM_RELATIVE;
+ enum : uint32_t { kVAWidth = 4 };
+ using ArmReferenceType = AArch32ReferenceType;
+ using Rel32FinderUse = Rel32FinderAArch32;
+};
+
+struct Elf64Traits {
+ static constexpr Bitness kBitness = kBit64;
+ static constexpr elf::FileClass kIdentificationClass = elf::ELFCLASS64;
+ using Elf_Shdr = elf::Elf64_Shdr;
+ using Elf_Phdr = elf::Elf64_Phdr;
+ using Elf_Ehdr = elf::Elf64_Ehdr;
+ using Elf_Rel = elf::Elf64_Rel;
+ using Elf_Rela = elf::Elf64_Rela;
+};
+
+// Architecture-specific definitions.
+struct Elf64IntelTraits : public Elf64Traits {
+ static constexpr ExecutableType kExeType = kExeTypeElfX64;
+ static const char kExeTypeString[];
+ static constexpr elf::MachineArchitecture kMachineValue = elf::EM_X86_64;
+ static constexpr uint32_t kRelType = elf::R_X86_64_RELATIVE;
+ enum : uint32_t { kVAWidth = 8 };
+ using Rel32FinderUse = Rel32FinderX64;
+};
+
+struct ElfAArch64Traits : public Elf64Traits {
+ static constexpr ExecutableType kExeType = kExeTypeElfAArch64;
+ static const char kExeTypeString[];
+ static constexpr elf::MachineArchitecture kMachineValue = elf::EM_AARCH64;
+ // TODO(huangs): See if R_AARCH64_GLOB_DAT and R_AARCH64_JUMP_SLOT should be
+ // used.
+ static constexpr uint32_t kRelType = elf::R_AARCH64_RELATIVE;
+ enum : uint32_t { kVAWidth = 8 };
+ using ArmReferenceType = AArch64ReferenceType;
+ using Rel32FinderUse = Rel32FinderAArch64;
+};
+
+// Decides whether target |offset| is covered by a section in |sorted_headers|.
+template <class ELF_SHDR>
+bool IsTargetOffsetInElfSectionList(
+ const std::vector<const ELF_SHDR*>& sorted_headers,
+ offset_t offset) {
+ // Use binary search to search in a list of intervals, in a fashion similar to
+ // AddressTranslator::OffsetToUnit().
+ auto comp = [](offset_t offset, const ELF_SHDR* header) -> bool {
+ return offset < header->sh_offset;
+ };
+ auto it = std::upper_bound(sorted_headers.begin(), sorted_headers.end(),
+ offset, comp);
+ if (it == sorted_headers.begin())
+ return false;
+ --it;
+ // Just check offset without worrying about width, since this is a target.
+ // Not using RangeCovers() because |sh_offset| and |sh_size| can be 64-bit.
+ return offset >= (*it)->sh_offset &&
+ offset - (*it)->sh_offset < (*it)->sh_size;
+}
+
+// Disassembler for ELF.
+template <class TRAITS>
+class DisassemblerElf : public Disassembler {
+ public:
+ using Traits = TRAITS;
+ // Applies quick checks to determine whether |image| *may* point to the start
+ // of an executable. Returns true iff the check passes.
+ static bool QuickDetect(ConstBufferView image);
+
+ DisassemblerElf(const DisassemblerElf&) = delete;
+ const DisassemblerElf& operator=(const DisassemblerElf&) = delete;
+ ~DisassemblerElf() override;
+
+ // Disassembler:
+ ExecutableType GetExeType() const override;
+ std::string GetExeTypeString() const override;
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override = 0;
+
+ // Read/Write functions that are common among different architectures.
+ std::unique_ptr<ReferenceReader> MakeReadRelocs(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceWriter> MakeWriteRelocs(MutableBufferView image);
+
+ const AddressTranslator& translator() const { return translator_; }
+
+ protected:
+ friend Disassembler;
+
+ DisassemblerElf();
+
+ bool Parse(ConstBufferView image) override;
+
+ // Returns the supported Elf_Ehdr::e_machine enum.
+ static constexpr elf::MachineArchitecture supported_architecture() {
+ return Traits::kMachineValue;
+ }
+
+ // Returns the type to look for in the reloc section.
+ static constexpr uint32_t supported_relocation_type() {
+ return Traits::kRelType;
+ }
+
+ // Performs architecture-specific parsing of an executable section, to extract
+ // rel32 references.
+ virtual void ParseExecSection(const typename Traits::Elf_Shdr& section) = 0;
+
+ // Processes rel32 data after they are extracted from executable sections.
+ virtual void PostProcessRel32() = 0;
+
+ // Parses ELF header and section headers, and performs basic validation.
+ // Returns whether parsing was successful.
+ bool ParseHeader();
+
+ // Extracts and stores section headers that we need.
+ void ExtractInterestingSectionHeaders();
+
+ // Parsing functions that extract references from various sections.
+ void GetAbs32FromRelocSections();
+ void GetRel32FromCodeSections();
+ void ParseSections();
+
+ // Main ELF header.
+ const typename Traits::Elf_Ehdr* header_ = nullptr;
+
+ // Section header table, ordered by section id.
+ elf::Elf32_Half sections_count_ = 0;
+ const typename Traits::Elf_Shdr* sections_ = nullptr;
+
+ // Program header table.
+ elf::Elf32_Half segments_count_ = 0;
+ const typename Traits::Elf_Phdr* segments_ = nullptr;
+
+ // Bit fields to store the role each section may play.
+ std::vector<int> section_judgements_;
+
+ // Translator between offsets and RVAs.
+ AddressTranslator translator_;
+
+ // Identity translator for abs32 translation.
+ AddressTranslator identity_translator_;
+
+ // Extracted relocation section dimensions data, sorted by file offsets.
+ std::vector<SectionDimensionsElf> reloc_section_dims_;
+
+ // Headers of executable sections, sorted by file offsets of the data each
+ // header points to.
+ std::vector<const typename Traits::Elf_Shdr*> exec_headers_;
+
+ // Sorted file offsets of abs32 locations.
+ std::vector<offset_t> abs32_locations_;
+};
+
+// Disassembler for ELF with Intel architectures.
+template <class TRAITS>
+class DisassemblerElfIntel : public DisassemblerElf<TRAITS> {
+ public:
+ using Traits = TRAITS;
+ enum ReferenceType : uint8_t { kReloc, kAbs32, kRel32, kTypeCount };
+
+ DisassemblerElfIntel();
+ DisassemblerElfIntel(const DisassemblerElfIntel&) = delete;
+ const DisassemblerElfIntel& operator=(const DisassemblerElfIntel&) = delete;
+ ~DisassemblerElfIntel() override;
+
+ // Disassembler:
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ // DisassemblerElf:
+ void ParseExecSection(const typename Traits::Elf_Shdr& section) override;
+ void PostProcessRel32() override;
+
+ // Specialized Read/Write functions.
+ std::unique_ptr<ReferenceReader> MakeReadAbs32(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image);
+ std::unique_ptr<ReferenceReader> MakeReadRel32(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32(MutableBufferView image);
+
+ private:
+ // Sorted file offsets of rel32 locations.
+ // Using std::deque to reduce peak memory footprint.
+ std::deque<offset_t> rel32_locations_;
+};
+
+using DisassemblerElfX86 = DisassemblerElfIntel<Elf32IntelTraits>;
+using DisassemblerElfX64 = DisassemblerElfIntel<Elf64IntelTraits>;
+
+// Disassembler for ELF with ARM architectures.
+template <class TRAITS>
+class DisassemblerElfArm : public DisassemblerElf<TRAITS> {
+ public:
+ using Traits = TRAITS;
+ DisassemblerElfArm();
+ DisassemblerElfArm(const DisassemblerElfArm&) = delete;
+ const DisassemblerElfArm& operator=(const DisassemblerElfArm&) = delete;
+ ~DisassemblerElfArm() override;
+
+ // Determines whether target |offset| is in an executable section.
+ bool IsTargetOffsetInExecSection(offset_t offset) const;
+
+ // Creates an architecture-specific Rel32Finder for ParseExecSection.
+ virtual std::unique_ptr<typename Traits::Rel32FinderUse> MakeRel32Finder(
+ const typename Traits::Elf_Shdr& section) = 0;
+
+ // DisassemblerElf:
+ void ParseExecSection(const typename Traits::Elf_Shdr& section) override;
+ void PostProcessRel32() override;
+
+ // Specialized Read/Write functions.
+ std::unique_ptr<ReferenceReader> MakeReadAbs32(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image);
+
+ protected:
+ // Sorted file offsets of rel32 locations for each rel32 address type.
+ std::deque<offset_t>
+ rel32_locations_table_[Traits::ArmReferenceType::kTypeCount];
+};
+
+// Disassembler for ELF with AArch32 (AKA ARM32).
+class DisassemblerElfAArch32 : public DisassemblerElfArm<ElfAArch32Traits> {
+ public:
+ DisassemblerElfAArch32();
+ DisassemblerElfAArch32(const DisassemblerElfAArch32&) = delete;
+ const DisassemblerElfAArch32& operator=(const DisassemblerElfAArch32&) =
+ delete;
+ ~DisassemblerElfAArch32() override;
+
+ // Disassembler:
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ // DisassemblerElfArm:
+ std::unique_ptr<typename Traits::Rel32FinderUse> MakeRel32Finder(
+ const typename Traits::Elf_Shdr& section) override;
+
+ // Under the naive assumption that an executable section is entirely ARM mode
+ // or THUMB2 mode, this function implements heuristics to distinguish between
+  // the two. Returns true if section is THUMB2 mode; otherwise returns false.
+ bool IsExecSectionThumb2(const typename Traits::Elf_Shdr& section) const;
+
+ // Specialized Read/Write functions for different rel32 address types.
+ std::unique_ptr<ReferenceReader> MakeReadRel32A24(offset_t lower,
+ offset_t upper);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32A24(MutableBufferView image);
+
+ std::unique_ptr<ReferenceReader> MakeReadRel32T8(offset_t lower,
+ offset_t upper);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32T8(MutableBufferView image);
+
+ std::unique_ptr<ReferenceReader> MakeReadRel32T11(offset_t lower,
+ offset_t upper);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32T11(MutableBufferView image);
+
+ std::unique_ptr<ReferenceReader> MakeReadRel32T20(offset_t lower,
+ offset_t upper);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32T20(MutableBufferView image);
+
+ std::unique_ptr<ReferenceReader> MakeReadRel32T24(offset_t lower,
+ offset_t upper);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32T24(MutableBufferView image);
+};
+
+// Disassembler for ELF with AArch64 (AKA ARM64).
+class DisassemblerElfAArch64 : public DisassemblerElfArm<ElfAArch64Traits> {
+ public:
+ DisassemblerElfAArch64();
+ DisassemblerElfAArch64(const DisassemblerElfAArch64&) = delete;
+ const DisassemblerElfAArch64& operator=(const DisassemblerElfAArch64&) =
+ delete;
+ ~DisassemblerElfAArch64() override;
+
+ // Disassembler:
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ // DisassemblerElfArm:
+ std::unique_ptr<typename Traits::Rel32FinderUse> MakeRel32Finder(
+ const typename Traits::Elf_Shdr& section) override;
+
+ // Specialized Read/Write functions for different rel32 address types.
+ std::unique_ptr<ReferenceReader> MakeReadRel32Immd14(offset_t lower,
+ offset_t upper);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32Immd14(
+ MutableBufferView image);
+
+ std::unique_ptr<ReferenceReader> MakeReadRel32Immd19(offset_t lower,
+ offset_t upper);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32Immd19(
+ MutableBufferView image);
+
+ std::unique_ptr<ReferenceReader> MakeReadRel32Immd26(offset_t lower,
+ offset_t upper);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32Immd26(
+ MutableBufferView image);
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_ELF_H_
diff --git a/disassembler_elf_unittest.cc b/disassembler_elf_unittest.cc
new file mode 100644
index 0000000..d98eb50
--- /dev/null
+++ b/disassembler_elf_unittest.cc
@@ -0,0 +1,179 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_elf.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "components/zucchini/test_utils.h"
+#include "components/zucchini/type_elf.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+TEST(DisassemblerElfTest, IsTargetOffsetInElfSectionList) {
+ // Minimal required fields for IsTargetOffsetInElfSectionList().
+ struct FakeElfShdr {
+ offset_t sh_offset;
+ offset_t sh_size;
+ };
+
+ // Calls IsTargetOffsetInElfSectionList() for fixed |sorted_list|, and sweeps
+ // offsets in [lo, hi). Renders results into a string consisting of '.' (not
+ // in list) and '*' (in list).
+ auto test = [&](const std::vector<FakeElfShdr>& sorted_list, offset_t lo,
+ offset_t hi) -> std::string {
+ // Ensure |sorted_list| is indeed sorted, without overlaps.
+ for (size_t i = 1; i < sorted_list.size(); ++i) {
+ if (sorted_list[i].sh_offset <
+ sorted_list[i - 1].sh_offset + sorted_list[i - 1].sh_size) {
+ return "(Bad input)";
+ }
+ }
+ // The interface to IsTargetOffsetInElfSectionList() takes a list of
+ // pointers (since data can be casted from images), so make the conversion.
+ std::vector<const FakeElfShdr*> ptr_list;
+ for (const FakeElfShdr& header : sorted_list)
+ ptr_list.push_back(&header);
+ std::string result;
+ for (offset_t offset = lo; offset < hi; ++offset) {
+ result += IsTargetOffsetInElfSectionList(ptr_list, offset) ? '*' : '.';
+ }
+ return result;
+ };
+
+ EXPECT_EQ("..........", test(std::vector<FakeElfShdr>(), 0, 10));
+ EXPECT_EQ("*.........", test({{0, 1}}, 0, 10));
+ EXPECT_EQ("...*......", test({{3, 1}}, 0, 10));
+ EXPECT_EQ("...****...", test({{3, 4}}, 0, 10));
+ EXPECT_EQ("...****...", test({{10003, 4}}, 10000, 10010));
+ EXPECT_EQ("...********...", test({{3, 4}, {7, 4}}, 0, 14));
+ EXPECT_EQ("...****.****...", test({{3, 4}, {8, 4}}, 0, 15));
+ EXPECT_EQ("...****..****...", test({{3, 4}, {9, 4}}, 0, 16));
+ EXPECT_EQ("..****...*****..", test({{2, 4}, {9, 5}}, 0, 16));
+ EXPECT_EQ("...***......***..", test({{3, 3}, {12, 3}}, 0, 17));
+
+ // Many small ranges.
+ EXPECT_EQ("..**.**.*.*...*.*.**...**.*.**.*..", // (Comment strut).
+ test({{2, 2},
+ {5, 2},
+ {8, 1},
+ {10, 1},
+ {14, 1},
+ {16, 1},
+ {18, 2},
+ {23, 2},
+ {26, 1},
+ {28, 2},
+ {31, 1}},
+ 0, 34));
+ EXPECT_EQ("..*****.****.***.**.*..",
+ test({{137, 5}, {143, 4}, {148, 3}, {152, 2}, {155, 1}}, 135, 158));
+ // Consecutive.
+ EXPECT_EQ("..***************..",
+ test({{137, 5}, {142, 4}, {146, 3}, {149, 2}, {151, 1}}, 135, 154));
+ // Hover around 32 (power of 2).
+ EXPECT_EQ("..*******************************..",
+ test({{2002, 31}}, 2000, 2035));
+ EXPECT_EQ("..********************************..",
+ test({{5002, 32}}, 5000, 5036));
+ EXPECT_EQ("..*********************************..",
+ test({{8002, 33}}, 8000, 8037));
+ // Consecutive + small gap.
+ EXPECT_EQ(
+ "..*****************.***********..",
+ test({{9876543, 8}, {9876551, 9}, {9876561, 11}}, 9876541, 9876574));
+ // Sample internal of big range.
+ EXPECT_EQ("**************************************************",
+ test({{100, 1000000}}, 5000, 5050));
+ // Sample boundaries of big range.
+ EXPECT_EQ(".........................*************************",
+ test({{100, 1000000}}, 75, 125));
+ EXPECT_EQ("*************************.........................",
+ test({{100, 1000000}}, 1000075, 1000125));
+ // 1E9 is still good.
+ EXPECT_EQ(".....*.....", test({{1000000000, 1}}, 999999995, 1000000006));
+}
+
+TEST(DisassemblerElfTest, QuickDetect) {
+ std::vector<uint8_t> image_data;
+ ConstBufferView image;
+
+ // Empty.
+ EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
+ EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));
+
+ // Unrelated.
+ image_data = ParseHexString("DE AD");
+ image = {image_data.data(), image_data.size()};
+ EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
+ EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));
+
+ // Only Magic.
+ image_data = ParseHexString("7F 45 4C 46");
+ image = {image_data.data(), image_data.size()};
+ EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
+ EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));
+
+ // Only identification.
+ image_data =
+ ParseHexString("7F 45 4C 46 01 01 01 00 00 00 00 00 00 00 00 00");
+ image = {image_data.data(), image_data.size()};
+ EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
+ EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));
+
+ // Large enough, filled with zeros.
+ image_data.assign(sizeof(elf::Elf32_Ehdr), 0);
+ image = {image_data.data(), image_data.size()};
+ EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
+ EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));
+
+ // Random.
+ std::random_device rd;
+ std::mt19937 gen{rd()};
+ std::generate(image_data.begin(), image_data.end(), gen);
+ image = {image_data.data(), image_data.size()};
+ EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
+ EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));
+
+ // Typical x86 elf header.
+ {
+ elf::Elf32_Ehdr header = {};
+ auto e_ident =
+ ParseHexString("7F 45 4C 46 01 01 01 00 00 00 00 00 00 00 00 00");
+ std::copy(e_ident.begin(), e_ident.end(), header.e_ident);
+ header.e_type = elf::ET_EXEC;
+ header.e_machine = elf::EM_386;
+ header.e_version = 1;
+ header.e_shentsize = sizeof(elf::Elf32_Shdr);
+ ConstBufferView image(reinterpret_cast<const uint8_t*>(&header),
+ sizeof(header));
+ EXPECT_TRUE(DisassemblerElfX86::QuickDetect(image));
+ EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));
+ }
+
+ // Typical x64 elf header.
+ {
+ elf::Elf64_Ehdr header = {};
+ auto e_ident =
+ ParseHexString("7F 45 4C 46 02 01 01 00 00 00 00 00 00 00 00 00");
+ std::copy(e_ident.begin(), e_ident.end(), header.e_ident);
+ header.e_type = elf::ET_EXEC;
+ header.e_machine = elf::EM_X86_64;
+ header.e_version = 1;
+ header.e_shentsize = sizeof(elf::Elf64_Shdr);
+ ConstBufferView image(reinterpret_cast<const uint8_t*>(&header),
+ sizeof(header));
+ EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
+ EXPECT_TRUE(DisassemblerElfX64::QuickDetect(image));
+ }
+}
+
+} // namespace zucchini
diff --git a/disassembler_no_op.cc b/disassembler_no_op.cc
new file mode 100644
index 0000000..b17979c
--- /dev/null
+++ b/disassembler_no_op.cc
@@ -0,0 +1,31 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_no_op.h"
+
+namespace zucchini {
+
+// |num_equivalence_iterations_| = 1 since no pointers are present.
+DisassemblerNoOp::DisassemblerNoOp() : Disassembler(1) {}
+
+DisassemblerNoOp::~DisassemblerNoOp() = default;
+
+ExecutableType DisassemblerNoOp::GetExeType() const {
+ return kExeTypeNoOp;
+}
+
+std::string DisassemblerNoOp::GetExeTypeString() const {
+ return "(Unknown)";
+}
+
+std::vector<ReferenceGroup> DisassemblerNoOp::MakeReferenceGroups() const {
+ return std::vector<ReferenceGroup>();
+}
+
+bool DisassemblerNoOp::Parse(ConstBufferView image) {
+ image_ = image;
+ return true;
+}
+
+} // namespace zucchini
diff --git a/disassembler_no_op.h b/disassembler_no_op.h
new file mode 100644
index 0000000..ef10651
--- /dev/null
+++ b/disassembler_no_op.h
@@ -0,0 +1,39 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// This disassembler works on any file and does not look for reference.
+class DisassemblerNoOp : public Disassembler {
+ public:
+ DisassemblerNoOp();
+ DisassemblerNoOp(const DisassemblerNoOp&) = delete;
+ const DisassemblerNoOp& operator=(const DisassemblerNoOp&) = delete;
+ ~DisassemblerNoOp() override;
+
+ // Disassembler:
+ ExecutableType GetExeType() const override;
+ std::string GetExeTypeString() const override;
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ private:
+ friend Disassembler;
+
+ bool Parse(ConstBufferView image) override;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_
diff --git a/disassembler_win32.cc b/disassembler_win32.cc
new file mode 100644
index 0000000..37e43e5
--- /dev/null
+++ b/disassembler_win32.cc
@@ -0,0 +1,410 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_win32.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/abs32_utils.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/buffer_source.h"
+#include "components/zucchini/rel32_finder.h"
+#include "components/zucchini/rel32_utils.h"
+#include "components/zucchini/reloc_win32.h"
+
+namespace zucchini {
+
+namespace {
+
+// Decides whether |image| points to a Win32 PE file. If this is a possibility,
+// assigns |source| to enable further parsing, and returns true. Otherwise
+// leaves |source| at an undefined state and returns false.
+bool ReadWin32Header(ConstBufferView image, BufferSource* source) {
+ *source = BufferSource(image);
+
+ // Check "MZ" magic of DOS header.
+ if (!source->CheckNextBytes({'M', 'Z'}))
+ return false;
+
+ const auto* dos_header = source->GetPointer<pe::ImageDOSHeader>();
+ // For |e_lfanew|, reject on misalignment or overlap with DOS header.
+ if (!dos_header || (dos_header->e_lfanew & 7) != 0 ||
+ dos_header->e_lfanew < 0U + sizeof(pe::ImageDOSHeader)) {
+ return false;
+ }
+ // Offset to PE header is in DOS header.
+ *source = std::move(BufferSource(image).Skip(dos_header->e_lfanew));
+ // Check 'PE\0\0' magic from PE header.
+ if (!source->ConsumeBytes({'P', 'E', 0, 0}))
+ return false;
+
+ return true;
+}
+
+template <class TRAITS>
+const pe::ImageDataDirectory* ReadDataDirectory(
+ const typename TRAITS::ImageOptionalHeader* optional_header,
+ size_t index) {
+ if (index >= optional_header->number_of_rva_and_sizes)
+ return nullptr;
+ return &optional_header->data_directory[index];
+}
+
+// Decides whether |section| (assumed value) is a section that contains code.
+template <class TRAITS>
+bool IsWin32CodeSection(const pe::ImageSectionHeader& section) {
+ return (section.characteristics & kCodeCharacteristics) ==
+ kCodeCharacteristics;
+}
+
+} // namespace
+
+/******** Win32X86Traits ********/
+
+// static
+constexpr Bitness Win32X86Traits::kBitness;
+constexpr ExecutableType Win32X86Traits::kExeType;
+const char Win32X86Traits::kExeTypeString[] = "Windows PE x86";
+
+/******** Win32X64Traits ********/
+
+// static
+constexpr Bitness Win32X64Traits::kBitness;
+constexpr ExecutableType Win32X64Traits::kExeType;
+const char Win32X64Traits::kExeTypeString[] = "Windows PE x64";
+
+/******** DisassemblerWin32 ********/
+
+// static.
+template <class TRAITS>
+bool DisassemblerWin32<TRAITS>::QuickDetect(ConstBufferView image) {
+ BufferSource source;
+ return ReadWin32Header(image, &source);
+}
+
+// |num_equivalence_iterations_| = 2 for reloc -> abs32.
+template <class TRAITS>
+DisassemblerWin32<TRAITS>::DisassemblerWin32() : Disassembler(2) {}
+
+template <class TRAITS>
+DisassemblerWin32<TRAITS>::~DisassemblerWin32() = default;
+
+template <class TRAITS>
+ExecutableType DisassemblerWin32<TRAITS>::GetExeType() const {
+ return Traits::kExeType;
+}
+
+template <class TRAITS>
+std::string DisassemblerWin32<TRAITS>::GetExeTypeString() const {
+ return Traits::kExeTypeString;
+}
+
+template <class TRAITS>
+std::vector<ReferenceGroup> DisassemblerWin32<TRAITS>::MakeReferenceGroups()
+ const {
+ return {
+ {ReferenceTypeTraits{2, TypeTag(kReloc), PoolTag(kReloc)},
+ &DisassemblerWin32::MakeReadRelocs, &DisassemblerWin32::MakeWriteRelocs},
+ {ReferenceTypeTraits{Traits::kVAWidth, TypeTag(kAbs32), PoolTag(kAbs32)},
+ &DisassemblerWin32::MakeReadAbs32, &DisassemblerWin32::MakeWriteAbs32},
+ {ReferenceTypeTraits{4, TypeTag(kRel32), PoolTag(kRel32)},
+ &DisassemblerWin32::MakeReadRel32, &DisassemblerWin32::MakeWriteRel32},
+ };
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadRelocs(
+ offset_t lo,
+ offset_t hi) {
+ if (!ParseAndStoreRelocBlocks())
+ return std::make_unique<EmptyReferenceReader>();
+
+ RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_,
+ reloc_block_offsets_, lo, hi);
+ CHECK_GE(image_.size(), Traits::kVAWidth);
+ offset_t offset_bound =
+ base::checked_cast<offset_t>(image_.size() - Traits::kVAWidth + 1);
+ return std::make_unique<RelocReaderWin32>(std::move(reloc_rva_reader),
+ Traits::kRelocType, offset_bound,
+ translator_);
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadAbs32(
+ offset_t lo,
+ offset_t hi) {
+ ParseAndStoreAbs32();
+ Abs32RvaExtractorWin32 abs_rva_extractor(
+ image_, {Traits::kBitness, image_base_}, abs32_locations_, lo, hi);
+ return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor),
+ translator_);
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadRel32(
+ offset_t lo,
+ offset_t hi) {
+ ParseAndStoreRel32();
+ return std::make_unique<Rel32ReaderX86>(image_, lo, hi, &rel32_locations_,
+ translator_);
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteRelocs(
+ MutableBufferView image) {
+ if (!ParseAndStoreRelocBlocks())
+ return std::make_unique<EmptyReferenceWriter>();
+
+ return std::make_unique<RelocWriterWin32>(Traits::kRelocType, image,
+ reloc_region_, reloc_block_offsets_,
+ translator_);
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteAbs32(
+ MutableBufferView image) {
+ return std::make_unique<Abs32WriterWin32>(
+ image, AbsoluteAddress(Traits::kBitness, image_base_), translator_);
+}
+
+template <class TRAITS>
+std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteRel32(
+ MutableBufferView image) {
+ return std::make_unique<Rel32WriterX86>(image, translator_);
+}
+
+template <class TRAITS>
+bool DisassemblerWin32<TRAITS>::Parse(ConstBufferView image) {
+ image_ = image;
+ return ParseHeader();
+}
+
+template <class TRAITS>
+bool DisassemblerWin32<TRAITS>::ParseHeader() {
+ BufferSource source;
+
+ if (!ReadWin32Header(image_, &source))
+ return false;
+
+ constexpr size_t kDataDirBase =
+ offsetof(typename Traits::ImageOptionalHeader, data_directory);
+ auto* coff_header = source.GetPointer<pe::ImageFileHeader>();
+ if (!coff_header || coff_header->size_of_optional_header < kDataDirBase)
+ return false;
+
+ // |number_of_rva_and_sizes < kImageNumberOfDirectoryEntries| is possible. So
+ // in theory, GetPointer() on ImageOptionalHeader can reach EOF for a tiny PE
+ // file, causing false rejection. However, this should not occur for practical
+ // cases; and rejection is okay for corner cases (e.g., from a fuzzer).
+ auto* optional_header =
+ source.GetPointer<typename Traits::ImageOptionalHeader>();
+ if (!optional_header || optional_header->magic != Traits::kMagic)
+ return false;
+
+ // Check |optional_header->number_of_rva_and_sizes|.
+ const size_t data_dir_size =
+ coff_header->size_of_optional_header - kDataDirBase;
+ const size_t num_data_dir = data_dir_size / sizeof(pe::ImageDataDirectory);
+ if (num_data_dir != optional_header->number_of_rva_and_sizes ||
+ num_data_dir * sizeof(pe::ImageDataDirectory) != data_dir_size ||
+ num_data_dir > pe::kImageNumberOfDirectoryEntries) {
+ return false;
+ }
+
+ base_relocation_table_ = ReadDataDirectory<Traits>(
+ optional_header, pe::kIndexOfBaseRelocationTable);
+ if (!base_relocation_table_)
+ return false;
+
+ image_base_ = optional_header->image_base;
+
+ // |optional_header->size_of_image| is the size of the image when loaded into
+ // memory, and not the actual size on disk.
+ rva_t rva_bound = optional_header->size_of_image;
+ if (rva_bound >= kRvaBound)
+ return false;
+
+ // An exclusive upper bound of all offsets used in the image. This gets
+ // updated as sections get visited.
+ offset_t offset_bound =
+ base::checked_cast<offset_t>(source.begin() - image_.begin());
+
+ // Extract |sections_|.
+ size_t sections_count = coff_header->number_of_sections;
+ auto* sections_array =
+ source.GetArray<pe::ImageSectionHeader>(sections_count);
+ if (!sections_array)
+ return false;
+ sections_.assign(sections_array, sections_array + sections_count);
+
+ // Prepare |units| for offset-RVA translation.
+ std::vector<AddressTranslator::Unit> units;
+ units.reserve(sections_count);
+
+ // Visit each section, validate, and add address translation data to |units|.
+ bool has_text_section = false;
+ decltype(pe::ImageSectionHeader::virtual_address) prev_virtual_address = 0;
+ for (size_t i = 0; i < sections_count; ++i) {
+ const pe::ImageSectionHeader& section = sections_[i];
+ // Apply strict checks on section bounds.
+ if (!image_.covers(
+ {section.file_offset_of_raw_data, section.size_of_raw_data})) {
+ return false;
+ }
+ if (!RangeIsBounded(section.virtual_address, section.virtual_size,
+ rva_bound)) {
+ return false;
+ }
+
+ // PE sections should be sorted by RVAs. For robustness, we don't rely on
+ // this, so even if unsorted we don't care. Output warning though.
+ if (prev_virtual_address > section.virtual_address)
+ LOG(WARNING) << "RVA anomaly found for Section " << i;
+ prev_virtual_address = section.virtual_address;
+
+ // Add |section| data for offset-RVA translation.
+ units.push_back({section.file_offset_of_raw_data, section.size_of_raw_data,
+ section.virtual_address, section.virtual_size});
+
+ offset_t end_offset =
+ section.file_offset_of_raw_data + section.size_of_raw_data;
+ offset_bound = std::max(end_offset, offset_bound);
+ if (IsWin32CodeSection<Traits>(section))
+ has_text_section = true;
+ }
+
+ if (offset_bound > image_.size())
+ return false;
+ if (!has_text_section)
+ return false;
+
+ // Initialize |translator_| for offset-RVA translations. Any inconsistency
+ // (e.g., 2 offsets correspond to the same RVA) would invalidate the PE file.
+ if (translator_.Initialize(std::move(units)) != AddressTranslator::kSuccess)
+ return false;
+
+ // Resize |image_| to include only contents claimed by sections. Note that
+ // this may miss digital signatures at end of PE files, but for patching this
+ // is of minor concern.
+ image_.shrink(offset_bound);
+
+ return true;
+}
+
+template <class TRAITS>
+bool DisassemblerWin32<TRAITS>::ParseAndStoreRelocBlocks() {
+ if (has_parsed_relocs_)
+ return reloc_region_.lo() != kInvalidOffset;
+
+ has_parsed_relocs_ = true;
+ DCHECK(reloc_block_offsets_.empty());
+
+ offset_t relocs_offset =
+ translator_.RvaToOffset(base_relocation_table_->virtual_address);
+ size_t relocs_size = base_relocation_table_->size;
+ const BufferRegion temp_reloc_region = {relocs_offset, relocs_size};
+
+ // Reject bogus relocs. It's possible to have no reloc, so this is non-fatal!
+ if (relocs_offset == kInvalidOffset || !image_.covers(temp_reloc_region))
+ return false;
+
+ // Precompute offsets of all reloc blocks.
+ if (!RelocRvaReaderWin32::FindRelocBlocks(image_, temp_reloc_region,
+ &reloc_block_offsets_)) {
+ return false;
+ }
+ // Reassign |reloc_region_| only on success.
+ reloc_region_ = temp_reloc_region;
+ return true;
+}
+
+template <class TRAITS>
+bool DisassemblerWin32<TRAITS>::ParseAndStoreAbs32() {
+ if (has_parsed_abs32_)
+ return true;
+ has_parsed_abs32_ = true;
+
+ // Read reloc targets as preliminary abs32 locations.
+ std::unique_ptr<ReferenceReader> relocs = MakeReadRelocs(0, offset_t(size()));
+ for (auto ref = relocs->GetNext(); ref.has_value(); ref = relocs->GetNext())
+ abs32_locations_.push_back(ref->target);
+
+ std::sort(abs32_locations_.begin(), abs32_locations_.end());
+
+ // Abs32 references must have targets translatable to offsets. Remove those
+ // that are unable to do so.
+ size_t num_untranslatable = RemoveUntranslatableAbs32(
+ image_, {Traits::kBitness, image_base_}, translator_, &abs32_locations_);
+ LOG_IF(WARNING, num_untranslatable) << "Removed " << num_untranslatable
+ << " untranslatable abs32 references.";
+
+ // Abs32 reference bodies must not overlap. If found, simply remove them.
+ size_t num_overlapping =
+ RemoveOverlappingAbs32Locations(Traits::kVAWidth, &abs32_locations_);
+ LOG_IF(WARNING, num_overlapping)
+ << "Removed " << num_overlapping
+ << " abs32 references with overlapping bodies.";
+
+ abs32_locations_.shrink_to_fit();
+ return true;
+}
+
+template <class TRAITS>
+bool DisassemblerWin32<TRAITS>::ParseAndStoreRel32() {
+ if (has_parsed_rel32_)
+ return true;
+ has_parsed_rel32_ = true;
+
+ ParseAndStoreAbs32();
+
+ AddressTranslator::RvaToOffsetCache target_rva_checker(translator_);
+
+ for (const pe::ImageSectionHeader& section : sections_) {
+ if (!IsWin32CodeSection<Traits>(section))
+ continue;
+
+ rva_t start_rva = section.virtual_address;
+ rva_t end_rva = start_rva + section.virtual_size;
+
+ // |virtual_size < size_of_raw_data| is possible. In this case, disassembly
+ // should not proceed beyond |virtual_size|, so rel32 location RVAs remain
+ // translatable to file offsets.
+ uint32_t size_to_use =
+ std::min(section.virtual_size, section.size_of_raw_data);
+ ConstBufferView region =
+ image_[{section.file_offset_of_raw_data, size_to_use}];
+ Abs32GapFinder gap_finder(image_, region, abs32_locations_,
+ Traits::kVAWidth);
+ typename Traits::RelFinder rel_finder(image_, translator_);
+ // Iterate over gaps between abs32 references, to avoid collision.
+ while (gap_finder.FindNext()) {
+ rel_finder.SetRegion(gap_finder.GetGap());
+ // Heuristically detect rel32 references, store if valid.
+ while (rel_finder.FindNext()) {
+ auto rel32 = rel_finder.GetRel32();
+ if (target_rva_checker.IsValid(rel32.target_rva) &&
+ (rel32.can_point_outside_section ||
+ (start_rva <= rel32.target_rva && rel32.target_rva < end_rva))) {
+ rel_finder.Accept();
+ rel32_locations_.push_back(rel32.location);
+ }
+ }
+ }
+ }
+ rel32_locations_.shrink_to_fit();
+ // |sections_| entries are usually sorted by offset, but there's no guarantee.
+ // So sort explicitly, to be sure.
+ std::sort(rel32_locations_.begin(), rel32_locations_.end());
+ return true;
+}
+
+// Explicit instantiation for supported classes.
+template class DisassemblerWin32<Win32X86Traits>;
+template class DisassemblerWin32<Win32X64Traits>;
+
+} // namespace zucchini
diff --git a/disassembler_win32.h b/disassembler_win32.h
new file mode 100644
index 0000000..77b65ac
--- /dev/null
+++ b/disassembler_win32.h
@@ -0,0 +1,131 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <deque>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/type_win_pe.h"
+
+namespace zucchini {
+
+class Rel32FinderX86;
+class Rel32FinderX64;
+
+struct Win32X86Traits {
+ static constexpr Bitness kBitness = kBit32;
+ static constexpr ExecutableType kExeType = kExeTypeWin32X86;
+ enum : uint16_t { kMagic = 0x10B };
+ enum : uint16_t { kRelocType = 3 };
+ enum : uint32_t { kVAWidth = 4 };
+ static const char kExeTypeString[];
+
+ using ImageOptionalHeader = pe::ImageOptionalHeader;
+ using RelFinder = Rel32FinderX86;
+ using Address = uint32_t;
+};
+
+struct Win32X64Traits {
+ static constexpr Bitness kBitness = kBit64;
+ static constexpr ExecutableType kExeType = kExeTypeWin32X64;
+ enum : uint16_t { kMagic = 0x20B };
+ enum : uint16_t { kRelocType = 10 };
+ enum : uint32_t { kVAWidth = 8 };
+ static const char kExeTypeString[];
+
+ using ImageOptionalHeader = pe::ImageOptionalHeader64;
+ using RelFinder = Rel32FinderX64;
+ using Address = uint64_t;
+};
+
+template <class TRAITS>
+class DisassemblerWin32 : public Disassembler {
+ public:
+ using Traits = TRAITS;
+ enum ReferenceType : uint8_t { kReloc, kAbs32, kRel32, kTypeCount };
+
+ // Applies quick checks to determine whether |image| *may* point to the start
+ // of an executable. Returns true iff the check passes.
+ static bool QuickDetect(ConstBufferView image);
+
+ DisassemblerWin32();
+ DisassemblerWin32(const DisassemblerWin32&) = delete;
+ const DisassemblerWin32& operator=(const DisassemblerWin32&) = delete;
+ ~DisassemblerWin32() override;
+
+ // Disassembler:
+ ExecutableType GetExeType() const override;
+ std::string GetExeTypeString() const override;
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ // Functions that return reader / writer for references.
+ std::unique_ptr<ReferenceReader> MakeReadRelocs(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceReader> MakeReadAbs32(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceReader> MakeReadRel32(offset_t lo, offset_t hi);
+ std::unique_ptr<ReferenceWriter> MakeWriteRelocs(MutableBufferView image);
+ std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image);
+ std::unique_ptr<ReferenceWriter> MakeWriteRel32(MutableBufferView image);
+
+ private:
+ friend Disassembler;
+
+ // Disassembler:
+ bool Parse(ConstBufferView image) override;
+
+ // Parses the file header. Returns true iff successful.
+ bool ParseHeader();
+
+ // Parsers to extract references. These are lazily called, and return whether
+ // parsing was successful (failures are non-fatal).
+ bool ParseAndStoreRelocBlocks();
+ bool ParseAndStoreAbs32();
+ bool ParseAndStoreRel32();
+
+ // In-memory copy of sections.
+ std::vector<pe::ImageSectionHeader> sections_;
+
+ // Image base address to translate between RVA and VA.
+ typename Traits::Address image_base_ = 0;
+
+ // Pointer to data Directory entry of the relocation table.
+ const pe::ImageDataDirectory* base_relocation_table_ = nullptr;
+
+ // Translator between offsets and RVAs.
+ AddressTranslator translator_;
+
+ // Reference storage.
+ BufferRegion reloc_region_ = {kInvalidOffset, 0U};
+ std::vector<offset_t> reloc_block_offsets_;
+ offset_t reloc_end_ = 0;
+ std::vector<offset_t> abs32_locations_;
+ // Using std::deque to reduce peak memory footprint.
+ std::deque<offset_t> rel32_locations_;
+
+ // Initialization states of reference storage, used for lazy initialization.
+ // TODO(huangs): Investigate whether lazy initialization is useful for memory
+ // reduction. This is a carryover from Courgette. To be sure we should run
+ // experiment after Zucchini is able to do ensemble patching.
+ bool has_parsed_relocs_ = false;
+ bool has_parsed_abs32_ = false;
+ bool has_parsed_rel32_ = false;
+};
+
+using DisassemblerWin32X86 = DisassemblerWin32<Win32X86Traits>;
+using DisassemblerWin32X64 = DisassemblerWin32<Win32X64Traits>;
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_
diff --git a/disassembler_ztf.cc b/disassembler_ztf.cc
new file mode 100644
index 0000000..dfe9045
--- /dev/null
+++ b/disassembler_ztf.cc
@@ -0,0 +1,653 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_ztf.h"
+
+#include <algorithm>
+#include <cmath>
+#include <iterator>
+#include <limits>
+#include <numeric>
+
+#include "base/check_op.h"
+#include "base/numerics/checked_math.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/buffer_source.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+namespace {
+
+constexpr uint8_t kDelimiter = ',';
+
+constexpr int kHeaderMagicSize = 4;
+constexpr int kFooterMagicSize = 5;
+constexpr int kTotalMagicSize = kHeaderMagicSize + kFooterMagicSize;
+
+// Number of characters that aren't digits in each type of reference.
+constexpr int kNumConstCharInAbs = 3;
+constexpr int kNumConstCharInRel = 5;
+
+/******** ZtfConfig ********/
+
+// For passing around metadata about the type of reference to match.
+// - |digits_per_dim| is the length of the offset in lines/cols of a
+// reference.
+// - |open_char| is an ASCII character representing the opening char.
+// - |close_char| is an ASCII character representing the closing char.
+struct ZtfConfig {
+  uint8_t digits_per_dim;
+  uint8_t open_char;
+  uint8_t close_char;
+
+  // Total byte width of an absolute reference:
+  //   <open><digits><delimiter><digits><close>  ->  2 * digits + 3.
+  constexpr uint8_t abs_width() const {
+    return digits_per_dim * 2 + kNumConstCharInAbs;
+  }
+
+  // Total byte width of a relative reference:
+  //   <open><sign><digits><delimiter><sign><digits><close>  ->  2 * digits + 5.
+  constexpr uint8_t rel_width() const {
+    return digits_per_dim * 2 + kNumConstCharInRel;
+  }
+
+  // Overloads keyed on reference value type, so templated callers can ask for
+  // "the width of this kind of reference" uniformly.
+  uint8_t Width(ztf::LineCol /* lc */) const { return abs_width(); }
+
+  uint8_t Width(ztf::DeltaLineCol /* dlc */) const { return rel_width(); }
+};
+
+// Creates a ZtfConfig for parsing or writing based on the desired |digits| and
+// |pool|.
+template <DisassemblerZtf::ReferencePool pool>
+constexpr ZtfConfig MakeZtfConfig(uint8_t digits) {
+  switch (pool) {
+    case DisassemblerZtf::kAngles:
+      return ZtfConfig{digits, '<', '>'};
+    case DisassemblerZtf::kBraces:
+      return ZtfConfig{digits, '{', '}'};
+    case DisassemblerZtf::kBrackets:
+      return ZtfConfig{digits, '[', ']'};
+    case DisassemblerZtf::kParentheses:
+      break;  // Handled below.
+  }
+  // kParentheses falls through to here; keeping the return outside the switch
+  // ensures every control path returns a value.
+  return ZtfConfig{digits, '(', ')'};
+}
+
+/******** ZtfParser ********/
+
+// ZtfParser is used to extract (absolute) LineCol and (relative) DeltaLineCol
+// from a ZTF file, and contains various helpers for character, digits, and sign
+// matching.
+class ZtfParser {
+ public:
+  // |hi| is an exclusive upper bound on offsets that matching may touch; the
+  // MatchAtOffset() overloads bounds-check against it up front.
+  ZtfParser(offset_t hi, ConstBufferView image, ZtfConfig config)
+      : image_(image), hi_(hi), config_(config) {
+    DCHECK_LE(static_cast<size_t>(std::pow(10U, config_.digits_per_dim)),
+              ztf::kMaxDimValue);
+  }
+
+  ZtfParser(const ZtfParser&) = delete;
+  const ZtfParser& operator=(const ZtfParser&) = delete;
+
+  // Attempts to match an absolute reference at |offset|. If successful then
+  // assigns the result to |abs_lc| and returns true. Otherwise returns false.
+  // An absolute reference takes the form:
+  //   <open><digits><delimiter><digits><close>
+  bool MatchAtOffset(offset_t offset, ztf::LineCol* abs_lc) {
+    // Reject up front if a full-width match cannot fit before |hi_|; the
+    // Match*() helpers below rely on this and read without bounds checks.
+    if (hi_ < config_.abs_width() || offset > hi_ - config_.abs_width())
+      return false;
+    offset_ = offset;
+    return MatchChar(config_.open_char) && MatchDigits(+1, &abs_lc->line) &&
+           MatchChar(kDelimiter) && MatchDigits(+1, &abs_lc->col) &&
+           MatchChar(config_.close_char);
+  }
+
+  // Attempts to match a relative reference at |offset|. If successful then
+  // assigns the result to |rel_dlc| and returns true. Otherwise returns false.
+  // A relative reference takes the form:
+  //   <open><sign><digits><delimiter><sign><digits><close>
+  bool MatchAtOffset(offset_t offset, ztf::DeltaLineCol* rel_dlc) {
+    if (hi_ < config_.rel_width() || offset > hi_ - config_.rel_width())
+      return false;
+    offset_ = offset;
+    ztf::dim_t line_sign;
+    ztf::dim_t col_sign;
+    return MatchChar(config_.open_char) && MatchSign(&line_sign) &&
+           MatchDigits(line_sign, &rel_dlc->line) && MatchChar(kDelimiter) &&
+           MatchSign(&col_sign) && MatchDigits(col_sign, &rel_dlc->col) &&
+           MatchChar(config_.close_char);
+  }
+
+ private:
+  // The Match*() functions below can advance |offset_|, and return a bool to
+  // indicate success to allow chaining using &&.
+
+  // Returns true if |character| is at location |offset_| in |image_| and
+  // increments |offset_|.
+  bool MatchChar(uint8_t character) {
+    return character == image_.read<uint8_t>(offset_++);
+  }
+
+  // Looks for '+' or '-' at |offset_|. If found, stores +1 or -1 in |sign| and
+  // returns true. Otherwise returns false.
+  bool MatchSign(ztf::dim_t* sign) {
+    uint8_t val = image_.read<uint8_t>(offset_++);
+    if (val == static_cast<uint8_t>(ztf::SignChar::kMinus)) {
+      *sign = -1;
+      return true;
+    }
+    if (val == static_cast<uint8_t>(ztf::SignChar::kPlus)) {
+      *sign = 1;
+      return true;
+    }
+    return false;
+  }
+
+  // Attempts to extract a number with the number of base 10 digits equal to
+  // |config_.digits_per_dim| from |image_| starting from |offset_|. Returns
+  // true and assigns the integer value (with |sign| applied) to |value| if
+  // successful.
+  bool MatchDigits(ztf::dim_t sign, ztf::dim_t* value) {
+    ztf::dim_t output = 0;
+    for (int i = 0; i < config_.digits_per_dim; ++i) {
+      auto digit = image_.read<uint8_t>(offset_++);
+      if (digit >= '0' && digit < '0' + 10)
+        output = output * 10 + digit - '0';
+      else
+        return false;
+    }
+    if (!output && sign < 0)  // Disallow "-0", "-00", etc.
+      return false;
+    *value = sign * output;
+    return true;
+  }
+
+  ConstBufferView image_;
+  const offset_t hi_;       // Exclusive upper bound on read offsets.
+  const ZtfConfig config_;
+  offset_t offset_ = 0;     // Cursor advanced by the Match*() helpers.
+};
+
+/******** ZtfWriter ********/
+
+// ZtfWriter is used to write references to an image. This includes writing
+// the enclosing characters around the reference.
+class ZtfWriter {
+ public:
+  ZtfWriter(MutableBufferView image, ZtfConfig config)
+      : image_(image),
+        config_(config),
+        val_bound_(
+            static_cast<ztf::dim_t>(std::pow(10, config_.digits_per_dim))) {}
+
+  ZtfWriter(const ZtfWriter&) = delete;
+  const ZtfWriter& operator=(const ZtfWriter&) = delete;
+
+  // Write an absolute reference |abs_ref| at |offset|. Note that references
+  // that would overwrite a newline are skipped (silently, with no error) as
+  // this would invalidate all the other reference line numbers.
+  void Write(offset_t offset, ztf::LineCol abs_ref) {
+    offset_ = offset;
+    if (!SafeToWriteNumber(abs_ref.line) || !SafeToWriteNumber(abs_ref.col) ||
+        !SafeToWriteData(offset_, offset_ + config_.abs_width())) {
+      return;
+    }
+    WriteChar(config_.open_char);
+    WriteNumber(abs_ref.line);
+    WriteChar(kDelimiter);
+    WriteNumber(abs_ref.col);
+    WriteChar(config_.close_char);
+  }
+
+  // Write a relative reference |rel_ref| at |offset|. Note that references
+  // that would overwrite a newline are skipped as this would invalidate all
+  // the other reference line numbers.
+  void Write(offset_t offset, ztf::DeltaLineCol rel_ref) {
+    offset_ = offset;
+    if (!SafeToWriteNumber(rel_ref.line) || !SafeToWriteNumber(rel_ref.col) ||
+        !SafeToWriteData(offset_, offset_ + config_.rel_width())) {
+      return;
+    }
+    WriteChar(config_.open_char);
+    WriteSign(rel_ref.line);
+    WriteNumber(rel_ref.line);
+    WriteChar(kDelimiter);
+    WriteSign(rel_ref.col);
+    WriteNumber(rel_ref.col);
+    WriteChar(config_.close_char);
+  }
+
+ private:
+  // Returns whether it is safe to modify bytes in |[lo, hi)| in |image_| for
+  // Reference correction. Failure cases are:
+  // - Out-of-bound writes.
+  // - Overwriting '\n'. This is a ZTF special case since '\n' dictates file
+  //   structure, and Reference correction should never mess with this.
+  bool SafeToWriteData(offset_t lo, offset_t hi) const {
+    DCHECK_LE(lo, hi);
+    // Out of bounds.
+    if (hi > image_.size())
+      return false;
+    for (offset_t i = lo; i < hi; ++i) {
+      if (image_.read<uint8_t>(i) == '\n')
+        return false;
+    }
+    return true;
+  }
+
+  // Checks whether it is safe to write a |val| based on
+  // |config_.digits_per_dim|, i.e. |val| fits in that many decimal digits.
+  bool SafeToWriteNumber(ztf::dim_t val) const {
+    return std::abs(val) < val_bound_;
+  }
+
+  // The Write*() functions each advance |offset_| by a fixed distance. The
+  // caller should ensure there's enough space to write data.
+
+  // Write |character| at |offset_| and increment |offset_|.
+  void WriteChar(uint8_t character) { image_.write(offset_++, character); }
+
+  // Write the sign of |value| at |offset_| and increment |offset_|. Zero is
+  // written as '+', matching the format's "-0 is disallowed" rule.
+  void WriteSign(ztf::dim_t value) {
+    image_.write(offset_++,
+                 value >= 0 ? ztf::SignChar::kPlus : ztf::SignChar::kMinus);
+  }
+
+  // Writes the absolute value of the number represented by |value| at |offset_|
+  // using zero padding to fill |config_.digits_per_dim| (snprintf "%0*u").
+  void WriteNumber(ztf::dim_t value) {
+    // |size| counts the '\0' terminator that snprintf always writes.
+    size_t size = config_.digits_per_dim + 1;
+    DCHECK_LE(size, kMaxDigitCount + 1);
+    char digits[kMaxDigitCount + 1];  // + 1 for terminator.
+    int len =
+        snprintf(digits, size, "%0*u", config_.digits_per_dim, std::abs(value));
+    DCHECK_EQ(len, config_.digits_per_dim);
+    for (int i = 0; i < len; ++i)
+      image_.write(offset_++, digits[i]);
+  }
+
+  MutableBufferView image_;
+  const ZtfConfig config_;
+  // Bound on numeric values, as limited by |config_.digits_per_dim|.
+  const ztf::dim_t val_bound_;
+  offset_t offset_ = 0;
+};
+
+// Specialization of ReferenceReader for reading text references.
+// Specialization of ReferenceReader for reading text references. |T| is
+// ztf::LineCol (absolute) or ztf::DeltaLineCol (relative); overload resolution
+// on |T| selects the matching parser and target-conversion logic.
+template <typename T>
+class ZtfReferenceReader : public ReferenceReader {
+ public:
+  ZtfReferenceReader(offset_t lo,
+                     offset_t hi,
+                     ConstBufferView image,
+                     const ZtfTranslator& translator,
+                     ZtfConfig config)
+      : offset_(lo),
+        hi_(hi),
+        translator_(translator),
+        config_(config),
+        parser_(hi_, image, config_) {
+    DCHECK_LE(hi_, image.size());
+  }
+
+  // Walks |offset_| from |lo| to |hi_| running |parser_|. If any matches are
+  // found they are returned. On a match, |offset_| is advanced past the whole
+  // reference so subsequent calls resume after it (references never overlap).
+  absl::optional<Reference> GetNext() override {
+    T line_col;
+    for (; offset_ < hi_; ++offset_) {
+      if (!parser_.MatchAtOffset(offset_, &line_col))
+        continue;
+
+      auto target = ConvertToTargetOffset(offset_, line_col);
+      // Ignore targets that point outside the file.
+      if (target == kInvalidOffset)
+        continue;
+      offset_t location = offset_;
+      offset_ += config_.Width(line_col);
+      return Reference{location, target};
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  // Converts |lc| (an absolute reference) to an offset using |translator_|.
+  offset_t ConvertToTargetOffset(offset_t /* location */,
+                                 ztf::LineCol lc) const {
+    return translator_.LineColToOffset(lc);
+  }
+
+  // Converts |dlc| (a relative reference) to an offset using |translator_|.
+  // This requires converting the |dlc| to a ztf::LineCol to find the offset.
+  offset_t ConvertToTargetOffset(offset_t location,
+                                 ztf::DeltaLineCol dlc) const {
+    auto lc = translator_.OffsetToLineCol(location);
+    if (!lc.has_value())
+      return kInvalidOffset;
+    return translator_.LineColToOffset(lc.value() + dlc);
+  }
+
+  offset_t offset_;      // Scan cursor, in [lo, hi_).
+  const offset_t hi_;
+  const ZtfTranslator& translator_;
+  const ZtfConfig config_;
+  ZtfParser parser_;
+};
+
+// Specialization of ReferenceWriter for writing text references.
+// Specialization of ReferenceWriter for writing text references. |T| selects
+// absolute (ztf::LineCol) or relative (ztf::DeltaLineCol) conversion.
+template <typename T>
+class ZtfReferenceWriter : public ReferenceWriter {
+ public:
+  ZtfReferenceWriter(MutableBufferView image,
+                     const ZtfTranslator& translator,
+                     ZtfConfig config)
+      : translator_(translator), writer_(image, config) {}
+
+  // Writes |reference| if it converts to a valid in-file location; otherwise
+  // drops it silently (ZtfWriter may additionally skip unsafe writes).
+  void PutNext(Reference reference) override {
+    T line_col;
+    if (!ConvertToTargetLineCol(reference, &line_col))
+      return;
+
+    writer_.Write(reference.location, line_col);
+  }
+
+ private:
+  // Converts |reference| to an absolute reference to be stored in |out_lc|.
+  // Returns true on success.
+  bool ConvertToTargetLineCol(Reference reference, ztf::LineCol* out_lc) {
+    auto temp_lc = translator_.OffsetToLineCol(reference.target);
+    if (!temp_lc.has_value() || !translator_.IsValid(temp_lc.value()))
+      return false;
+
+    *out_lc = temp_lc.value();
+    return true;
+  }
+
+  // Converts |reference| to a relative reference to be stored in |out_dlc|.
+  // Will return true on success.
+  bool ConvertToTargetLineCol(Reference reference, ztf::DeltaLineCol* out_dlc) {
+    auto location_lc = translator_.OffsetToLineCol(reference.location);
+    if (!location_lc.has_value())
+      return false;
+
+    auto target_lc = translator_.OffsetToLineCol(reference.target);
+    if (!target_lc.has_value())
+      return false;
+
+    *out_dlc = target_lc.value() - location_lc.value();
+    return translator_.IsValid(reference.location, *out_dlc);
+  }
+
+  const ZtfTranslator& translator_;
+  ZtfWriter writer_;
+};
+
+// Reads a text header to check for the magic string "ZTxt" at the start
+// indicating the file should be treated as a Zucchini text file.
+// Reads a text header to check for the magic string "ZTxt" at the start
+// indicating the file should be treated as a Zucchini text file. Also rejects
+// images too small to hold header + footer, or too large to address with
+// offset_t.
+bool ReadZtfHeader(ConstBufferView image) {
+  BufferSource source(image);
+  // Reject empty images and "ZTxtxTZ\n" (missing 't').
+  if (source.size() < kTotalMagicSize)
+    return false;
+  if (source.size() > std::numeric_limits<offset_t>::max())
+    return false;
+  return source.CheckNextBytes({'Z', 'T', 'x', 't'});
+}
+
+} // namespace
+
+/******** ZtfTranslator ********/
+
+ZtfTranslator::ZtfTranslator() {}
+
+ZtfTranslator::~ZtfTranslator() = default;
+
+bool ZtfTranslator::Init(ConstBufferView image) {
+  line_starts_.clear();
+  // Record the starting offset of every line in |image_| into |line_start_|.
+  line_starts_.push_back(0);
+  for (size_t i = 0; i < image.size(); ++i) {
+    if (image.read<uint8_t>(i) == '\n') {
+      // Maximum number of entries is |ztf::kMaxDimValue|, including the end
+      // sentinel.
+      if (line_starts_.size() >= ztf::kMaxDimValue)
+        return false;
+      line_starts_.push_back(base::checked_cast<offset_t>(i + 1));
+      // Check that the line length is reachable from an absolute reference,
+      // i.e. its column index fits in |ztf::kMaxDimValue|.
+      if (line_starts_.back() - *std::next(line_starts_.rbegin()) >=
+          ztf::kMaxDimValue) {
+        return false;
+      }
+    }
+  }
+  // Since the last character of ZTF file is always '\n', |line_starts_| will
+  // always contain the file length as the last element, which serves as a
+  // sentinel. The CHECK enforces the trailing-'\n' precondition.
+  CHECK_EQ(image.size(), static_cast<size_t>(line_starts_.back()));
+  return true;
+}
+
+// Returns whether the 1-based |lc| denotes an existing character in the file:
+// its line exists and its column does not exceed that line's length.
+bool ZtfTranslator::IsValid(ztf::LineCol lc) const {
+  DCHECK(!line_starts_.empty());
+  return lc.line >= 1 && lc.col >= 1 &&
+         static_cast<offset_t>(lc.line) <= NumLines() &&
+         static_cast<offset_t>(lc.col) <= LineLength(lc.line);
+}
+
+// Returns whether |dlc| applied to the LineCol of |offset| lands on a valid
+// in-file position. Overflow of the line/col additions is treated as invalid.
+bool ZtfTranslator::IsValid(offset_t offset, ztf::DeltaLineCol dlc) const {
+  DCHECK(!line_starts_.empty());
+  auto abs_lc = OffsetToLineCol(offset);
+  if (!abs_lc.has_value())
+    return false;
+
+  if (!base::CheckAdd(abs_lc->line, dlc.line).IsValid() ||
+      !base::CheckAdd(abs_lc->col, dlc.col).IsValid()) {
+    return false;
+  }
+  return IsValid(abs_lc.value() + dlc);
+}
+
+offset_t ZtfTranslator::LineColToOffset(ztf::LineCol lc) const {
+  // Guard against out of bounds access to |line_starts_| and ensure the
+  // |lc| falls within the file.
+  DCHECK(!line_starts_.empty());
+  if (!IsValid(lc))
+    return kInvalidOffset;
+
+  // |lc| is 1-based; |line_starts_| is 0-based, hence the -1 adjustments.
+  offset_t target = line_starts_[lc.line - 1] + lc.col - 1;
+  DCHECK_LT(target, line_starts_.back());
+  return target;
+}
+
+absl::optional<ztf::LineCol> ZtfTranslator::OffsetToLineCol(
+    offset_t offset) const {
+  DCHECK(!line_starts_.empty());
+  // Don't place a target outside the image (|line_starts_.back()| is the
+  // image-size sentinel).
+  if (offset >= line_starts_.back())
+    return absl::nullopt;
+  auto it = SearchForRange(offset);
+  ztf::LineCol lc;
+  // Convert 0-based distances into the format's 1-based line/col.
+  lc.line = std::distance(line_starts_.cbegin(), it) + 1;
+  lc.col = offset - line_starts_[lc.line - 1] + 1;
+  DCHECK_LE(static_cast<offset_t>(lc.col), LineLength(lc.line));
+  return lc;
+}
+
+// Binary-searches |line_starts_| for the entry of the line containing
+// |offset|, i.e. the greatest start offset that is <= |offset|.
+std::vector<offset_t>::const_iterator ZtfTranslator::SearchForRange(
+    offset_t offset) const {
+  DCHECK(!line_starts_.empty());
+  auto it =
+      std::upper_bound(line_starts_.cbegin(), line_starts_.cend(), offset);
+  // |line_starts_| begins with 0, so upper_bound can never return cbegin().
+  DCHECK(it != line_starts_.cbegin());
+  return --it;
+}
+
+// Length of 1-based |line|, computed from consecutive start offsets; includes
+// the line's trailing '\n'.
+offset_t ZtfTranslator::LineLength(uint16_t line) const {
+  DCHECK_GE(line, 1);
+  DCHECK_LE(line, NumLines());
+  return line_starts_[line] - line_starts_[line - 1];
+}
+
+/******** DisassemblerZtf ********/
+
+// Use 2 even though reference "chaining" isn't present in ZTF as it is the
+// usual case for other Disassemblers and this is meant to mimic that as closely
+// as possible.
+DisassemblerZtf::DisassemblerZtf() : Disassembler(2) {}
+
+DisassemblerZtf::~DisassemblerZtf() = default;
+
+// static.
+// Only checks the "ZTxt" header magic; a full footer check happens in Parse().
+bool DisassemblerZtf::QuickDetect(ConstBufferView image) {
+  return ReadZtfHeader(image);
+}
+
+// Disassembler:
+ExecutableType DisassemblerZtf::GetExeType() const {
+  return kExeTypeZtf;
+}
+
+// Human-readable name used in logging/diagnostics.
+std::string DisassemblerZtf::GetExeTypeString() const {
+  return "Zucchini Text Format";
+}
+
+// One group per (pool, abs/rel, digit count) combination. The leading number
+// in each traits triple is the reference byte width: 2 * digits + 3 for
+// absolute (5/7/9) and 2 * digits + 5 for relative (7/9/11), matching
+// ZtfConfig::abs_width() / rel_width().
+std::vector<ReferenceGroup> DisassemblerZtf::MakeReferenceGroups() const {
+  return {
+      {{5, TypeTag(kAnglesAbs1), PoolTag(kAngles)},
+       &DisassemblerZtf::MakeReadAbs<1, kAngles>,
+       &DisassemblerZtf::MakeWriteAbs<1, kAngles>},
+      {{7, TypeTag(kAnglesAbs2), PoolTag(kAngles)},
+       &DisassemblerZtf::MakeReadAbs<2, kAngles>,
+       &DisassemblerZtf::MakeWriteAbs<2, kAngles>},
+      {{9, TypeTag(kAnglesAbs3), PoolTag(kAngles)},
+       &DisassemblerZtf::MakeReadAbs<3, kAngles>,
+       &DisassemblerZtf::MakeWriteAbs<3, kAngles>},
+      {{7, TypeTag(kAnglesRel1), PoolTag(kAngles)},
+       &DisassemblerZtf::MakeReadRel<1, kAngles>,
+       &DisassemblerZtf::MakeWriteRel<1, kAngles>},
+      {{9, TypeTag(kAnglesRel2), PoolTag(kAngles)},
+       &DisassemblerZtf::MakeReadRel<2, kAngles>,
+       &DisassemblerZtf::MakeWriteRel<2, kAngles>},
+      {{11, TypeTag(kAnglesRel3), PoolTag(kAngles)},
+       &DisassemblerZtf::MakeReadRel<3, kAngles>,
+       &DisassemblerZtf::MakeWriteRel<3, kAngles>},
+      {{5, TypeTag(kBracesAbs1), PoolTag(kBraces)},
+       &DisassemblerZtf::MakeReadAbs<1, kBraces>,
+       &DisassemblerZtf::MakeWriteAbs<1, kBraces>},
+      {{7, TypeTag(kBracesAbs2), PoolTag(kBraces)},
+       &DisassemblerZtf::MakeReadAbs<2, kBraces>,
+       &DisassemblerZtf::MakeWriteAbs<2, kBraces>},
+      {{9, TypeTag(kBracesAbs3), PoolTag(kBraces)},
+       &DisassemblerZtf::MakeReadAbs<3, kBraces>,
+       &DisassemblerZtf::MakeWriteAbs<3, kBraces>},
+      {{7, TypeTag(kBracesRel1), PoolTag(kBraces)},
+       &DisassemblerZtf::MakeReadRel<1, kBraces>,
+       &DisassemblerZtf::MakeWriteRel<1, kBraces>},
+      {{9, TypeTag(kBracesRel2), PoolTag(kBraces)},
+       &DisassemblerZtf::MakeReadRel<2, kBraces>,
+       &DisassemblerZtf::MakeWriteRel<2, kBraces>},
+      {{11, TypeTag(kBracesRel3), PoolTag(kBraces)},
+       &DisassemblerZtf::MakeReadRel<3, kBraces>,
+       &DisassemblerZtf::MakeWriteRel<3, kBraces>},
+      {{5, TypeTag(kBracketsAbs1), PoolTag(kBrackets)},
+       &DisassemblerZtf::MakeReadAbs<1, kBrackets>,
+       &DisassemblerZtf::MakeWriteAbs<1, kBrackets>},
+      {{7, TypeTag(kBracketsAbs2), PoolTag(kBrackets)},
+       &DisassemblerZtf::MakeReadAbs<2, kBrackets>,
+       &DisassemblerZtf::MakeWriteAbs<2, kBrackets>},
+      {{9, TypeTag(kBracketsAbs3), PoolTag(kBrackets)},
+       &DisassemblerZtf::MakeReadAbs<3, kBrackets>,
+       &DisassemblerZtf::MakeWriteAbs<3, kBrackets>},
+      {{7, TypeTag(kBracketsRel1), PoolTag(kBrackets)},
+       &DisassemblerZtf::MakeReadRel<1, kBrackets>,
+       &DisassemblerZtf::MakeWriteRel<1, kBrackets>},
+      {{9, TypeTag(kBracketsRel2), PoolTag(kBrackets)},
+       &DisassemblerZtf::MakeReadRel<2, kBrackets>,
+       &DisassemblerZtf::MakeWriteRel<2, kBrackets>},
+      {{11, TypeTag(kBracketsRel3), PoolTag(kBrackets)},
+       &DisassemblerZtf::MakeReadRel<3, kBrackets>,
+       &DisassemblerZtf::MakeWriteRel<3, kBrackets>},
+      {{5, TypeTag(kParenthesesAbs1), PoolTag(kParentheses)},
+       &DisassemblerZtf::MakeReadAbs<1, kParentheses>,
+       &DisassemblerZtf::MakeWriteAbs<1, kParentheses>},
+      {{7, TypeTag(kParenthesesAbs2), PoolTag(kParentheses)},
+       &DisassemblerZtf::MakeReadAbs<2, kParentheses>,
+       &DisassemblerZtf::MakeWriteAbs<2, kParentheses>},
+      {{9, TypeTag(kParenthesesAbs3), PoolTag(kParentheses)},
+       &DisassemblerZtf::MakeReadAbs<3, kParentheses>,
+       &DisassemblerZtf::MakeWriteAbs<3, kParentheses>},
+      {{7, TypeTag(kParenthesesRel1), PoolTag(kParentheses)},
+       &DisassemblerZtf::MakeReadRel<1, kParentheses>,
+       &DisassemblerZtf::MakeWriteRel<1, kParentheses>},
+      {{9, TypeTag(kParenthesesRel2), PoolTag(kParentheses)},
+       &DisassemblerZtf::MakeReadRel<2, kParentheses>,
+       &DisassemblerZtf::MakeWriteRel<2, kParentheses>},
+      {{11, TypeTag(kParenthesesRel3), PoolTag(kParentheses)},
+       &DisassemblerZtf::MakeReadRel<3, kParentheses>,
+       &DisassemblerZtf::MakeWriteRel<3, kParentheses>},
+  };
+}
+
+// Factory for a reader of absolute references of |pool| with |digits| digits,
+// scanning [lo, hi) of |image_|.
+template <uint8_t digits, DisassemblerZtf::ReferencePool pool>
+std::unique_ptr<ReferenceReader> DisassemblerZtf::MakeReadAbs(offset_t lo,
+                                                              offset_t hi) {
+  static_assert(digits >= 1 && digits <= kMaxDigitCount,
+                "|digits| must be in range [1, 3]");
+  return std::make_unique<ZtfReferenceReader<ztf::LineCol>>(
+      lo, hi, image_, translator_, MakeZtfConfig<pool>(digits));
+}
+
+// Factory for a reader of relative references of |pool| with |digits| digits,
+// scanning [lo, hi) of |image_|.
+template <uint8_t digits, DisassemblerZtf::ReferencePool pool>
+std::unique_ptr<ReferenceReader> DisassemblerZtf::MakeReadRel(offset_t lo,
+                                                              offset_t hi) {
+  static_assert(digits >= 1 && digits <= kMaxDigitCount,
+                "|digits| must be in range [1, 3]");
+  return std::make_unique<ZtfReferenceReader<ztf::DeltaLineCol>>(
+      lo, hi, image_, translator_, MakeZtfConfig<pool>(digits));
+}
+
+// Factory for a writer of absolute references of |pool| with |digits| digits
+// into |image|.
+template <uint8_t digits, DisassemblerZtf::ReferencePool pool>
+std::unique_ptr<ReferenceWriter> DisassemblerZtf::MakeWriteAbs(
+    MutableBufferView image) {
+  static_assert(digits >= 1 && digits <= kMaxDigitCount,
+                "|digits| must be in range [1, 3]");
+  return std::make_unique<ZtfReferenceWriter<ztf::LineCol>>(
+      image, translator_, MakeZtfConfig<pool>(digits));
+}
+
+// Factory for a writer of relative references of |pool| with |digits| digits
+// into |image|.
+template <uint8_t digits, DisassemblerZtf::ReferencePool pool>
+std::unique_ptr<ReferenceWriter> DisassemblerZtf::MakeWriteRel(
+    MutableBufferView image) {
+  static_assert(digits >= 1 && digits <= kMaxDigitCount,
+                "|digits| must be in range [1, 3]");
+  return std::make_unique<ZtfReferenceWriter<ztf::DeltaLineCol>>(
+      image, translator_, MakeZtfConfig<pool>(digits));
+}
+
+// Validates the "ZTxt" header, locates the "txTZ\n" footer, truncates |image_|
+// to end at the footer (dropping any trailing data), and builds the line
+// translator. Returns false if header or footer is missing, or if the
+// truncated content violates ZtfTranslator's constraints.
+bool DisassemblerZtf::Parse(ConstBufferView image) {
+  image_ = image;
+  if (!ReadZtfHeader(image_))
+    return false;
+
+  CHECK_GE(image_.size(),
+           static_cast<size_t>(kTotalMagicSize));  // Needs header and footer.
+
+  // Find the terminating footer "txTZ\n" that indicates the end of the image.
+  // Linear scan; the first occurrence wins.
+  offset_t offset = 0;
+  for (; offset <= image_.size() - kFooterMagicSize; offset++) {
+    if (image_.read<uint8_t>(offset) == 't' &&
+        image_.read<uint8_t>(offset + 1) == 'x' &&
+        image_.read<uint8_t>(offset + 2) == 'T' &&
+        image_.read<uint8_t>(offset + 3) == 'Z' &&
+        image_.read<uint8_t>(offset + 4) == '\n') {
+      break;
+    }
+  }
+
+  // If no footer is found before the end of the image then the parsing failed.
+  if (offset > image_.size() - kFooterMagicSize)
+    return false;
+  image_.shrink(offset + kFooterMagicSize);
+
+  return translator_.Init(image_);
+}
+
+} // namespace zucchini
diff --git a/disassembler_ztf.h b/disassembler_ztf.h
new file mode 100644
index 0000000..0e73c2a
--- /dev/null
+++ b/disassembler_ztf.h
@@ -0,0 +1,201 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
+#define COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/type_ztf.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+// Disassembler for text based files. This file format is supported for
+// debugging Zucchini and is not intended for production usage.
+//
+// A valid Zucchini Text Format (ZTF) file is specified as follows:
+//
+// Header:
+// The first four bytes must be - 'Z' 'T' 'x' 't'
+// Footer:
+// The last five bytes must be - 't' 'x' 'T' 'Z' '\n'
+// (note that terminating new line is required).
+// Content:
+// The content can be any sequence of printable ASCII characters and new line
+// (but not carriage return). This excludes the sequence that comprises the
+// Footer.
+// References:
+// A reference is either Absolute or Relative. All references must begin and
+// end with a pair of enclosing characters <open>, <close>. The options are:
+// - Angles: '<' and '>'
+// - Braces: '{' and '}'
+// - Brackets: '[' and ']'
+// - Parentheses: '(' and ')'
+//
+// A reference contains three items:
+// - A line number <line>
+// - A delimiter ',' <delimiter>
+// - A column number <col>
+// <line> and <col> may contain 1-3 digits and both must contain the same
+// number of digits. If a number is too short then it can be left-padded
+// with '0'.
+//
+// For Absolute references, <line> and <col> are 1-based (i.e. positive)
+// index of line and column numbers of a character in the ZTF. This follows
+// standard convention for text editors. Note that "\n" is considered to be
+// part of a preceding line.
+//
+// <open><line><delimiter><col><close>
+//
+// For Relative references, <line> and <col> are integer offsets deltas of the
+// target's (absolute) line and column relative to the line and column of the
+// reference's first byte (i.e. <open>). Relative references have <sign> ('+'
+// or '-') before <line> and <col>. For the special case of "0", "00", etc.,
+// <sign> must be "+".
+//
+// <open><sign><line><delimiter><sign><col><close>
+//
+// If a reference points outside the target either in writing or reading it is
+// considered invalid and ignored. Similarly if it overflows a line. i.e. if a
+// line is 10 characters long and a references targets character 11 of that
+// line it is rejected. Lines are delimited with '\n' which is counted toward
+// the line length.
+//
+// If a reference is to be written that would overwrite a '\n' character it is
+// ignored as this would break all other line values.
+
+enum : size_t { kMaxDigitCount = 3 };
+
+// Helper class for translating among offset_t, ztf::LineCol and
+// ztf::DeltaLineCol.
+class ZtfTranslator {
+ public:
+  ZtfTranslator();
+  ZtfTranslator(const ZtfTranslator&) = delete;
+  const ZtfTranslator& operator=(const ZtfTranslator&) = delete;
+  ~ZtfTranslator();
+
+  // Initializes |line_starts_| with the contents of |image|. Returns false if
+  // |image| exceeds the line-count or line-length limits of the format. Must
+  // be called (successfully) before any of the query methods below.
+  bool Init(ConstBufferView image);
+
+  // Checks if |lc| is a valid location in the file.
+  bool IsValid(ztf::LineCol lc) const;
+
+  // Checks if |dlc| relative to |offset| is a valid location in the file.
+  bool IsValid(offset_t offset, ztf::DeltaLineCol dlc) const;
+
+  // Returns the offset corresponding to |line_col| if it is valid. Otherwise
+  // returns |kInvalidOffset|.
+  offset_t LineColToOffset(ztf::LineCol line_col) const;
+
+  // Returns the ztf::LineCol for an |offset| if it is valid. Otherwise returns
+  // absl::nullopt.
+  absl::optional<ztf::LineCol> OffsetToLineCol(offset_t offset) const;
+
+ private:
+  // Returns an iterator to the range containing |offset|. Which is represented
+  // by the starting offset. The next element will contain the upper bound of
+  // the range.
+  std::vector<offset_t>::const_iterator SearchForRange(offset_t offset) const;
+
+  // Returns the length of a 1-indexed line. The caller is expected to check
+  // that the requested line exists.
+  offset_t LineLength(uint16_t line) const;
+
+  // Number of lines, excluding the end sentinel.
+  offset_t NumLines() const {
+    return static_cast<offset_t>(line_starts_.size() - 1);
+  }
+
+  // |line_starts_| is a sorted list of each line's starting offset, along with
+  // the image size as the sentinel; it looks like {0, ..., image.size}.
+  std::vector<offset_t> line_starts_;
+};
+
+// Disassembler for Zucchini Text Format (ZTF).
+// Disassembler for Zucchini Text Format (ZTF).
+class DisassemblerZtf : public Disassembler {
+ public:
+  // Target Pools
+  enum ReferencePool : uint8_t {
+    kAngles,      // <>
+    kBraces,      // {}
+    kBrackets,    // []
+    kParentheses  // ()
+  };
+
+  // Type breakdown. Should contain all permutations of ReferencePool, Abs|Rel
+  // and the possible number of digits (1-3). NOTE(review): the enum values are
+  // used as TypeTag values in MakeReferenceGroups(), so the order here is
+  // presumably load-bearing — confirm before reordering.
+  enum ReferenceType : uint8_t {
+    kAnglesAbs1,
+    kAnglesAbs2,
+    kAnglesAbs3,
+    kAnglesRel1,
+    kAnglesRel2,
+    kAnglesRel3,
+    kBracesAbs1,
+    kBracesAbs2,
+    kBracesAbs3,
+    kBracesRel1,
+    kBracesRel2,
+    kBracesRel3,
+    kBracketsAbs1,
+    kBracketsAbs2,
+    kBracketsAbs3,
+    kBracketsRel1,
+    kBracketsRel2,
+    kBracketsRel3,
+    kParenthesesAbs1,
+    kParenthesesAbs2,
+    kParenthesesAbs3,
+    kParenthesesRel1,
+    kParenthesesRel2,
+    kParenthesesRel3,
+    kNumTypes
+  };
+
+  DisassemblerZtf();
+  DisassemblerZtf(const DisassemblerZtf&) = delete;
+  const DisassemblerZtf& operator=(const DisassemblerZtf&) = delete;
+  ~DisassemblerZtf() override;
+
+  // Applies quick checks to determine if |image| *may* point to the start of a
+  // ZTF file. Returns true on success.
+  static bool QuickDetect(ConstBufferView image);
+
+  // Disassembler:
+  ExecutableType GetExeType() const override;
+  std::string GetExeTypeString() const override;
+  std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+  // Reference Readers, templated to allow configurable digit count and pool.
+  template <uint8_t digits, ReferencePool pool>
+  std::unique_ptr<ReferenceReader> MakeReadAbs(offset_t lo, offset_t hi);
+  template <uint8_t digits, ReferencePool pool>
+  std::unique_ptr<ReferenceReader> MakeReadRel(offset_t lo, offset_t hi);
+
+  // Reference Writers, templated to allow configurable digit count and pool.
+  template <uint8_t digits, ReferencePool pool>
+  std::unique_ptr<ReferenceWriter> MakeWriteAbs(MutableBufferView image);
+  template <uint8_t digits, ReferencePool pool>
+  std::unique_ptr<ReferenceWriter> MakeWriteRel(MutableBufferView image);
+
+ private:
+  friend Disassembler;
+
+  // Disassembler:
+  bool Parse(ConstBufferView image) override;
+
+  // Offset <-> LineCol translator, initialized by Parse().
+  ZtfTranslator translator_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
diff --git a/disassembler_ztf_unittest.cc b/disassembler_ztf_unittest.cc
new file mode 100644
index 0000000..9b53e62
--- /dev/null
+++ b/disassembler_ztf_unittest.cc
@@ -0,0 +1,402 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/disassembler_ztf.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "base/cxx17_backports.h"
+#include "base/strings/string_piece.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/element_detection.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+constexpr char kNormalText[] = R"(ZTxt
+Hello World!
+This is an example of an absolute reference <<1,1>>
+And {-01,+05} is an example of a relative ref
+txTZ
+TRAILING DATA)";
+// -1 to exclude null byte.
+constexpr size_t kNormalTextExtraBytes = base::size("TRAILING DATA") - 1;
+
+constexpr char kOutOfBoundsText[] = R"(ZTxt<1,1>
+Hello World!
+This is an example of an OOB absolute reference <890,605>
+And {-050,+100} is an example of an OOB relative ref.
+but [+00,+10] is valid at least. As is (1,5).
+<1, 6> and { ,1} aren't nor is {4,5]
+{7,6}<1,1><2,3>{+00,+00}{004,100}[+00,+60][+000,-100]<-000,-035>(-00,-00)txTZ
+)";
+
+// Converts a raw string into data.
+std::vector<uint8_t> StrToData(base::StringPiece s) {
+ return std::vector<uint8_t>(s.begin(), s.end());
+}
+
+// Compare if |a.location < b.location| as references have unique locations.
+struct ReferenceCompare {
+ bool operator()(const Reference& a, const Reference& b) const {
+ return a.location < b.location;
+ }
+};
+
+using ReferenceKey =
+ std::pair<DisassemblerZtf::ReferencePool, DisassemblerZtf::ReferenceType>;
+using ReferenceSets =
+ std::map<ReferenceKey, std::set<Reference, ReferenceCompare>>;
+
+// Write references in |refs_to_write| to |image|. Also validate the
+// disassembler parses |image| such that it is of |expected_size|.
+void WriteReferences(MutableBufferView image,
+ size_t expected_size,
+ const ReferenceSets& refs_to_write) {
+ EXPECT_TRUE(DisassemblerZtf::QuickDetect(image));
+ std::unique_ptr<DisassemblerZtf> dis =
+ Disassembler::Make<DisassemblerZtf>(image);
+ EXPECT_TRUE(dis);
+ EXPECT_EQ(expected_size, dis->size());
+ image.shrink(dis->size());
+ auto reference_groups = dis->MakeReferenceGroups();
+ for (const auto& group : reference_groups) {
+ auto writer = group.GetWriter(image, dis.get());
+ ReferenceKey key = {
+ static_cast<DisassemblerZtf::ReferencePool>(group.pool_tag().value()),
+ static_cast<DisassemblerZtf::ReferenceType>(group.type_tag().value())};
+ if (!refs_to_write.count(key))
+ continue;
+ for (const auto& ref : refs_to_write.at(key))
+ writer->PutNext(ref);
+ }
+}
+
+// Read references in |refs_to_read| from |image|. Once found
+// the elements are removed from |refs_to_read|. Also validate the
+// disassembler parses |image| such that it is of |expected_size|.
+void ReadReferences(ConstBufferView image,
+ size_t expected_size,
+ ReferenceSets* refs_to_read) {
+ EXPECT_TRUE(DisassemblerZtf::QuickDetect(image));
+ std::unique_ptr<DisassemblerZtf> dis =
+ Disassembler::Make<DisassemblerZtf>(image);
+ EXPECT_TRUE(dis);
+ EXPECT_EQ(expected_size, dis->size());
+ auto reference_groups = dis->MakeReferenceGroups();
+ for (const auto& group : reference_groups) {
+ auto reader = group.GetReader(dis.get());
+ ReferenceKey key = {
+ static_cast<DisassemblerZtf::ReferencePool>(group.pool_tag().value()),
+ static_cast<DisassemblerZtf::ReferenceType>(group.type_tag().value())};
+ if (!refs_to_read->count(key)) {
+ // No elements of this pool/type pair are expected so assert that none are
+ // found.
+ auto ref = reader->GetNext();
+ EXPECT_FALSE(ref.has_value());
+ continue;
+ }
+ // For each reference remove it from the set if it exists, error if
+ // unexpected references are found.
+ for (auto ref = reader->GetNext(); ref.has_value();
+ ref = reader->GetNext()) {
+ EXPECT_EQ(1UL, refs_to_read->at(key).erase(ref.value()));
+ }
+ EXPECT_EQ(0U, refs_to_read->at(key).size());
+ }
+}
+
+void TestTranslation(const ZtfTranslator& translator,
+ offset_t expected_location,
+ ztf::LineCol lc) {
+ // Check the lc is translated to the expected location.
+ EXPECT_EQ(expected_location, translator.LineColToOffset(lc));
+ auto new_lc = translator.OffsetToLineCol(expected_location);
+ if (expected_location == kInvalidOffset) {
+ EXPECT_FALSE(translator.IsValid(lc));
+ EXPECT_FALSE(new_lc.has_value());
+ } else {
+ EXPECT_TRUE(translator.IsValid(lc));
+ // Check that the reverse is true. |ztf::LineCol{0, 0}| is a sentinel and
+ // should never be valid.
+ EXPECT_EQ(lc.line, new_lc->line);
+ EXPECT_EQ(lc.col, new_lc->col);
+ }
+}
+
+template <typename T>
+size_t CountDistinct(const std::vector<T>& v) {
+ return std::set<T>(v.begin(), v.end()).size();
+}
+
+} // namespace
+
+TEST(ZtfTranslatorTest, Translate) {
+ ztf::dim_t kMaxVal = INT16_MAX;
+ ztf::dim_t kMinVal = INT16_MIN;
+
+ const std::vector<uint8_t> text(StrToData(kOutOfBoundsText));
+ ConstBufferView image(text.data(), text.size());
+ ZtfTranslator translator;
+ EXPECT_TRUE(translator.Init(image));
+
+ // Absolute Translations:
+
+ // Check a bunch of invalid locations.
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{50, 60});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 0});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, 0});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 1});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 1});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, -1});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{-1, 1});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{-1, -1});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, kMaxVal});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{kMaxVal, 1});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, kMinVal});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{kMinVal, 1});
+
+ // Check the start of the file.
+ TestTranslation(translator, 0, ztf::LineCol{1, 1});
+ TestTranslation(translator, 1, ztf::LineCol{1, 2});
+
+ // Check the boundary around a newline.
+ TestTranslation(translator, 9, ztf::LineCol{1, 10});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, 11});
+ TestTranslation(translator, 10, ztf::LineCol{2, 1});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{2, 0});
+
+ // Check the end of the file.
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{8, 1});
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{7, 79});
+ // Need to subtract to account for the newline.
+ TestTranslation(translator, text.size() - 1, ztf::LineCol{7, 78});
+ TestTranslation(translator, text.size() - 2, ztf::LineCol{7, 77});
+
+ // Delta Validity
+ // - Reminder! 0 -> 1:1
+
+ // Common possible edge cases.
+ EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 0}));
+ EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 1}));
+ EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{1, 0}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{-1, -1}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{-1, 0}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, -1}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, -1}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, kMaxVal}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{kMaxVal, 0}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, kMinVal}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{kMinVal, 0}));
+ EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{0, kMaxVal}));
+ EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{kMaxVal, 0}));
+ EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{kMaxVal, kMaxVal}));
+
+ // Newline area.
+ EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 9}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, 10}));
+ EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{0, 1}));
+ EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{-1, 0}));
+ EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{1, -10}));
+ EXPECT_TRUE(translator.IsValid(9, ztf::DeltaLineCol{1, -9}));
+
+ // End of file.
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{7, 78}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{7, 77}));
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{6, 78}));
+ EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{6, 77}));
+ EXPECT_FALSE(translator.IsValid(text.size() - 1, ztf::DeltaLineCol{0, 1}));
+ EXPECT_FALSE(translator.IsValid(text.size() - 1, ztf::DeltaLineCol{1, 0}));
+ EXPECT_TRUE(translator.IsValid(text.size() - 2, ztf::DeltaLineCol{0, 1}));
+ EXPECT_FALSE(translator.IsValid(text.size() - 2, ztf::DeltaLineCol{1, 0}));
+}
+
+// Ensures that ReferenceGroups from DisassemblerZtf::MakeReferenceGroups()
+// cover each non-sentinel element in ReferenceType in order, exactly once. Also
+// ensures that the ReferenceType elements are grouped by ReferencePool, and
+// listed in increasing order.
+TEST(DisassemblerZtfTest, ReferenceGroups) {
+ std::vector<uint32_t> pool_list;
+ std::vector<uint32_t> type_list;
+ DisassemblerZtf dis;
+ for (ReferenceGroup group : dis.MakeReferenceGroups()) {
+ pool_list.push_back(static_cast<uint32_t>(group.pool_tag().value()));
+ type_list.push_back(static_cast<uint32_t>(group.type_tag().value()));
+ }
+
+ // Check ReferenceByte coverage.
+ constexpr size_t kNumTypes = DisassemblerZtf::kNumTypes;
+ EXPECT_EQ(kNumTypes, type_list.size());
+ EXPECT_EQ(kNumTypes, CountDistinct(type_list));
+ EXPECT_TRUE(std::is_sorted(type_list.begin(), type_list.end()));
+
+ // Check that ReferenceType elements are grouped by ReferencePool. Note that
+ // repeats can occur, and pools can be skipped.
+ EXPECT_TRUE(std::is_sorted(pool_list.begin(), pool_list.end()));
+}
+
+TEST(DisassemblerZtfTest, BadMagic) {
+ // Test a case where there is no header so a disassembler cannot be created.
+ {
+ const std::vector<uint8_t> text(StrToData("foobarbaz bazbarfoo"));
+ ConstBufferView image(text.data(), text.size());
+ EXPECT_FALSE(DisassemblerZtf::QuickDetect(image));
+ EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image));
+ }
+ // Test a case where there is no footer so a disassembler cannot be created.
+ {
+ const std::vector<uint8_t> text(StrToData("ZTxtfoobarbaz bazbarfootxTZ"));
+ ConstBufferView image(text.data(), text.size());
+ EXPECT_TRUE(DisassemblerZtf::QuickDetect(image));
+ EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image));
+ }
+ // Test when the header is too short
+ {
+ const std::vector<uint8_t> text(StrToData("ZTxtxTZ\n"));
+ ConstBufferView image(text.data(), text.size());
+ EXPECT_FALSE(DisassemblerZtf::QuickDetect(image));
+ EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image));
+ }
+}
+
+TEST(DisassemblerZtfTest, ZtfSizeBound) {
+ {
+ std::vector<uint8_t> text(StrToData("ZTxt"));
+ std::fill_n(std::back_inserter(text), ztf::kMaxDimValue - 2, '\n');
+ text.insert(text.end(), {'t', 'x', 'T', 'Z', '\n'});
+ ConstBufferView image(text.data(), text.size());
+ EXPECT_TRUE(DisassemblerZtf::QuickDetect(image));
+ EXPECT_TRUE(Disassembler::Make<DisassemblerZtf>(image));
+ }
+ {
+ std::vector<uint8_t> text(StrToData("ZTxt"));
+ std::fill_n(std::back_inserter(text), ztf::kMaxDimValue - 1, '\n');
+ text.insert(text.end(), {'t', 'x', 'T', 'Z', '\n'});
+ ConstBufferView image(text.data(), text.size());
+ EXPECT_TRUE(DisassemblerZtf::QuickDetect(image));
+ EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image));
+ }
+}
+
+// Try reading from a well formed source.
+TEST(DisassemblerZtfTest, NormalRead) {
+ const std::vector<uint8_t> text(StrToData(kNormalText));
+ ConstBufferView image(text.data(), text.size());
+ ReferenceSets expected_map = {
+ {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1},
+ {Reference({63, 0})}},
+ {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2},
+ {Reference({74, 27})}},
+ };
+ ReadReferences(image, text.size() - kNormalTextExtraBytes, &expected_map);
+}
+
+// Try writing to a well formed source and ensure that what is read back
+// reflects what was written.
+TEST(DisassemblerZtfTest, NormalWrite) {
+ std::vector<uint8_t> mutable_text(StrToData(kNormalText));
+ MutableBufferView image(mutable_text.data(), mutable_text.size());
+ ReferenceSets change_map = {
+ {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1},
+ {Reference({63, 71})}},
+ {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel3},
+ {Reference({74, 4})}},
+ };
+ WriteReferences(image, mutable_text.size() - kNormalTextExtraBytes,
+ change_map);
+
+ // As a sanity check see if a disassembler can identify the same references.
+ ConstBufferView const_image(image);
+ ReadReferences(const_image, mutable_text.size() - kNormalTextExtraBytes,
+ &change_map);
+}
+
+// Try reading from a source rife with errors.
+TEST(DisassemblerZtfTest, ReadOutOfBoundsRefs) {
+ const std::vector<uint8_t> text(StrToData(kOutOfBoundsText));
+ ConstBufferView image(text.data(), text.size());
+ ReferenceSets expected_map = {
+ {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1},
+ {Reference({4, 0}), Reference({223, 0}), Reference({228, 12})}},
+ {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2},
+ {Reference({139, 149})}},
+ {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesAbs1},
+ {Reference({218, 223})}},
+ {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2},
+ {Reference({233, 233})}},
+ {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1},
+ {Reference({174, 4})}},
+ };
+ ReadReferences(image, text.size(), &expected_map);
+}
+
+// Try writing to a source rife with errors (malformed references or ones that
+// reference non-existent locations. Some of the values written are also bad. To
+// validate check if the expected set of references are read back.
+TEST(DisassemblerZtfTest, WriteOutOfBoundsRefs) {
+ // Replace |old_val| (provided for checking) with |new_val| in |set|.
+ auto update_set = [](Reference old_ref, Reference new_ref,
+ std::set<Reference, ReferenceCompare>* set) {
+ auto it = set->find(old_ref);
+ EXPECT_NE(it, set->cend());
+ EXPECT_EQ(*it, old_ref);
+ set->erase(it);
+ set->insert(new_ref);
+ };
+
+ // Replace |old_val| (provided for checking) with |new_val| in the set which
+ // is the value corresponding to |key| in |map|.
+ auto update_map =
+ [update_set](
+ ReferenceKey key, Reference old_ref, Reference new_ref,
+ std::map<ReferenceKey, std::set<Reference, ReferenceCompare>>* map) {
+ auto it = map->find(key);
+ EXPECT_NE(it, map->cend());
+ update_set(old_ref, new_ref, &(it->second));
+ };
+
+ std::vector<uint8_t> mutable_text(StrToData(kOutOfBoundsText));
+ MutableBufferView image(mutable_text.data(), mutable_text.size());
+ ReferenceSets change_map = {
+ {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1},
+ {Reference({223, 15}), Reference({228, 13})}},
+ {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs3},
+ {Reference({4, 50})}}, // This should fail to write.
+ {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2},
+ {Reference({139, static_cast<offset_t>(
+ mutable_text.size())})}}, // This should fail.
+ {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1},
+ {Reference({174, 21})}}, // This should fail.
+ {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesAbs1},
+ {Reference({218, 219})}},
+ {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2},
+ {Reference({233, 174})}},
+ };
+ WriteReferences(image, mutable_text.size(), change_map);
+
+ // As a sanity check see if a disassembler can identify the same references
+ // (excluding the invalid ones).
+ change_map.erase(change_map.find(
+ {DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs3}));
+ change_map.at({DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1})
+ .emplace(Reference{4, 0});
+ update_map({DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2},
+ Reference({139, static_cast<offset_t>(mutable_text.size())}),
+ Reference({139, 149}), &change_map);
+ update_map({DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1},
+ Reference({174, 21}), Reference({174, 4}), &change_map);
+ ConstBufferView const_image(image);
+ ReadReferences(const_image, mutable_text.size(), &change_map);
+}
+
+} // namespace zucchini
diff --git a/element_detection.cc b/element_detection.cc
new file mode 100644
index 0000000..356c0d7
--- /dev/null
+++ b/element_detection.cc
@@ -0,0 +1,165 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/element_detection.h"
+
+#include <utility>
+
+#include "components/zucchini/buildflags.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/disassembler_no_op.h"
+
+#if BUILDFLAG(ENABLE_DEX)
+#include "components/zucchini/disassembler_dex.h"
+#endif // BUILDFLAG(ENABLE_DEX)
+
+#if BUILDFLAG(ENABLE_ELF)
+#include "components/zucchini/disassembler_elf.h"
+#endif // BUILDFLAG(ENABLE_ELF)
+
+#if BUILDFLAG(ENABLE_WIN)
+#include "components/zucchini/disassembler_win32.h"
+#endif // BUILDFLAG(ENABLE_WIN)
+
+#if BUILDFLAG(ENABLE_ZTF)
+#include "components/zucchini/disassembler_ztf.h"
+#endif // BUILDFLAG(ENABLE_ZTF)
+
+namespace zucchini {
+
+namespace {
+
+// Impose a minimal program size to eliminate pathological cases.
+enum : size_t { kMinProgramSize = 16 };
+
+} // namespace
+
+/******** Utility Functions ********/
+
+std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback(
+ ConstBufferView image) {
+#if BUILDFLAG(ENABLE_WIN)
+ if (DisassemblerWin32X86::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerWin32X86>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)
+ return disasm;
+ }
+
+ if (DisassemblerWin32X64::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerWin32X64>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)
+ return disasm;
+ }
+#endif // BUILDFLAG(ENABLE_WIN)
+
+#if BUILDFLAG(ENABLE_ELF)
+ if (DisassemblerElfX86::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerElfX86>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)
+ return disasm;
+ }
+
+ if (DisassemblerElfX64::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerElfX64>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)
+ return disasm;
+ }
+
+ if (DisassemblerElfAArch32::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerElfAArch32>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)
+ return disasm;
+ }
+
+ if (DisassemblerElfAArch64::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerElfAArch64>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)
+ return disasm;
+ }
+#endif // BUILDFLAG(ENABLE_ELF)
+
+#if BUILDFLAG(ENABLE_DEX)
+ if (DisassemblerDex::QuickDetect(image)) {
+ auto disasm = Disassembler::Make<DisassemblerDex>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)
+ return disasm;
+ }
+#endif // BUILDFLAG(ENABLE_DEX)
+
+#if BUILDFLAG(ENABLE_ZTF)
+ if (DisassemblerZtf::QuickDetect(image)) {
+ // This disallows very short examples like "ZTxtxtZ\n" in ensemble patching.
+ auto disasm = Disassembler::Make<DisassemblerZtf>(image);
+ if (disasm && disasm->size() >= kMinProgramSize)
+ return disasm;
+ }
+#endif // BUILDFLAG(ENABLE_ZTF)
+
+ return nullptr;
+}
+
+std::unique_ptr<Disassembler> MakeDisassemblerOfType(ConstBufferView image,
+ ExecutableType exe_type) {
+ switch (exe_type) {
+#if BUILDFLAG(ENABLE_WIN)
+ case kExeTypeWin32X86:
+ return Disassembler::Make<DisassemblerWin32X86>(image);
+ case kExeTypeWin32X64:
+ return Disassembler::Make<DisassemblerWin32X64>(image);
+#endif // BUILDFLAG(ENABLE_WIN)
+#if BUILDFLAG(ENABLE_ELF)
+ case kExeTypeElfX86:
+ return Disassembler::Make<DisassemblerElfX86>(image);
+ case kExeTypeElfX64:
+ return Disassembler::Make<DisassemblerElfX64>(image);
+ case kExeTypeElfAArch32:
+ return Disassembler::Make<DisassemblerElfAArch32>(image);
+ case kExeTypeElfAArch64:
+ return Disassembler::Make<DisassemblerElfAArch64>(image);
+#endif // BUILDFLAG(ENABLE_ELF)
+#if BUILDFLAG(ENABLE_DEX)
+ case kExeTypeDex:
+ return Disassembler::Make<DisassemblerDex>(image);
+#endif // BUILDFLAG(ENABLE_DEX)
+#if BUILDFLAG(ENABLE_ZTF)
+ case kExeTypeZtf:
+ return Disassembler::Make<DisassemblerZtf>(image);
+#endif // BUILDFLAG(ENABLE_ZTF)
+ case kExeTypeNoOp:
+ return Disassembler::Make<DisassemblerNoOp>(image);
+ default:
+ // If an architecture is disabled then null is handled gracefully.
+ return nullptr;
+ }
+}
+
+absl::optional<Element> DetectElementFromDisassembler(ConstBufferView image) {
+ std::unique_ptr<Disassembler> disasm = MakeDisassemblerWithoutFallback(image);
+ if (disasm)
+ return Element({0, disasm->size()}, disasm->GetExeType());
+ return absl::nullopt;
+}
+
+/******** ProgramScanner ********/
+
+ElementFinder::ElementFinder(ConstBufferView image, ElementDetector&& detector)
+ : image_(image), detector_(std::move(detector)) {}
+
+ElementFinder::~ElementFinder() = default;
+
+absl::optional<Element> ElementFinder::GetNext() {
+ for (; pos_ < image_.size(); ++pos_) {
+ ConstBufferView test_image =
+ ConstBufferView::FromRange(image_.begin() + pos_, image_.end());
+ absl::optional<Element> element = detector_.Run(test_image);
+ if (element) {
+ element->offset += pos_;
+ pos_ = element->EndOffset();
+ return element;
+ }
+ }
+ return absl::nullopt;
+}
+
+} // namespace zucchini
diff --git a/element_detection.h b/element_detection.h
new file mode 100644
index 0000000..856ec27
--- /dev/null
+++ b/element_detection.h
@@ -0,0 +1,59 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_
+#define COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_
+
+#include <stddef.h>
+
+#include <memory>
+
+#include "base/callback.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+class Disassembler;
+
+// Attempts to detect an executable located at start of |image|. If found,
+// returns the corresponding disassembler. Otherwise returns null.
+std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback(
+ ConstBufferView image);
+
+// Attempts to create a disassembler corresponding to |exe_type| and initialize
+// it with |image|, On failure, returns null.
+std::unique_ptr<Disassembler> MakeDisassemblerOfType(ConstBufferView image,
+ ExecutableType exe_type);
+
+// Attempts to detect an element associated with |image| and returns it, or
+// returns nullopt if no element is detected.
+using ElementDetector =
+ base::RepeatingCallback<absl::optional<Element>(ConstBufferView image)>;
+
+// Implementation of ElementDetector using disassemblers.
+absl::optional<Element> DetectElementFromDisassembler(ConstBufferView image);
+
+// A class to scan through an image and iteratively detect elements.
+class ElementFinder {
+ public:
+ ElementFinder(ConstBufferView image, ElementDetector&& detector);
+ ElementFinder(const ElementFinder&) = delete;
+ const ElementFinder& operator=(const ElementFinder&) = delete;
+ ~ElementFinder();
+
+ // Scans for the next executable using |detector|. Returns the next element
+ // found, or nullopt if no more element can be found.
+ absl::optional<Element> GetNext();
+
+ private:
+ ConstBufferView image_;
+ ElementDetector detector_;
+ offset_t pos_ = 0;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_
diff --git a/element_detection_unittest.cc b/element_detection_unittest.cc
new file mode 100644
index 0000000..319a88a
--- /dev/null
+++ b/element_detection_unittest.cc
@@ -0,0 +1,102 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/element_detection.h"
+
+#include <map>
+#include <vector>
+
+#include "base/bind.h"
+#include "components/zucchini/buffer_view.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+namespace {
+// This test uses a mock archive format where regions are determined by their
+// consecutive byte values rather than parsing real executables.
+//
+// 0 - Padding or raw data (not mapped to an executable).
+// 1 - A Win32x86 executable.
+// 2 - A Win32x64 executable.
+//
+// So an example archive file of;
+// 0 1 1 1 0 1 1 0 0 2 2 2 2
+// contains (in order left to right):
+// - One padding byte
+// - Three byte Win32x86 executable
+// - One padding byte
+// - Two byte Win32x86 executable
+// - Two padding bytes
+// - Four byte Win32x64 executable
+
+class ElementDetectionTest : public ::testing::Test {
+ protected:
+ using ElementVector = std::vector<Element>;
+ using ExeTypeMap = std::map<uint8_t, ExecutableType>;
+
+ ElementDetectionTest()
+ : exe_map_({{1, kExeTypeWin32X86}, {2, kExeTypeWin32X64}}) {}
+
+ ElementVector TestElementFinder(std::vector<uint8_t> buffer) {
+ ConstBufferView image(buffer.data(), buffer.size());
+
+ ElementFinder finder(
+ image,
+ base::BindRepeating(
+ [](ExeTypeMap exe_map, ConstBufferView image,
+ ConstBufferView region) -> absl::optional<Element> {
+ EXPECT_GE(region.begin(), image.begin());
+ EXPECT_LE(region.end(), image.end());
+ EXPECT_GE(region.size(), 0U);
+
+ if (region[0] != 0) {
+ offset_t length = 1;
+ while (length < region.size() && region[length] == region[0])
+ ++length;
+ return Element{{0, length}, exe_map[region[0]]};
+ }
+ return absl::nullopt;
+ },
+ exe_map_, image));
+ std::vector<Element> elements;
+ for (auto element = finder.GetNext(); element; element = finder.GetNext()) {
+ elements.push_back(*element);
+ }
+ return elements;
+ }
+
+ // Translation map from mock archive bytes to actual types used in Zucchini.
+ ExeTypeMap exe_map_;
+};
+
+TEST_F(ElementDetectionTest, ElementFinderEmpty) {
+ std::vector<uint8_t> buffer(10, 0);
+ ElementFinder finder(
+ ConstBufferView(buffer.data(), buffer.size()),
+ base::BindRepeating([](ConstBufferView image) -> absl::optional<Element> {
+ return absl::nullopt;
+ }));
+ EXPECT_EQ(absl::nullopt, finder.GetNext());
+}
+
+TEST_F(ElementDetectionTest, ElementFinder) {
+ EXPECT_EQ(ElementVector(), TestElementFinder({}));
+ EXPECT_EQ(ElementVector(), TestElementFinder({0, 0}));
+ EXPECT_EQ(ElementVector({{{0, 2}, kExeTypeWin32X86}}),
+ TestElementFinder({1, 1}));
+ EXPECT_EQ(
+ ElementVector({{{0, 2}, kExeTypeWin32X86}, {{2, 2}, kExeTypeWin32X64}}),
+ TestElementFinder({1, 1, 2, 2}));
+ EXPECT_EQ(ElementVector({{{1, 2}, kExeTypeWin32X86}}),
+ TestElementFinder({0, 1, 1, 0}));
+ EXPECT_EQ(
+ ElementVector({{{1, 2}, kExeTypeWin32X86}, {{3, 3}, kExeTypeWin32X64}}),
+ TestElementFinder({0, 1, 1, 2, 2, 2}));
+ EXPECT_EQ(
+ ElementVector({{{1, 2}, kExeTypeWin32X86}, {{4, 3}, kExeTypeWin32X64}}),
+ TestElementFinder({0, 1, 1, 0, 2, 2, 2}));
+}
+
+} // namespace
+} // namespace zucchini
diff --git a/encoded_view.cc b/encoded_view.cc
new file mode 100644
index 0000000..205603f
--- /dev/null
+++ b/encoded_view.cc
@@ -0,0 +1,78 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/encoded_view.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "base/check_op.h"
+
+namespace zucchini {
+
+EncodedView::EncodedView(const ImageIndex& image_index)
+ : image_index_(image_index), pool_infos_(image_index.PoolCount()) {}
+EncodedView::~EncodedView() = default;
+
+EncodedView::value_type EncodedView::Projection(offset_t location) const {
+ DCHECK_LT(location, image_index_.size());
+
+ // Find out what lies at |location|.
+ TypeTag type = image_index_.LookupType(location);
+
+ // |location| points into raw data.
+ if (type == kNoTypeTag) {
+ // The projection is the identity function on raw content.
+ return image_index_.GetRawValue(location);
+ }
+
+ // |location| points into a Reference.
+ const ReferenceSet& ref_set = image_index_.refs(type);
+ Reference ref = ref_set.at(location);
+ DCHECK_GE(location, ref.location);
+ DCHECK_LT(location, ref.location + ref_set.width());
+
+ // |location| is not the first byte of the reference.
+ if (location != ref.location) {
+ // Trailing bytes of a reference are all projected to the same value.
+ return kReferencePaddingProjection;
+ }
+
+ PoolTag pool_tag = ref_set.pool_tag();
+ const auto& target_pool = ref_set.target_pool();
+
+ // Targets with an associated Label will use its Label index in projection.
+ DCHECK_EQ(target_pool.size(), pool_infos_[pool_tag.value()].labels.size());
+ uint32_t label = pool_infos_[pool_tag.value()]
+ .labels[target_pool.KeyForOffset(ref.target)];
+
+ // Projection is done on (|target|, |type|), shifted by
+ // kBaseReferenceProjection to avoid collisions with raw content.
+ value_type projection = label;
+ projection *= image_index_.TypeCount();
+ projection += type.value();
+ return projection + kBaseReferenceProjection;
+}
+
+size_t EncodedView::Cardinality() const {
+ size_t max_width = 0;
+ for (const auto& pool_info : pool_infos_)
+ max_width = std::max(max_width, pool_info.bound);
+ return max_width * image_index_.TypeCount() + kBaseReferenceProjection;
+}
+
+void EncodedView::SetLabels(PoolTag pool,
+ std::vector<uint32_t>&& labels,
+ size_t bound) {
+ DCHECK_EQ(labels.size(), image_index_.pool(pool).size());
+ DCHECK(labels.empty() || *max_element(labels.begin(), labels.end()) < bound);
+ pool_infos_[pool.value()].labels = std::move(labels);
+ pool_infos_[pool.value()].bound = bound;
+}
+
+EncodedView::PoolInfo::PoolInfo() = default;
+EncodedView::PoolInfo::PoolInfo(PoolInfo&&) = default;
+EncodedView::PoolInfo::~PoolInfo() = default;
+
+} // namespace zucchini
diff --git a/encoded_view.h b/encoded_view.h
new file mode 100644
index 0000000..864d265
--- /dev/null
+++ b/encoded_view.h
@@ -0,0 +1,185 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_
+#define COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iterator>
+#include <vector>
+
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// Zucchini-gen performs semantics-aware matching:
+// - Same-typed reference target in "old" and "new" can be associated.
+// Associated targets are assigned an identifier called "label" (and for
+// unassociated targets, label = 0).
+// - EncodedView maps each offset in "old" and "new" images to a "projected
+// value", which can be:
+// - Raw byte value (0-255) for non-references.
+// - Reference "projected value" (> 256) that depends on target {type, label}
+// at each reference's location (byte 0).
+// - Reference padding value (256) at the body of each reference (bytes 1+).
+// - The projected values for "old" and "new" are used to build the equivalence
+// map.
+
+constexpr size_t kReferencePaddingProjection = 256;
+constexpr size_t kBaseReferenceProjection = 257;
+
+// A Range (providing begin and end iterators) that adapts ImageIndex to make
+// image data appear as an Encoded Image, that is encoded data under a higher
+// level of abstraction than raw bytes. In particular:
+// - First byte of each reference becomes a projection of its type and label.
+// - Subsequent bytes of each reference become |kReferencePaddingProjection|.
+// - Non-reference raw bytes remain as raw bytes.
+class EncodedView {
+ public:
+ // RandomAccessIterator whose values are the results of Projection().
+ class Iterator {
+ public:
+ using iterator_category = std::random_access_iterator_tag;
+ using value_type = size_t;
+ using difference_type = ptrdiff_t;
+ using reference = size_t;
+ using pointer = size_t*;
+
+ Iterator(const EncodedView* encoded_view, difference_type pos)
+ : encoded_view_(encoded_view), pos_(pos) {}
+
+ Iterator(const Iterator&) = default;
+
+ Iterator& operator=(const Iterator&) = default;
+
+ value_type operator*() const {
+ return encoded_view_->Projection(static_cast<offset_t>(pos_));
+ }
+
+ value_type operator[](difference_type n) const {
+ return encoded_view_->Projection(static_cast<offset_t>(pos_ + n));
+ }
+
+ Iterator& operator++() {
+ ++pos_;
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ Iterator tmp = *this;
+ ++pos_;
+ return tmp;
+ }
+
+ Iterator& operator--() {
+ --pos_;
+ return *this;
+ }
+
+ Iterator operator--(int) {
+ Iterator tmp = *this;
+ --pos_;
+ return tmp;
+ }
+
+ Iterator& operator+=(difference_type n) {
+ pos_ += n;
+ return *this;
+ }
+
+ Iterator& operator-=(difference_type n) {
+ pos_ -= n;
+ return *this;
+ }
+
+ friend bool operator==(Iterator a, Iterator b) { return a.pos_ == b.pos_; }
+
+ friend bool operator!=(Iterator a, Iterator b) { return !(a == b); }
+
+ friend bool operator<(Iterator a, Iterator b) { return a.pos_ < b.pos_; }
+
+ friend bool operator>(Iterator a, Iterator b) { return b < a; }
+
+ friend bool operator<=(Iterator a, Iterator b) { return !(b < a); }
+
+ friend bool operator>=(Iterator a, Iterator b) { return !(a < b); }
+
+ friend difference_type operator-(Iterator a, Iterator b) {
+ return a.pos_ - b.pos_;
+ }
+
+ friend Iterator operator+(Iterator it, difference_type n) {
+ it += n;
+ return it;
+ }
+
+ friend Iterator operator-(Iterator it, difference_type n) {
+ it -= n;
+ return it;
+ }
+
+ private:
+ const EncodedView* encoded_view_;
+ difference_type pos_;
+ };
+
+ using value_type = size_t;
+ using size_type = offset_t;
+ using difference_type = ptrdiff_t;
+ using const_iterator = Iterator;
+
+ // |image_index| is the annotated image being adapted, and is required to
+ // remain valid for the lifetime of the object.
+ explicit EncodedView(const ImageIndex& image_index);
+ EncodedView(const EncodedView&) = delete;
+ const EncodedView& operator=(const EncodedView&) = delete;
+ ~EncodedView();
+
+ // Projects |location| to a scalar value that describes the content at a
+ // higher level of abstraction.
+ value_type Projection(offset_t location) const;
+
+ bool IsToken(offset_t location) const {
+ return image_index_.IsToken(location);
+ }
+
+ // Returns the cardinality of the projection, i.e., the upper bound on
+ // values returned by Projection().
+ value_type Cardinality() const;
+
+ // Associates |labels| to targets for a given |pool|, replacing previous
+ // association. Values in |labels| must be smaller than |bound|.
+ void SetLabels(PoolTag pool, std::vector<uint32_t>&& labels, size_t bound);
+ const ImageIndex& image_index() const { return image_index_; }
+
+ // Range functions.
+ size_type size() const { return size_type(image_index_.size()); }
+ const_iterator begin() const {
+ return const_iterator{this, difference_type(0)};
+ }
+ const_iterator end() const {
+ return const_iterator{this, difference_type(size())};
+ }
+
+ private:
+ struct PoolInfo {
+ PoolInfo();
+ PoolInfo(PoolInfo&&);
+ ~PoolInfo();
+
+ // |labels| translates IndirectReference target_key to label.
+ std::vector<uint32_t> labels;
+ size_t bound = 0;
+ };
+
+ const ImageIndex& image_index_;
+ std::vector<PoolInfo> pool_infos_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_
diff --git a/encoded_view_unittest.cc b/encoded_view_unittest.cc
new file mode 100644
index 0000000..96d9dc4
--- /dev/null
+++ b/encoded_view_unittest.cc
@@ -0,0 +1,202 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/encoded_view.h"
+
+#include <iterator>
+#include <numeric>
+#include <vector>
+
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/test_disassembler.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+constexpr size_t PADDING = kReferencePaddingProjection;
+
+template <class It1, class It2>
+void TestInputIterator(It1 first_expected,
+ It1 last_expected,
+ It2 first_input,
+ It2 last_input) {
+ while (first_expected != last_expected && first_input != last_input) {
+ EXPECT_EQ(*first_expected, *first_input);
+ ++first_expected;
+ ++first_input;
+ }
+ EXPECT_EQ(last_input, first_input);
+ EXPECT_EQ(last_expected, first_expected);
+}
+
+template <class It1, class It2>
+void TestForwardIterator(It1 first_expected,
+ It1 last_expected,
+ It2 first_input,
+ It2 last_input) {
+ TestInputIterator(first_expected, last_expected, first_input, last_input);
+
+ while (first_expected != last_expected && first_input != last_input) {
+ EXPECT_EQ(*(first_expected++), *(first_input++));
+ }
+ EXPECT_EQ(last_input, first_input);
+ EXPECT_EQ(last_expected, first_expected);
+}
+
+template <class It1, class It2>
+void TestBidirectionalIterator(It1 first_expected,
+ It1 last_expected,
+ It2 first_input,
+ It2 last_input) {
+ TestForwardIterator(first_expected, last_expected, first_input, last_input);
+
+ while (first_expected != last_expected && first_input != last_input) {
+ EXPECT_EQ(*(--last_expected), *(--last_input));
+ }
+ EXPECT_EQ(last_input, first_input);
+ EXPECT_EQ(last_expected, first_expected);
+}
+
+template <class It1, class It2>
+void TestRandomAccessIterator(It1 first_expected,
+ It1 last_expected,
+ It2 first_input,
+ It2 last_input) {
+ TestBidirectionalIterator(first_expected, last_expected, first_input,
+ last_input);
+
+ using difference_type = typename std::iterator_traits<It1>::difference_type;
+
+ difference_type expected_size = last_expected - first_expected;
+ difference_type input_size = last_input - first_input;
+ EXPECT_EQ(expected_size, input_size);
+
+ for (difference_type i = 0; i < expected_size; ++i) {
+ EXPECT_EQ(*(first_expected + i), *(first_input + i));
+ EXPECT_EQ(first_expected[i], first_input[i]);
+
+ EXPECT_EQ(0 < i, first_input < first_input + i);
+ EXPECT_EQ(0 > i, first_input > first_input + i);
+ EXPECT_EQ(0 <= i, first_input <= first_input + i);
+ EXPECT_EQ(0 >= i, first_input >= first_input + i);
+
+ EXPECT_EQ(expected_size < i, last_input < first_input + i);
+ EXPECT_EQ(expected_size > i, last_input > first_input + i);
+ EXPECT_EQ(expected_size <= i, last_input <= first_input + i);
+ EXPECT_EQ(expected_size >= i, last_input >= first_input + i);
+
+ It2 input = first_input;
+ input += i;
+ EXPECT_EQ(*input, first_expected[i]);
+ input -= i;
+ EXPECT_EQ(first_input, input);
+ input += i;
+
+ EXPECT_EQ(0 < i, first_input < input);
+ EXPECT_EQ(0 > i, first_input > input);
+ EXPECT_EQ(0 <= i, first_input <= input);
+ EXPECT_EQ(0 >= i, first_input >= input);
+
+ EXPECT_EQ(expected_size < i, last_input < input);
+ EXPECT_EQ(expected_size > i, last_input > input);
+ EXPECT_EQ(expected_size <= i, last_input <= input);
+ EXPECT_EQ(expected_size >= i, last_input >= input);
+ }
+}
+
+} // namespace
+
+class EncodedViewTest : public testing::Test {
+ protected:
+ EncodedViewTest()
+ : buffer_(20),
+ image_index_(ConstBufferView(buffer_.data(), buffer_.size())) {
+ std::iota(buffer_.begin(), buffer_.end(), 0);
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)},
+ {{1, 0}, {8, 1}, {10, 2}},
+ {4, TypeTag(1), PoolTag(0)}, {{3, 3}},
+ {3, TypeTag(2), PoolTag(1)}, {{12, 4}, {17, 5}});
+ image_index_.Initialize(&disasm);
+ }
+
+ void CheckView(std::vector<size_t> expected,
+ const EncodedView& encoded_view) const {
+ for (offset_t i = 0; i < encoded_view.size(); ++i) {
+ EXPECT_EQ(expected[i], encoded_view.Projection(i)) << i;
+ }
+ TestRandomAccessIterator(expected.begin(), expected.end(),
+ encoded_view.begin(), encoded_view.end());
+ }
+
+ std::vector<uint8_t> buffer_;
+ ImageIndex image_index_;
+};
+
+TEST_F(EncodedViewTest, Unlabeled) {
+ EncodedView encoded_view(image_index_);
+
+ encoded_view.SetLabels(PoolTag(0), {0, 0, 0, 0}, 1);
+ encoded_view.SetLabels(PoolTag(1), {0, 0}, 1);
+
+ std::vector<size_t> expected = {
+ 0, // raw
+ kBaseReferenceProjection + 0 + 0 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 1 + 0 * 3, // ref 1
+ PADDING,
+ PADDING,
+ PADDING,
+ 7, // raw
+ kBaseReferenceProjection + 0 + 0 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 0 + 0 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 2 + 0 * 3, // ref 2
+ PADDING,
+ PADDING,
+ 15, // raw
+ 16,
+ kBaseReferenceProjection + 2 + 0 * 3, // ref 2
+ PADDING,
+ PADDING,
+ };
+ EXPECT_EQ(kBaseReferenceProjection + 3 * 1, encoded_view.Cardinality());
+ CheckView(expected, encoded_view);
+}
+
+TEST_F(EncodedViewTest, Labeled) {
+ EncodedView encoded_view(image_index_);
+
+ encoded_view.SetLabels(PoolTag(0), {0, 2, 1, 2}, 3);
+ encoded_view.SetLabels(PoolTag(1), {0, 0}, 1);
+
+ std::vector<size_t> expected = {
+ 0, // raw
+ kBaseReferenceProjection + 0 + 0 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 1 + 2 * 3, // ref 1
+ PADDING,
+ PADDING,
+ PADDING,
+ 7, // raw
+ kBaseReferenceProjection + 0 + 2 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 0 + 1 * 3, // ref 0
+ PADDING,
+ kBaseReferenceProjection + 2 + 0 * 3, // ref 2
+ PADDING,
+ PADDING,
+ 15, // raw
+ 16,
+ kBaseReferenceProjection + 2 + 0 * 3, // ref 2
+ PADDING,
+ PADDING,
+ };
+ EXPECT_EQ(kBaseReferenceProjection + 3 * 3, encoded_view.Cardinality());
+ CheckView(expected, encoded_view);
+}
+
+} // namespace zucchini
diff --git a/ensemble_matcher.cc b/ensemble_matcher.cc
new file mode 100644
index 0000000..d6e8148
--- /dev/null
+++ b/ensemble_matcher.cc
@@ -0,0 +1,37 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/ensemble_matcher.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "base/containers/cxx20_erase.h"
+#include "base/logging.h"
+
+namespace zucchini {
+
+/******** EnsembleMatcher ********/
+
+EnsembleMatcher::EnsembleMatcher() = default;
+
+EnsembleMatcher::~EnsembleMatcher() = default;
+
+void EnsembleMatcher::Trim() {
+ // Trim rule: If > 1 DEX files are found then ignore all DEX. This is done
+ // because we do not yet support MultiDex, under which contents can move
+ // across file boundary between "old" and "new" archives. When this occurs,
+ // forcing matches of DEX files and patching them separately can result in
+ // larger patches than naive patching.
+ auto is_match_dex = [](const ElementMatch& match) {
+ return match.exe_type() == kExeTypeDex;
+ };
+ auto num_dex = std::count_if(matches_.begin(), matches_.end(), is_match_dex);
+ if (num_dex > 1) {
+ LOG(WARNING) << "Found " << num_dex << " DEX: Ignoring all.";
+ base::EraseIf(matches_, is_match_dex);
+ }
+}
+
+} // namespace zucchini
diff --git a/ensemble_matcher.h b/ensemble_matcher.h
new file mode 100644
index 0000000..b188657
--- /dev/null
+++ b/ensemble_matcher.h
@@ -0,0 +1,60 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_
+#define COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A base class for ensemble matching strategies, which identify Elements in
+// "new" and "old" archives, and match each "new" Element to an "old" Element.
+// Matched pairs can then be passed to Disassembler for architecture-specific
+// patching. Notes:
+// - A matched Element pair must have the same ExecutableType.
+// - Special case: Exact matches are ignored, since they can be patched directly
+// without architecture-specific patching.
+// - Multiple "new" Elements may match a common "old" Element.
+// - A "new" Element may have no match. This can happen when no viable match
+// exists, or when an exact match is skipped.
+class EnsembleMatcher {
+ public:
+ EnsembleMatcher();
+ EnsembleMatcher(const EnsembleMatcher&) = delete;
+ const EnsembleMatcher& operator=(const EnsembleMatcher&) = delete;
+ virtual ~EnsembleMatcher();
+
+ // Interface to main matching feature. Returns whether match was successful.
+ // This should be called at most once per instace.
+ virtual bool RunMatch(ConstBufferView old_image,
+ ConstBufferView new_image) = 0;
+
+ // Accessors to RunMatch() results.
+ const std::vector<ElementMatch>& matches() const { return matches_; }
+
+ size_t num_identical() const { return num_identical_; }
+
+ protected:
+ // Post-processes |matches_| to remove potentially unfavorable entries.
+ void Trim();
+
+ // Storage of matched elements: A list of matched pairs, where the list of
+ // "new" elements have increasing offsets and don't overlap. May be empty.
+ std::vector<ElementMatch> matches_;
+
+ // Number of identical matches found in match candidates. These should be
+ // excluded from |matches_|.
+ size_t num_identical_ = 0;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_
diff --git a/equivalence_map.cc b/equivalence_map.cc
new file mode 100644
index 0000000..26c0764
--- /dev/null
+++ b/equivalence_map.cc
@@ -0,0 +1,548 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/equivalence_map.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "base/containers/cxx20_erase.h"
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/encoded_view.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/suffix_array.h"
+
+namespace zucchini {
+
+namespace {
+
+// TODO(haungs): Tune these numbers to improve pathological case results.
+
+// In pathological cases Zucchini can exhibit O(n^2) behavior if the seed
+// selection process runs to completion. To prevent this we impose a quota for
+// the total length of equivalences the seed selection process can perform
+// trials on. For regular use cases it is unlikely this quota will be exceeded,
+// and if it is the effects on patch size are expected to be small.
+constexpr uint64_t kSeedSelectionTotalVisitLengthQuota = 1 << 18; // 256 KiB
+
+// The aforementioned quota alone is insufficient, as exploring backwards will
+// still be very successful resulting in O(n) behavior in the case of a limited
+// seed selection trials. This results in O(n^2) behavior returning. To mitigate
+// this we also impose a cap on the ExtendEquivalenceBackward() exploration.
+constexpr offset_t kBackwardsExtendLimit = 1 << 16; // 64 KiB
+
+} // namespace
+
+/******** Utility Functions ********/
+
+double GetTokenSimilarity(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ offset_t src,
+ offset_t dst) {
+ DCHECK(old_image_index.IsToken(src));
+ DCHECK(new_image_index.IsToken(dst));
+
+ TypeTag old_type = old_image_index.LookupType(src);
+ TypeTag new_type = new_image_index.LookupType(dst);
+ if (old_type != new_type)
+ return kMismatchFatal;
+
+ // Raw comparison.
+ if (!old_image_index.IsReference(src) && !new_image_index.IsReference(dst)) {
+ return old_image_index.GetRawValue(src) == new_image_index.GetRawValue(dst)
+ ? 1.0
+ : -1.5;
+ }
+
+ const ReferenceSet& old_ref_set = old_image_index.refs(old_type);
+ const ReferenceSet& new_ref_set = new_image_index.refs(new_type);
+ Reference old_reference = old_ref_set.at(src);
+ Reference new_reference = new_ref_set.at(dst);
+ PoolTag pool_tag = old_ref_set.pool_tag();
+
+ double affinity = targets_affinities[pool_tag.value()].AffinityBetween(
+ old_ref_set.target_pool().KeyForOffset(old_reference.target),
+ new_ref_set.target_pool().KeyForOffset(new_reference.target));
+
+ // Both targets are not associated, which implies a weak match.
+ if (affinity == 0.0)
+ return 0.5 * old_ref_set.width();
+
+ // At least one target is associated, so values are compared.
+ return affinity > 0.0 ? old_ref_set.width() : -2.0;
+}
+
+double GetEquivalenceSimilarity(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const Equivalence& equivalence) {
+ double similarity = 0.0;
+ for (offset_t k = 0; k < equivalence.length; ++k) {
+ // Non-tokens are joined with the nearest previous token: skip until we
+ // cover the unit.
+ if (!new_image_index.IsToken(equivalence.dst_offset + k))
+ continue;
+
+ similarity += GetTokenSimilarity(
+ old_image_index, new_image_index, targets_affinities,
+ equivalence.src_offset + k, equivalence.dst_offset + k);
+ if (similarity == kMismatchFatal)
+ return kMismatchFatal;
+ }
+ return similarity;
+}
+
+EquivalenceCandidate ExtendEquivalenceForward(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const EquivalenceCandidate& candidate,
+ double min_similarity) {
+ Equivalence equivalence = candidate.eq;
+ offset_t best_k = equivalence.length;
+ double current_similarity = candidate.similarity;
+ double best_similarity = current_similarity;
+ double current_penalty = min_similarity;
+ for (offset_t k = best_k;
+ equivalence.src_offset + k < old_image_index.size() &&
+ equivalence.dst_offset + k < new_image_index.size();
+ ++k) {
+ // Mismatch in type, |candidate| cannot be extended further.
+ if (old_image_index.LookupType(equivalence.src_offset + k) !=
+ new_image_index.LookupType(equivalence.dst_offset + k)) {
+ break;
+ }
+
+ if (!new_image_index.IsToken(equivalence.dst_offset + k)) {
+ // Non-tokens are joined with the nearest previous token: skip until we
+ // cover the unit, and extend |best_k| if applicable.
+ if (best_k == k)
+ best_k = k + 1;
+ continue;
+ }
+
+ double similarity = GetTokenSimilarity(
+ old_image_index, new_image_index, targets_affinities,
+ equivalence.src_offset + k, equivalence.dst_offset + k);
+ current_similarity += similarity;
+ current_penalty = std::max(0.0, current_penalty) - similarity;
+
+ if (current_similarity < 0.0 || current_penalty >= min_similarity)
+ break;
+ if (current_similarity >= best_similarity) {
+ best_similarity = current_similarity;
+ best_k = k + 1;
+ }
+ }
+ equivalence.length = best_k;
+ return {equivalence, best_similarity};
+}
+
+EquivalenceCandidate ExtendEquivalenceBackward(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const EquivalenceCandidate& candidate,
+ double min_similarity) {
+ Equivalence equivalence = candidate.eq;
+ offset_t best_k = 0;
+ double current_similarity = candidate.similarity;
+ double best_similarity = current_similarity;
+ double current_penalty = 0.0;
+ offset_t k_min = std::min(
+ {equivalence.dst_offset, equivalence.src_offset, kBackwardsExtendLimit});
+ for (offset_t k = 1; k <= k_min; ++k) {
+ // Mismatch in type, |candidate| cannot be extended further.
+ if (old_image_index.LookupType(equivalence.src_offset - k) !=
+ new_image_index.LookupType(equivalence.dst_offset - k)) {
+ break;
+ }
+
+ // Non-tokens are joined with the nearest previous token: skip until we
+ // reach the next token.
+ if (!new_image_index.IsToken(equivalence.dst_offset - k))
+ continue;
+
+ DCHECK_EQ(old_image_index.LookupType(equivalence.src_offset - k),
+ new_image_index.LookupType(equivalence.dst_offset -
+ k)); // Sanity check.
+ double similarity = GetTokenSimilarity(
+ old_image_index, new_image_index, targets_affinities,
+ equivalence.src_offset - k, equivalence.dst_offset - k);
+
+ current_similarity += similarity;
+ current_penalty = std::max(0.0, current_penalty) - similarity;
+
+ if (current_similarity < 0.0 || current_penalty >= min_similarity)
+ break;
+ if (current_similarity >= best_similarity) {
+ best_similarity = current_similarity;
+ best_k = k;
+ }
+ }
+
+ equivalence.dst_offset -= best_k;
+ equivalence.src_offset -= best_k;
+ equivalence.length += best_k;
+ return {equivalence, best_similarity};
+}
+
+EquivalenceCandidate VisitEquivalenceSeed(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ offset_t src,
+ offset_t dst,
+ double min_similarity) {
+ EquivalenceCandidate candidate{{src, dst, 0}, 0.0}; // Empty.
+ if (!old_image_index.IsToken(src))
+ return candidate;
+ candidate =
+ ExtendEquivalenceForward(old_image_index, new_image_index,
+ targets_affinities, candidate, min_similarity);
+ if (candidate.similarity < min_similarity)
+ return candidate; // Not worth exploring any more.
+ return ExtendEquivalenceBackward(old_image_index, new_image_index,
+ targets_affinities, candidate,
+ min_similarity);
+}
+
+/******** OffsetMapper ********/
+
+OffsetMapper::OffsetMapper(std::vector<Equivalence>&& equivalences,
+ offset_t old_image_size,
+ offset_t new_image_size)
+ : equivalences_(std::move(equivalences)),
+ old_image_size_(old_image_size),
+ new_image_size_(new_image_size) {
+ DCHECK_GT(new_image_size_, 0U);
+ DCHECK(std::is_sorted(equivalences_.begin(), equivalences_.end(),
+ [](const Equivalence& a, const Equivalence& b) {
+ return a.src_offset < b.src_offset;
+ }));
+ // This is for testing. Assume pruned.
+}
+
+OffsetMapper::OffsetMapper(EquivalenceSource&& equivalence_source,
+ offset_t old_image_size,
+ offset_t new_image_size)
+ : old_image_size_(old_image_size), new_image_size_(new_image_size) {
+ DCHECK_GT(new_image_size_, 0U);
+ for (auto e = equivalence_source.GetNext(); e.has_value();
+ e = equivalence_source.GetNext()) {
+ equivalences_.push_back(*e);
+ }
+ PruneEquivalencesAndSortBySource(&equivalences_);
+}
+
+OffsetMapper::OffsetMapper(const EquivalenceMap& equivalence_map,
+ offset_t old_image_size,
+ offset_t new_image_size)
+ : equivalences_(equivalence_map.size()),
+ old_image_size_(old_image_size),
+ new_image_size_(new_image_size) {
+ DCHECK_GT(new_image_size_, 0U);
+ std::transform(equivalence_map.begin(), equivalence_map.end(),
+ equivalences_.begin(),
+ [](const EquivalenceCandidate& c) { return c.eq; });
+ PruneEquivalencesAndSortBySource(&equivalences_);
+}
+
+OffsetMapper::~OffsetMapper() = default;
+
+// Safely evaluates |offset - unit.src_offset + unit.dst_offset| with signed
+// arithmetic, then clips the result to |[0, new_image_size_)|.
+offset_t OffsetMapper::NaiveExtendedForwardProject(const Equivalence& unit,
+ offset_t offset) const {
+ int64_t old_offset64 = offset;
+ int64_t src_offset64 = unit.src_offset;
+ int64_t dst_offset64 = unit.dst_offset;
+ uint64_t new_offset64 = std::min<uint64_t>(
+ std::max<int64_t>(0LL, old_offset64 - src_offset64 + dst_offset64),
+ new_image_size_ - 1);
+ return base::checked_cast<offset_t>(new_offset64);
+}
+
+offset_t OffsetMapper::ExtendedForwardProject(offset_t offset) const {
+ DCHECK(!equivalences_.empty());
+ if (offset < old_image_size_) {
+ // Finds the equivalence unit whose "old" block is nearest to |offset|,
+ // favoring the block with lower offset in case of a tie.
+ auto pos = std::upper_bound(
+ equivalences_.begin(), equivalences_.end(), offset,
+ [](offset_t a, const Equivalence& b) { return a < b.src_offset; });
+ // For tiebreaking: |offset - pos[-1].src_end()| is actually 1 less than
+ // |offset|'s distance to "old" block of |pos[-1]|. Therefore "<" is used.
+ if (pos != equivalences_.begin() &&
+ (pos == equivalences_.end() || offset < pos[-1].src_end() ||
+ offset - pos[-1].src_end() < pos->src_offset - offset)) {
+ --pos;
+ }
+ return NaiveExtendedForwardProject(*pos, offset);
+ }
+ // Fake offsets.
+ offset_t delta = offset - old_image_size_;
+ return delta < kOffsetBound - new_image_size_ ? new_image_size_ + delta
+ : kOffsetBound - 1;
+}
+
+void OffsetMapper::ForwardProjectAll(std::vector<offset_t>* offsets) const {
+ DCHECK(std::is_sorted(offsets->begin(), offsets->end()));
+ auto current = equivalences_.begin();
+ for (auto& src : *offsets) {
+ while (current != end() && current->src_end() <= src) {
+ ++current;
+ }
+
+ if (current != end() && current->src_offset <= src) {
+ src = src - current->src_offset + current->dst_offset;
+ } else {
+ src = kInvalidOffset;
+ }
+ }
+ base::Erase(*offsets, kInvalidOffset);
+ offsets->shrink_to_fit();
+}
+
+void OffsetMapper::PruneEquivalencesAndSortBySource(
+ std::vector<Equivalence>* equivalences) {
+ std::sort(equivalences->begin(), equivalences->end(),
+ [](const Equivalence& a, const Equivalence& b) {
+ return a.src_offset < b.src_offset;
+ });
+
+ for (auto current = equivalences->begin(); current != equivalences->end();
+ ++current) {
+ // A "reaper" is an equivalence after |current| that overlaps with it, but
+ // is longer, and so truncates |current|. For example:
+ // ****** <= |current|
+ // **
+ // ****
+ // ****
+ // ********** <= |next| as reaper.
+ // If a reaper is found (as |next|), every equivalence strictly between
+ // |current| and |next| would be truncated to 0 and discarded. Handling this
+ // case is important to avoid O(n^2) behavior.
+ bool next_is_reaper = false;
+
+ // Look ahead to resolve overlaps, until a better candidate is found.
+ auto next = current + 1;
+ for (; next != equivalences->end(); ++next) {
+ DCHECK_GE(next->src_offset, current->src_offset);
+ if (next->src_offset >= current->src_end())
+ break; // No more overlap.
+
+ if (current->length < next->length) {
+ // |next| is better: So it is a reaper that shrinks |current|.
+ offset_t delta = current->src_end() - next->src_offset;
+ current->length -= delta;
+ next_is_reaper = true;
+ break;
+ }
+ }
+
+ if (next_is_reaper) {
+      // Discard all equivalences strictly between |current| and |next|.
+ for (auto reduced = current + 1; reduced != next; ++reduced)
+ reduced->length = 0;
+ current = next - 1;
+ } else {
+ // Shrink all equivalences that overlap with |current|. These are all
+ // worse than |current| since no reaper is found.
+ for (auto reduced = current + 1; reduced != next; ++reduced) {
+ offset_t delta = current->src_end() - reduced->src_offset;
+ reduced->length -= std::min(reduced->length, delta);
+ reduced->src_offset += delta;
+ reduced->dst_offset += delta;
+ DCHECK_EQ(reduced->src_offset, current->src_end());
+ }
+ }
+ }
+
+ // Discard all equivalences with length == 0.
+ base::EraseIf(*equivalences, [](const Equivalence& equivalence) {
+ return equivalence.length == 0;
+ });
+}
+
+/******** EquivalenceMap ********/
+
+EquivalenceMap::EquivalenceMap() = default;
+
+EquivalenceMap::EquivalenceMap(std::vector<EquivalenceCandidate>&& equivalences)
+ : candidates_(std::move(equivalences)) {
+ SortByDestination();
+}
+
+EquivalenceMap::EquivalenceMap(EquivalenceMap&&) = default;
+
+EquivalenceMap::~EquivalenceMap() = default;
+
+void EquivalenceMap::Build(
+ const std::vector<offset_t>& old_sa,
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity) {
+ DCHECK_EQ(old_sa.size(), old_view.size());
+
+ CreateCandidates(old_sa, old_view, new_view, targets_affinities,
+ min_similarity);
+ SortByDestination();
+ Prune(old_view, new_view, targets_affinities, min_similarity);
+
+ offset_t coverage = 0;
+ offset_t current_offset = 0;
+ for (auto candidate : candidates_) {
+ DCHECK_GE(candidate.eq.dst_offset, current_offset);
+ coverage += candidate.eq.length;
+ current_offset = candidate.eq.dst_end();
+ }
+ LOG(INFO) << "Equivalence Count: " << size();
+ LOG(INFO) << "Coverage / Extra / Total: " << coverage << " / "
+ << new_view.size() - coverage << " / " << new_view.size();
+}
+
+void EquivalenceMap::CreateCandidates(
+ const std::vector<offset_t>& old_sa,
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity) {
+ candidates_.clear();
+
+  // This is a heuristic to find 'good' equivalences on encoded views.
+ // Equivalences are found in ascending order of |new_image|.
+ offset_t dst_offset = 0;
+
+ while (dst_offset < new_view.size()) {
+ if (!new_view.IsToken(dst_offset)) {
+ ++dst_offset;
+ continue;
+ }
+ auto match =
+ SuffixLowerBound(old_sa, old_view.begin(),
+ new_view.begin() + dst_offset, new_view.end());
+
+ offset_t next_dst_offset = dst_offset + 1;
+ // TODO(huangs): Clean up.
+ double best_similarity = min_similarity;
+ uint64_t total_visit_length = 0;
+ EquivalenceCandidate best_candidate = {{0, 0, 0}, 0.0};
+ for (auto it = match; it != old_sa.end(); ++it) {
+ EquivalenceCandidate candidate = VisitEquivalenceSeed(
+ old_view.image_index(), new_view.image_index(), targets_affinities,
+ static_cast<offset_t>(*it), dst_offset, min_similarity);
+ if (candidate.similarity > best_similarity) {
+ best_candidate = candidate;
+ best_similarity = candidate.similarity;
+ next_dst_offset = candidate.eq.dst_end();
+ total_visit_length += candidate.eq.length;
+ if (total_visit_length > kSeedSelectionTotalVisitLengthQuota) {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ total_visit_length = 0;
+ for (auto it = match; it != old_sa.begin(); --it) {
+ EquivalenceCandidate candidate = VisitEquivalenceSeed(
+ old_view.image_index(), new_view.image_index(), targets_affinities,
+ static_cast<offset_t>(it[-1]), dst_offset, min_similarity);
+ if (candidate.similarity > best_similarity) {
+ best_candidate = candidate;
+ best_similarity = candidate.similarity;
+ next_dst_offset = candidate.eq.dst_end();
+ total_visit_length += candidate.eq.length;
+ if (total_visit_length > kSeedSelectionTotalVisitLengthQuota) {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ if (best_candidate.similarity >= min_similarity) {
+ candidates_.push_back(best_candidate);
+ }
+
+ dst_offset = next_dst_offset;
+ }
+}
+
+void EquivalenceMap::SortByDestination() {
+ std::sort(candidates_.begin(), candidates_.end(),
+ [](const EquivalenceCandidate& a, const EquivalenceCandidate& b) {
+ return a.eq.dst_offset < b.eq.dst_offset;
+ });
+}
+
+void EquivalenceMap::Prune(
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& target_affinities,
+ double min_similarity) {
+ // TODO(etiennep): unify with
+ // OffsetMapper::PruneEquivalencesAndSortBySource().
+ for (auto current = candidates_.begin(); current != candidates_.end();
+ ++current) {
+ if (current->similarity < min_similarity)
+ continue; // This candidate will be discarded anyways.
+
+ bool next_is_reaper = false;
+
+ // Look ahead to resolve overlaps, until a better candidate is found.
+ auto next = current + 1;
+ for (; next != candidates_.end(); ++next) {
+ DCHECK_GE(next->eq.dst_offset, current->eq.dst_offset);
+ if (next->eq.dst_offset >= current->eq.dst_offset + current->eq.length)
+ break; // No more overlap.
+
+ if (current->similarity < next->similarity) {
+ // |next| is better: So it is a reaper that shrinks |current|.
+ offset_t delta = current->eq.dst_end() - next->eq.dst_offset;
+ current->eq.length -= delta;
+ current->similarity = GetEquivalenceSimilarity(
+ old_view.image_index(), new_view.image_index(), target_affinities,
+ current->eq);
+
+ next_is_reaper = true;
+ break;
+ }
+ }
+
+ if (next_is_reaper) {
+      // Discard all equivalences strictly between |current| and |next|.
+ for (auto reduced = current + 1; reduced != next; ++reduced) {
+ reduced->eq.length = 0;
+ reduced->similarity = 0;
+ }
+ current = next - 1;
+ } else {
+ // Shrinks all overlapping candidates following and worse than |current|.
+ for (auto reduced = current + 1; reduced != next; ++reduced) {
+ offset_t delta = current->eq.dst_end() - reduced->eq.dst_offset;
+ reduced->eq.length -= std::min(reduced->eq.length, delta);
+ reduced->eq.src_offset += delta;
+ reduced->eq.dst_offset += delta;
+ reduced->similarity = GetEquivalenceSimilarity(
+ old_view.image_index(), new_view.image_index(), target_affinities,
+ reduced->eq);
+ DCHECK_EQ(reduced->eq.dst_offset, current->eq.dst_end());
+ }
+ }
+ }
+
+ // Discard all candidates with similarity smaller than |min_similarity|.
+ base::EraseIf(candidates_,
+ [min_similarity](const EquivalenceCandidate& candidate) {
+ return candidate.similarity < min_similarity;
+ });
+}
+
+} // namespace zucchini
diff --git a/equivalence_map.h b/equivalence_map.h
new file mode 100644
index 0000000..8b716a1
--- /dev/null
+++ b/equivalence_map.h
@@ -0,0 +1,207 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
+#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
+
+#include <stddef.h>
+
+#include <limits>
+#include <vector>
+
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/targets_affinity.h"
+
+namespace zucchini {
+
+constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();
+
+class EncodedView;
+class EquivalenceSource;
+
+// Returns similarity score between a token (raw byte or first byte of a
+// reference) in |old_image_index| at |src| and a token in |new_image_index|
+// at |dst|. |targets_affinities| describes affinities for each target pool and
+// is used to evaluate similarity between references, hence its size must be
+// equal to the number of pools in both |old_image_index| and |new_image_index|.
+// Both |src| and |dst| must refer to tokens in |old_image_index| and
+// |new_image_index|.
+double GetTokenSimilarity(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ offset_t src,
+ offset_t dst);
+
+// Returns a similarity score between content in |old_image_index| and
+// |new_image_index| at regions described by |equivalence|, using
+// |targets_affinities| to evaluate similarity between references.
+double GetEquivalenceSimilarity(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const Equivalence& equivalence);
+
+// Extends |equivalence| forward and returns the result. This is related to
+// VisitEquivalenceSeed().
+EquivalenceCandidate ExtendEquivalenceForward(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const EquivalenceCandidate& equivalence,
+ double min_similarity);
+
+// Extends |equivalence| backward and returns the result. This is related to
+// VisitEquivalenceSeed().
+EquivalenceCandidate ExtendEquivalenceBackward(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ const EquivalenceCandidate& equivalence,
+ double min_similarity);
+
+// Creates an equivalence, starting with |src| and |dst| as offset hint, and
+// extends it both forward and backward, trying to maximise similarity between
+// |old_image_index| and |new_image_index|, and returns the result.
+// |targets_affinities| is used to evaluate similarity between references.
+// |min_similarity| describes the minimum acceptable similarity score and is
+// used as threshold to discard bad equivalences.
+EquivalenceCandidate VisitEquivalenceSeed(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ offset_t src,
+ offset_t dst,
+ double min_similarity);
+
+// Container of pruned equivalences used to map offsets from |old_image| to
+// offsets in |new_image|. Equivalences are pruned by cropping smaller
+// equivalences to avoid overlaps, to make the equivalence map (for covered
+// bytes in |old_image| and |new_image|) one-to-one.
+class OffsetMapper {
+ public:
+ using const_iterator = std::vector<Equivalence>::const_iterator;
+
+ // Constructors for various data sources. "Old" and "new" image sizes are
+ // needed for bounds checks and to handle dangling targets.
+ // - From a list of |equivalences|, already sorted (by |src_offset|) and
+ // pruned, useful for tests.
+ OffsetMapper(std::vector<Equivalence>&& equivalences,
+ offset_t old_image_size,
+ offset_t new_image_size);
+ // - From a generator, useful for Zucchini-apply.
+ OffsetMapper(EquivalenceSource&& equivalence_source,
+ offset_t old_image_size,
+ offset_t new_image_size);
+ // - From an EquivalenceMap that needs to be processed, useful for
+ // Zucchini-gen.
+ OffsetMapper(const EquivalenceMap& equivalence_map,
+ offset_t old_image_size,
+ offset_t new_image_size);
+ ~OffsetMapper();
+
+ size_t size() const { return equivalences_.size(); }
+ const_iterator begin() const { return equivalences_.begin(); }
+ const_iterator end() const { return equivalences_.end(); }
+
+ // Returns naive extended forward-projection of "old" |offset| that follows
+ // |eq|'s delta. |eq| needs not cover |offset|.
+ // - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
+ // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
+  //   caller must make the check (hence "naive").
+ offset_t NaiveExtendedForwardProject(const Equivalence& unit,
+ offset_t offset) const;
+
+ // Returns an offset in |new_image| corresponding to |offset| in |old_image|.
+ // Assumes |equivalences_| to be non-empty. Cases:
+ // - If |offset| is covered (i.e., in an "old" block), then use the delta of
+ // the (unique) equivalence unit that covers |offset|.
+ // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
+ // nearest "old" block, use its delta, and avert underflow / overflow by
+ // clamping the result to |[0, new_image_size_)|.
+ // - If |offset| is >= |new_image_size_| (a "fake offset"), then use
+ // |new_image_size_ - old_image_size_| as the delta.
+ offset_t ExtendedForwardProject(offset_t offset) const;
+
+ // Given sorted |offsets|, applies a projection in-place of all offsets that
+ // are part of a pruned equivalence from |old_image| to |new_image|. Other
+ // offsets are removed from |offsets|.
+ void ForwardProjectAll(std::vector<offset_t>* offsets) const;
+
+ // Accessor for testing.
+ const std::vector<Equivalence> equivalences() const { return equivalences_; }
+
+ // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
+ // source location that was covered by some Equivalence would become covered
+ // by exactly one Equivalence. Moreover, for the offset, the equivalence
+ // corresponds to the largest (pre-pruning) covering Equivalence, and in case
+ // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
+ // change in size since empty Equivalences are removed.
+ static void PruneEquivalencesAndSortBySource(
+ std::vector<Equivalence>* equivalences);
+
+ private:
+ // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
+ // block overlaps). Also, it is sorted by "old" offsets.
+ std::vector<Equivalence> equivalences_;
+ const offset_t old_image_size_;
+ const offset_t new_image_size_;
+};
+
+// Container of equivalences between |old_image_index| and |new_image_index|,
+// sorted by |Equivalence::dst_offset|, only used during patch generation.
+class EquivalenceMap {
+ public:
+ using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;
+
+ EquivalenceMap();
+ // Initializes the object with |equivalences|.
+ explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
+ EquivalenceMap(EquivalenceMap&&);
+ EquivalenceMap(const EquivalenceMap&) = delete;
+ ~EquivalenceMap();
+
+ // Finds relevant equivalences between |old_view| and |new_view|, using
+ // suffix array |old_sa| computed from |old_view| and using
+ // |targets_affinities| to evaluate similarity between references. This
+ // function is not symmetric. Equivalences might overlap in |old_view|, but
+ // not in |new_view|. It tries to maximize accumulated similarity within each
+ // equivalence, while maximizing |new_view| coverage. The minimum similarity
+ // of an equivalence is given by |min_similarity|.
+ void Build(const std::vector<offset_t>& old_sa,
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity);
+
+ size_t size() const { return candidates_.size(); }
+ const_iterator begin() const { return candidates_.begin(); }
+ const_iterator end() const { return candidates_.end(); }
+
+ private:
+ // Discovers equivalence candidates between |old_view| and |new_view| and
+ // stores them in the object. Note that resulting candidates are not sorted
+ // and might be overlapping in new image.
+ void CreateCandidates(const std::vector<offset_t>& old_sa,
+ const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity);
+ // Sorts candidates by their offset in new image.
+ void SortByDestination();
+ // Visits |candidates_| (sorted by |dst_offset|) and remove all destination
+ // overlaps. Candidates with low similarity scores are more likely to be
+ // shrunken. Unfit candidates may be removed.
+ void Prune(const EncodedView& old_view,
+ const EncodedView& new_view,
+ const std::vector<TargetsAffinity>& targets_affinities,
+ double min_similarity);
+
+ std::vector<EquivalenceCandidate> candidates_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
diff --git a/equivalence_map_unittest.cc b/equivalence_map_unittest.cc
new file mode 100644
index 0000000..b3a4ea4
--- /dev/null
+++ b/equivalence_map_unittest.cc
@@ -0,0 +1,635 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/equivalence_map.h"
+
+#include <cstring>
+#include <deque>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/encoded_view.h"
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/suffix_array.h"
+#include "components/zucchini/targets_affinity.h"
+#include "components/zucchini/test_disassembler.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using OffsetVector = std::vector<offset_t>;
+
+// Make all references 2 bytes long.
+constexpr offset_t kReferenceSize = 2;
+
+// Creates and initializes an ImageIndex from |a| with 2 types of references.
+// The result is populated with |refs0| and |refs1|. |a| is expected to be a
+// string literal valid for the lifetime of the object.
+ImageIndex MakeImageIndexForTesting(const char* a,
+ std::vector<Reference>&& refs0,
+ std::vector<Reference>&& refs1) {
+ TestDisassembler disasm(
+ {kReferenceSize, TypeTag(0), PoolTag(0)}, std::move(refs0),
+ {kReferenceSize, TypeTag(1), PoolTag(0)}, std::move(refs1),
+ {kReferenceSize, TypeTag(2), PoolTag(1)}, {});
+
+ ImageIndex image_index(
+ ConstBufferView(reinterpret_cast<const uint8_t*>(a), std::strlen(a)));
+
+ EXPECT_TRUE(image_index.Initialize(&disasm));
+ return image_index;
+}
+
+std::vector<TargetsAffinity> MakeTargetsAffinitiesForTesting(
+ const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ const EquivalenceMap& equivalence_map) {
+ std::vector<TargetsAffinity> target_affinities(old_image_index.PoolCount());
+ for (const auto& old_pool_tag_and_targets : old_image_index.target_pools()) {
+ PoolTag pool_tag = old_pool_tag_and_targets.first;
+ target_affinities[pool_tag.value()].InferFromSimilarities(
+ equivalence_map, old_pool_tag_and_targets.second.targets(),
+ new_image_index.pool(pool_tag).targets());
+ }
+ return target_affinities;
+}
+
+} // namespace
+
+TEST(EquivalenceMapTest, GetTokenSimilarity) {
+ ImageIndex old_index = MakeImageIndexForTesting(
+ "ab1122334455", {{2, 0}, {4, 1}, {6, 2}, {8, 2}}, {{10, 3}});
+ // Note: {4, 1} -> {6, 3} and {6, 2} -> {4, 1}, then result is sorted.
+ ImageIndex new_index = MakeImageIndexForTesting(
+ "a11b33224455", {{1, 0}, {4, 1}, {6, 3}, {8, 1}}, {{10, 2}});
+ std::vector<TargetsAffinity> affinities = MakeTargetsAffinitiesForTesting(
+ old_index, new_index,
+ EquivalenceMap({{{0, 0, 1}, 1.0}, {{1, 3, 1}, 1.0}}));
+
+ // Raw match.
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 0));
+ // Raw mismatch.
+ EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 1));
+ EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 1, 0));
+
+ // Type mismatch.
+ EXPECT_EQ(kMismatchFatal,
+ GetTokenSimilarity(old_index, new_index, affinities, 0, 1));
+ EXPECT_EQ(kMismatchFatal,
+ GetTokenSimilarity(old_index, new_index, affinities, 2, 0));
+ EXPECT_EQ(kMismatchFatal,
+ GetTokenSimilarity(old_index, new_index, affinities, 2, 10));
+ EXPECT_EQ(kMismatchFatal,
+ GetTokenSimilarity(old_index, new_index, affinities, 10, 1));
+
+ // Reference strong match.
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 1));
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 4, 6));
+
+ // Reference weak match.
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 4));
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 8));
+ EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 8, 4));
+
+ // Weak match is not greater than strong match.
+ EXPECT_LE(GetTokenSimilarity(old_index, new_index, affinities, 6, 4),
+ GetTokenSimilarity(old_index, new_index, affinities, 2, 1));
+
+ // Reference mismatch.
+ EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 4));
+ EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 6));
+}
+
+TEST(EquivalenceMapTest, GetEquivalenceSimilarity) {
+ ImageIndex image_index =
+ MakeImageIndexForTesting("abcdef1122", {{6, 0}}, {{8, 1}});
+ std::vector<TargetsAffinity> affinities =
+ MakeTargetsAffinitiesForTesting(image_index, image_index, {});
+
+ // Sanity check. These are no-op with length-0 equivalences.
+ EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {0, 0, 0}));
+ EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {0, 3, 0}));
+ EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {3, 0, 0}));
+
+ // Now examine larger equivalences.
+ EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {0, 0, 3}));
+ EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {0, 3, 3}));
+ EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {3, 0, 3}));
+
+ EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
+ {6, 6, 4}));
+}
+
+TEST(EquivalenceMapTest, ExtendEquivalenceForward) {
+ auto test_extend_forward =
+ [](const ImageIndex old_index, const ImageIndex new_index,
+ const EquivalenceCandidate& equivalence, double base_similarity) {
+ return ExtendEquivalenceForward(
+ old_index, new_index,
+ MakeTargetsAffinitiesForTesting(old_index, new_index, {}),
+ equivalence, base_similarity)
+ .eq;
+ };
+
+ EXPECT_EQ(Equivalence({0, 0, 0}),
+ test_extend_forward(MakeImageIndexForTesting("", {}, {}),
+ MakeImageIndexForTesting("", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 0}),
+ test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("zzzz", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 6}),
+ test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("banana", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({2, 2, 4}),
+ test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("banana", {}, {}),
+ {{2, 2, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 6}),
+ test_extend_forward(MakeImageIndexForTesting("bananaxx", {}, {}),
+ MakeImageIndexForTesting("bananayy", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({0, 0, 8}),
+ test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+ MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({0, 0, 6}),
+ test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+ MakeImageIndexForTesting("banana22", {}, {{6, 0}}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({0, 0, 17}),
+ test_extend_forward(MakeImageIndexForTesting("bananaxxpineapple", {}, {}),
+ MakeImageIndexForTesting("bananayypineapple", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({3, 0, 19}),
+ test_extend_forward(
+ MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
+ MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
+ {{3, 0, 0}, 0.0}, 8.0));
+}
+
+TEST(EquivalenceMapTest, ExtendEquivalenceBackward) {
+ auto test_extend_backward =
+ [](const ImageIndex old_index, const ImageIndex new_index,
+ const EquivalenceCandidate& equivalence, double base_similarity) {
+ return ExtendEquivalenceBackward(
+ old_index, new_index,
+ MakeTargetsAffinitiesForTesting(old_index, new_index, {}),
+ equivalence, base_similarity)
+ .eq;
+ };
+
+ EXPECT_EQ(Equivalence({0, 0, 0}),
+ test_extend_backward(MakeImageIndexForTesting("", {}, {}),
+ MakeImageIndexForTesting("", {}, {}),
+ {{0, 0, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({6, 4, 0}),
+ test_extend_backward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("zzzz", {}, {}),
+ {{6, 4, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 6}),
+ test_extend_backward(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("banana", {}, {}),
+ {{6, 6, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({2, 2, 6}),
+ test_extend_backward(MakeImageIndexForTesting("xxbanana", {}, {}),
+ MakeImageIndexForTesting("yybanana", {}, {}),
+ {{8, 8, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({0, 0, 8}),
+ test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
+ MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
+ {{8, 8, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({2, 2, 6}),
+ test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
+ MakeImageIndexForTesting("22banana", {}, {{0, 0}}),
+ {{8, 8, 0}, 0.0}, 8.0));
+
+ EXPECT_EQ(Equivalence({0, 0, 17}),
+ test_extend_backward(
+ MakeImageIndexForTesting("bananaxxpineapple", {}, {}),
+ MakeImageIndexForTesting("bananayypineapple", {}, {}),
+ {{8, 8, 9}, 9.0}, 8.0));
+
+ EXPECT_EQ(
+ Equivalence({3, 0, 19}),
+ test_extend_backward(
+ MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
+ MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
+ {{22, 19, 0}, 0.0}, 8.0));
+}
+
+TEST(EquivalenceMapTest, PruneEquivalencesAndSortBySource) {
+ auto PruneEquivalencesAndSortBySourceTest =
+ [](std::vector<Equivalence>&& equivalences) {
+ OffsetMapper::PruneEquivalencesAndSortBySource(&equivalences);
+ return std::move(equivalences);
+ };
+
+ EXPECT_EQ(std::vector<Equivalence>(),
+ PruneEquivalencesAndSortBySourceTest({}));
+ EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}}),
+ PruneEquivalencesAndSortBySourceTest({{0, 10, 1}}));
+ EXPECT_EQ(std::vector<Equivalence>(),
+ PruneEquivalencesAndSortBySourceTest({{0, 10, 0}}));
+ EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}, {1, 11, 1}}),
+ PruneEquivalencesAndSortBySourceTest({{0, 10, 1}, {1, 11, 1}}));
+ EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}, {2, 13, 1}}),
+ PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 2}}));
+ EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}}),
+ PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 1}}));
+ EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}, {2, 14, 1}}),
+ PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 13, 2}}));
+ EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}, {1, 12, 3}}),
+ PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 3}}));
+ EXPECT_EQ(std::vector<Equivalence>({{0, 10, 3}, {3, 16, 2}}),
+ PruneEquivalencesAndSortBySourceTest(
+ {{0, 10, 3}, {1, 13, 3}, {3, 16, 2}})); // Pruning is greedy
+
+ // Consider following pattern that may cause O(n^2) behavior if not handled
+ // properly.
+ // ***************
+ // **********
+ // ********
+ // ******
+ // ****
+ // **
+ // ***************
+ // This test case makes sure the function does not stall on a large instance
+ // of this pattern.
+ EXPECT_EQ(std::vector<Equivalence>({{0, 10, +300000}, {300000, 30, +300000}}),
+ PruneEquivalencesAndSortBySourceTest([] {
+ std::vector<Equivalence> equivalenses;
+ equivalenses.push_back({0, 10, +300000});
+ for (offset_t i = 0; i < 100000; ++i)
+ equivalenses.push_back({200000 + i, 20, +200000 - 2 * i});
+ equivalenses.push_back({300000, 30, +300000});
+ return equivalenses;
+ }()));
+}
+
+TEST(EquivalenceMapTest, NaiveExtendedForwardProject) {
+ constexpr size_t kOldImageSize = 1000U;
+ constexpr size_t kNewImageSize = 1000U;
+ OffsetMapper offset_mapper(std::vector<Equivalence>(), kOldImageSize,
+ kNewImageSize);
+
+ // Convenience function to declutter.
+ auto project = [&offset_mapper](const Equivalence& eq, offset_t offset) {
+ return offset_mapper.NaiveExtendedForwardProject(eq, offset);
+ };
+
+ // Equivalence with delta = 0.
+ Equivalence eq_stay = {10, 10, +5}; // [10,15) -> [10,15).
+ for (offset_t offset = 0U; offset < 1000U; ++offset) {
+ EXPECT_EQ(offset, project(eq_stay, offset));
+ }
+ // Saturate since result would overflow "new".
+ EXPECT_EQ(999U, project(eq_stay, 1000U));
+ EXPECT_EQ(999U, project(eq_stay, 2000U));
+ EXPECT_EQ(999U, project(eq_stay, kOffsetBound - 1));
+
+ // Equivalence with delta = -10.
+ Equivalence eq_dec = {20, 10, +12}; // [20,32) --> [10,22).
+ // Offsets in "old" block.
+ EXPECT_EQ(10U, project(eq_dec, 20U));
+ EXPECT_EQ(11U, project(eq_dec, 21U));
+ EXPECT_EQ(21U, project(eq_dec, 31U));
+ // Offsets before "old" block, no underflow
+ EXPECT_EQ(9U, project(eq_dec, 19U));
+ EXPECT_EQ(1U, project(eq_dec, 11U));
+ EXPECT_EQ(0U, project(eq_dec, 10U));
+ // Offsets before "old" block, underflow (possible since delta < 0).
+ EXPECT_EQ(0U, project(eq_dec, 9U));
+ EXPECT_EQ(0U, project(eq_dec, 5U));
+ EXPECT_EQ(0U, project(eq_dec, 0U));
+ // Offsets after "old" block, no overflow.
+ EXPECT_EQ(20U, project(eq_dec, 30U));
+ EXPECT_EQ(64U, project(eq_dec, 74U));
+ EXPECT_EQ(90U, project(eq_dec, 100U));
+ EXPECT_EQ(490U, project(eq_dec, 500U));
+ EXPECT_EQ(999U, project(eq_dec, 1009U));
+ // Offsets after "old" block, overflow.
+ EXPECT_EQ(999U, project(eq_dec, 1010U));
+ EXPECT_EQ(999U, project(eq_dec, 2000U));
+ EXPECT_EQ(999U, project(eq_dec, kOffsetBound - 1));
+
+ // Equivalence with delta = +10.
+ Equivalence eq_inc = {7, 17, +80}; // [7,87) --> [17,97).
+ // Offsets in "old" block.
+ EXPECT_EQ(17U, project(eq_inc, 7U));
+ EXPECT_EQ(60U, project(eq_inc, 50U));
+ EXPECT_EQ(96U, project(eq_inc, 86U));
+ // Offsets before "old" block, underflow impossible since delta >= 0.
+ EXPECT_EQ(16U, project(eq_inc, 6U));
+ EXPECT_EQ(10U, project(eq_inc, 0U));
+ // Offsets after "old" block, no overflow.
+ EXPECT_EQ(97U, project(eq_inc, 87U));
+ EXPECT_EQ(510U, project(eq_inc, 500U));
+ EXPECT_EQ(999U, project(eq_inc, 989U));
+ // Offsets after "old" block, overflow.
+ EXPECT_EQ(999U, project(eq_inc, 990U));
+ EXPECT_EQ(999U, project(eq_inc, 2000U));
+ EXPECT_EQ(999U, project(eq_inc, kOffsetBound - 1));
+}
+
+TEST(EquivalenceMapTest, ExtendedForwardProject) {
+ // EquivalenceMaps provided must be sorted by "old" offset, and pruned.
+ // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18).
+ OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 20U,
+ 25U);
+ EXPECT_EQ(10U, offset_mapper1.ExtendedForwardProject(0U));
+ EXPECT_EQ(11U, offset_mapper1.ExtendedForwardProject(1U));
+ EXPECT_EQ(13U, offset_mapper1.ExtendedForwardProject(2U));
+ EXPECT_EQ(14U, offset_mapper1.ExtendedForwardProject(3U)); // Previous equiv.
+ EXPECT_EQ(16U, offset_mapper1.ExtendedForwardProject(4U));
+ EXPECT_EQ(17U, offset_mapper1.ExtendedForwardProject(5U));
+ EXPECT_EQ(18U, offset_mapper1.ExtendedForwardProject(6U)); // Previous equiv.
+ // Fake offsets.
+ EXPECT_EQ(25U, offset_mapper1.ExtendedForwardProject(20U));
+ EXPECT_EQ(26U, offset_mapper1.ExtendedForwardProject(21U));
+ EXPECT_EQ(1005U, offset_mapper1.ExtendedForwardProject(1000U));
+ EXPECT_EQ(kOffsetBound - 1,
+ offset_mapper1.ExtendedForwardProject(kOffsetBound - 1));
+
+ // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6).
+ OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 25U,
+ 20U);
+ EXPECT_EQ(10U, offset_mapper2.ExtendedForwardProject(0U));
+ EXPECT_EQ(11U, offset_mapper2.ExtendedForwardProject(1U));
+ EXPECT_EQ(2U, offset_mapper2.ExtendedForwardProject(13U));
+ EXPECT_EQ(3U, offset_mapper2.ExtendedForwardProject(14U)); // Previous equiv.
+ EXPECT_EQ(4U, offset_mapper2.ExtendedForwardProject(16U));
+ EXPECT_EQ(5U, offset_mapper2.ExtendedForwardProject(17U));
+ EXPECT_EQ(6U, offset_mapper2.ExtendedForwardProject(18U)); // Previous equiv.
+ // Fake offsets.
+ EXPECT_EQ(20U, offset_mapper2.ExtendedForwardProject(25U));
+ EXPECT_EQ(21U, offset_mapper2.ExtendedForwardProject(26U));
+ EXPECT_EQ(995U, offset_mapper2.ExtendedForwardProject(1000U));
+ EXPECT_EQ(kOffsetBound - 1 - 5,
+ offset_mapper2.ExtendedForwardProject(kOffsetBound - 1));
+}
+
+TEST(EquivalenceMapTest, ExtendedForwardProjectEncoding) {
+ // Tests OffsetMapper::ExtendedForwardProject(), which maps every "old" offset
+ // to a "new" offset, with possible overlap (even though blocks don't
+  // overlap). Only testing real offsets (no fake offsets).
+ // |old_spec| is a string like "<<aaAAaabbBBbcCCc>>":
+ // - Upper case letters are covered "old" offsets.
+ // - Lower case letters are non-covered offsets that are properly mapped using
+ // nearest "old" block.
+ // - '<' denotes underflow (clamped to 0).
+  // - '>' denotes overflow (clamped to "new" size - 1).
+ // |new_spec| is a string like "aaAA(ab)(ab)BBb..cCCc":
+ // - Upper and lower case letters are mapped "new" targets, occurring in the
+ // order that they appear in |old_spec|.
+ // - '.' are "new" offsets that appear as output.
+ // - '(' and ')' surround a single "new" location that are repeated as output.
+ int case_no = 0;
+ auto run_test = [&case_no](std::vector<Equivalence>&& equivalences,
+ const std::string& old_spec,
+ const std::string& new_spec) {
+ const size_t old_size = old_spec.length();
+ // Build expected "new" offsets, queue up for each letter.
+ std::map<char, std::deque<offset_t>> expected;
+ offset_t cur_new_offset = 0;
+ char state = ')'; // ')' = increase offset, '(' = stay.
+ for (char ch : new_spec) {
+ if (ch == '(' || ch == ')')
+ state = ch;
+ else
+ expected[ch].push_back(cur_new_offset);
+ cur_new_offset += (state == ')') ? 1 : 0;
+ }
+ const size_t new_size = cur_new_offset;
+ // Forward-project for each "old" index, pull from queue from matching
+ // letter, and compare.
+ OffsetMapper offset_mapper(std::move(equivalences), old_size, new_size);
+ for (offset_t old_offset = 0; old_offset < old_size; ++old_offset) {
+ offset_t new_offset = offset_mapper.ExtendedForwardProject(old_offset);
+ char ch = old_spec[old_offset];
+ if (ch == '<') { // Special case: Underflow.
+ EXPECT_EQ(0U, new_offset) << "in case " << case_no;
+ } else if (ch == '>') { // Special case: Overflow.
+ EXPECT_EQ(static_cast<offset_t>(new_size - 1), new_offset)
+ << "in case " << case_no;
+ } else {
+ std::deque<offset_t>& q = expected[ch];
+ ASSERT_FALSE(q.empty());
+ EXPECT_EQ(q.front(), new_offset) << "in case " << case_no;
+ q.pop_front();
+ if (q.empty())
+ expected.erase(ch);
+ }
+ }
+ // Clear useless '.', and ensure everything is consumed.
+ expected.erase('.');
+ EXPECT_TRUE(expected.empty()) << "in case " << case_no;
+ ++case_no;
+ };
+
+ // Trivial: [5,9) --> [5,9).
+ run_test({{5, 5, +4}}, "aaaaaAAAAaaaaa", "aaaaaAAAAaaaaa");
+ // Swap: [0,4) --> [6,10), [4,10) --> [0,6).
+ run_test({{0, 6, +4}, {4, 0, +6}}, "AAAABBBBBB", "BBBBBBAAAA");
+ // Overlap: [0,4) --> [2,6), [4,10) --> [3,9).
+ run_test({{0, 2, +4}, {4, 3, +6}}, "AAAABBBBBB", "..A(AB)(AB)(AB)BBB.");
+ // Converge: [1,3) --> [2,4), [7,8) --> [6,7).
+ run_test({{1, 2, +2}, {7, 6, +1}}, "aAAaabbBbb", ".aAA(ab)(ab)Bbb.");
+ // Converge with tie-breaker: [1,3) --> [2,4), [8,9) --> [7,8).
+ run_test({{1, 2, +2}, {8, 7, +1}}, "aAAaaabbBb", ".aAAa(ab)(ab)Bb.");
+ // Shift left: [6,8) --> [2,4): Underflow occurs.
+ run_test({{6, 2, +2}}, "<<<<aaAAaa", "aaAAaa....");
+ // Shift right: [2,5) --> [6,9): Overflow occurs.
+ run_test({{2, 6, +3}}, "aaAAAa>>>>", "....aaAAAa");
+ // Diverge: [3,5) --> [1,3], [7,9) --> [9,11).
+ run_test({{3, 1, +2}, {7, 9, +2}}, "<<aAAabBBb>>", "aAAa....bBBb");
+ // Pile-up: [0,2) --> [7,9), [9,11) --> [9,11), [18,20) --> [11,13).
+ run_test({{0, 7, +2}, {9, 9, +2}, {18, 11, +2}}, "AAaaaabbbBBbbbbcccCC",
+ "......b(Ab)(Abc)(Bac)(Bac)(Cab)(Cab)bb.....");
+ // Inverse pile-up: [7,9) --> [0,2), [9,11) --> [9,11), [13,15) --> [18,20).
+ run_test({{7, 0, +2}, {9, 9, +2}, {11, 18, +2}}, "<<<<<<<AABBCC>>>>>>>",
+ "AA.......BB.......CC");
+ // Sparse rotate: [3,4) -> [10,11), [10,11) --> [17,18), [17,18) --> [3,4).
+ run_test({{3, 10, +1}, {10, 17, +1}, {17, 3, +1}}, "aaaAaaabbbBbbbcccCccc",
+ "cccCcccaaaAaaabbbBbbb");
+ // Messy swap: [2,4) --> [10,12), [12,16) --> [3,7).
+ run_test({{2, 10, +2}, {12, 3, +4}}, "aaAAaa>><bbbBBBBbb",
+ "bbbBBBBb(ab)aAAaa");
+ // Messy expand: [6,8) --> [3,5), [10,11) -> [11,12), [14,17) --> [16,19).
+ run_test({{6, 3, +2}, {10, 11, +1}, {14, 16, +3}}, "<<<aaaAAabBbbcCCCc>>>>>",
+ "aaaAAa....bBbb.cCCCc");
+ // Interleave: [1,2) --> [0,1), [5,6) --> [10,11), [6,8) --> [3,5),
+ // [11,13) --> [12,14), [14,16) --> [6,8), [17,18) --> [17,18).
+ run_test({{1, 0, +1},
+ {5, 10, +1},
+ {6, 3, +2},
+ {11, 12, +2},
+ {14, 6, +2},
+ {17, 17, +1}},
+ "<AaabBCCccdDDdEEeFf>", "AaaCCc(Ec)EebBdDDd..Ff");
+}
+
+TEST(EquivalenceMapTest, ForwardProjectAll) {
+ auto ForwardProjectAllTest = [](const OffsetMapper& offset_mapper,
+ std::initializer_list<offset_t> offsets) {
+ OffsetVector offsets_vec(offsets);
+ offset_mapper.ForwardProjectAll(&offsets_vec);
+ return offsets_vec;
+ };
+
+ // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18).
+ OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 100U,
+ 100U);
+ EXPECT_EQ(OffsetVector({10}), ForwardProjectAllTest(offset_mapper1, {0}));
+ EXPECT_EQ(OffsetVector({13}), ForwardProjectAllTest(offset_mapper1, {2}));
+ EXPECT_EQ(OffsetVector({}), ForwardProjectAllTest(offset_mapper1, {3}));
+ EXPECT_EQ(OffsetVector({10, 13}),
+ ForwardProjectAllTest(offset_mapper1, {0, 2}));
+ EXPECT_EQ(OffsetVector({11, 13, 17}),
+ ForwardProjectAllTest(offset_mapper1, {1, 2, 5}));
+ EXPECT_EQ(OffsetVector({11, 17}),
+ ForwardProjectAllTest(offset_mapper1, {1, 3, 5}));
+ EXPECT_EQ(OffsetVector({10, 11, 13, 16, 17}),
+ ForwardProjectAllTest(offset_mapper1, {0, 1, 2, 3, 4, 5, 6}));
+
+ // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6).
+ OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 100U,
+ 100U);
+ EXPECT_EQ(OffsetVector({2}), ForwardProjectAllTest(offset_mapper2, {13}));
+ EXPECT_EQ(OffsetVector({10, 2}),
+ ForwardProjectAllTest(offset_mapper2, {0, 13}));
+ EXPECT_EQ(OffsetVector({11, 2, 5}),
+ ForwardProjectAllTest(offset_mapper2, {1, 13, 17}));
+ EXPECT_EQ(OffsetVector({11, 5}),
+ ForwardProjectAllTest(offset_mapper2, {1, 14, 17}));
+ EXPECT_EQ(OffsetVector({10, 11, 2, 4, 5}),
+ ForwardProjectAllTest(offset_mapper2, {0, 1, 13, 14, 16, 17, 18}));
+}
+
+TEST(EquivalenceMapTest, Build) {
+ auto test_build_equivalence = [](const ImageIndex old_index,
+ const ImageIndex new_index,
+ double minimum_similarity) {
+ auto affinities = MakeTargetsAffinitiesForTesting(old_index, new_index, {});
+
+ EncodedView old_view(old_index);
+ EncodedView new_view(new_index);
+
+ for (const auto& old_pool_tag_and_targets : old_index.target_pools()) {
+ PoolTag pool_tag = old_pool_tag_and_targets.first;
+ std::vector<uint32_t> old_labels;
+ std::vector<uint32_t> new_labels;
+ size_t label_bound = affinities[pool_tag.value()].AssignLabels(
+ 1.0, &old_labels, &new_labels);
+ old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
+ new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
+ }
+
+ std::vector<offset_t> old_sa =
+ MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());
+
+ EquivalenceMap equivalence_map;
+ equivalence_map.Build(old_sa, old_view, new_view, affinities,
+ minimum_similarity);
+
+ offset_t current_dst_offset = 0;
+ offset_t coverage = 0;
+ for (const auto& candidate : equivalence_map) {
+ EXPECT_GE(candidate.eq.dst_offset, current_dst_offset);
+ EXPECT_GT(candidate.eq.length, offset_t(0));
+ EXPECT_LE(candidate.eq.src_offset + candidate.eq.length,
+ old_index.size());
+ EXPECT_LE(candidate.eq.dst_offset + candidate.eq.length,
+ new_index.size());
+ EXPECT_GE(candidate.similarity, minimum_similarity);
+ current_dst_offset = candidate.eq.dst_offset;
+ coverage += candidate.eq.length;
+ }
+ return coverage;
+ };
+
+ EXPECT_EQ(0U,
+ test_build_equivalence(MakeImageIndexForTesting("", {}, {}),
+ MakeImageIndexForTesting("", {}, {}), 4.0));
+
+ EXPECT_EQ(0U, test_build_equivalence(
+ MakeImageIndexForTesting("", {}, {}),
+ MakeImageIndexForTesting("banana", {}, {}), 4.0));
+
+ EXPECT_EQ(0U,
+ test_build_equivalence(MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("", {}, {}), 4.0));
+
+ EXPECT_EQ(0U, test_build_equivalence(
+ MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("zzzz", {}, {}), 4.0));
+
+ EXPECT_EQ(6U, test_build_equivalence(
+ MakeImageIndexForTesting("banana", {}, {}),
+ MakeImageIndexForTesting("banana", {}, {}), 4.0));
+
+ EXPECT_EQ(6U, test_build_equivalence(
+ MakeImageIndexForTesting("bananaxx", {}, {}),
+ MakeImageIndexForTesting("bananayy", {}, {}), 4.0));
+
+ EXPECT_EQ(8U, test_build_equivalence(
+ MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+ MakeImageIndexForTesting("banana11", {{6, 0}}, {}), 4.0));
+
+ EXPECT_EQ(6U, test_build_equivalence(
+ MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
+ MakeImageIndexForTesting("banana22", {}, {{6, 0}}), 4.0));
+
+ EXPECT_EQ(
+ 15U,
+ test_build_equivalence(
+ MakeImageIndexForTesting("banana11pineapple", {{6, 0}}, {}),
+ MakeImageIndexForTesting("banana22pineapple", {}, {{6, 0}}), 4.0));
+
+ EXPECT_EQ(
+ 15U,
+ test_build_equivalence(
+ MakeImageIndexForTesting("bananaxxxxxxxxpineapple", {}, {}),
+ MakeImageIndexForTesting("bananayyyyyyyypineapple", {}, {}), 4.0));
+
+ EXPECT_EQ(
+ 19U,
+ test_build_equivalence(
+ MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
+ MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
+ 4.0));
+}
+
+} // namespace zucchini
diff --git a/fuzzers/BUILD.gn b/fuzzers/BUILD.gn
new file mode 100644
index 0000000..90c436e
--- /dev/null
+++ b/fuzzers/BUILD.gn
@@ -0,0 +1,210 @@
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//testing/libfuzzer/fuzzer_test.gni")
+import("//third_party/protobuf/proto_library.gni")
+
+static_library("zucchini_fuzz_utils") {
+ sources = [
+ "fuzz_utils.cc",
+ "fuzz_utils.h",
+ ]
+ deps = [
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ ]
+}
+
+# To download the corpus for local fuzzing use:
+# gsutil -m rsync \
+# gs://clusterfuzz-corpus/libfuzzer/zucchini_disassembler_dex_fuzzer \
+# components/zucchini/fuzzing/testdata/disassembler_dex_fuzzer/
+fuzzer_test("zucchini_disassembler_dex_fuzzer") {
+ sources = [ "disassembler_dex_fuzzer.cc" ]
+ deps = [
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ ]
+}
+
+# To download the corpus for local fuzzing use:
+# gsutil -m rsync \
+# gs://clusterfuzz-corpus/libfuzzer/zucchini_disassembler_win32_fuzzer \
+# components/zucchini/fuzzing/testdata/disassembler_win32_fuzzer/
+fuzzer_test("zucchini_disassembler_win32_fuzzer") {
+ sources = [ "disassembler_win32_fuzzer.cc" ]
+ deps = [
+ ":zucchini_fuzz_utils",
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ ]
+}
+
+# To download the corpus for local fuzzing use:
+# gsutil -m rsync \
+# gs://clusterfuzz-corpus/libfuzzer/zucchini_disassembler_elf_fuzzer \
+# components/zucchini/fuzzing/testdata/disassembler_elf_fuzzer/
+fuzzer_test("zucchini_disassembler_elf_fuzzer") {
+ sources = [ "disassembler_elf_fuzzer.cc" ]
+ deps = [
+ ":zucchini_fuzz_utils",
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ ]
+}
+
+fuzzer_test("zucchini_patch_fuzzer") {
+ sources = [ "patch_fuzzer.cc" ]
+ deps = [
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ ]
+ seed_corpus = "testdata/patch_fuzzer"
+}
+
+proto_library("zucchini_file_pair_proto") {
+ sources = [ "file_pair.proto" ]
+}
+
+# Ensure protoc is available.
+# Disabled on Windows due to crbug/844826.
+if (current_toolchain == host_toolchain && !is_win) {
+ # Raw Apply Fuzzer Seed:
+ action("zucchini_raw_apply_seed") {
+ script = "generate_fuzzer_data.py"
+
+ args = [
+ "--raw",
+ "old_eventlog_provider.dll", # <old_file>
+ "new_eventlog_provider.dll", # <new_file>
+
+ # <patch_file> (temporary)
+ rebase_path(
+ "$target_gen_dir/testdata/apply_fuzzer/eventlog_provider.patch",
+ root_build_dir),
+
+ # <output_file>
+ rebase_path(
+ "$target_gen_dir/testdata/apply_fuzzer/raw_apply_seed_proto.bin",
+ root_build_dir),
+ ]
+
+ # Files depended upon.
+ sources = [
+ "create_seed_file_pair.py",
+ "testdata/new_eventlog_provider.dll",
+ "testdata/old_eventlog_provider.dll",
+ ]
+
+ # Outputs: necessary for validation.
+ outputs =
+ [ "$target_gen_dir/testdata/apply_fuzzer/raw_apply_seed_proto.bin" ]
+ deps = [
+ "//components/zucchini:zucchini",
+ "//third_party/protobuf:protoc",
+ ]
+ }
+
+ # ZTF Apply Fuzzer Seed:
+ action("zucchini_ztf_apply_seed") {
+ script = "generate_fuzzer_data.py"
+
+ # *.ztf files are expected to be valid ZTF format.
+ args = [
+ "old.ztf", # <old_file>
+ "new.ztf", # <new_file>
+
+ # <patch_file> (temporary)
+ rebase_path("$target_gen_dir/testdata/apply_fuzzer/ztf.patch",
+ root_build_dir),
+
+ # <output_file>
+ rebase_path(
+ "$target_gen_dir/testdata/apply_fuzzer/ztf_apply_seed_proto.bin",
+ root_build_dir),
+ ]
+
+ # Files depended upon.
+ sources = [
+ "create_seed_file_pair.py",
+ "testdata/new.ztf",
+ "testdata/old.ztf",
+ ]
+
+ # Outputs: necessary for validation.
+ outputs =
+ [ "$target_gen_dir/testdata/apply_fuzzer/ztf_apply_seed_proto.bin" ]
+ deps = [
+ "//components/zucchini:zucchini",
+ "//third_party/protobuf:protoc",
+ ]
+ }
+
+ # Apply Fuzzer:
+ fuzzer_test("zucchini_apply_fuzzer") {
+ sources = [ "apply_fuzzer.cc" ]
+ deps = [
+ ":zucchini_file_pair_proto",
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ "//third_party/libprotobuf-mutator",
+ ]
+ seed_corpus = "$target_gen_dir/testdata/apply_fuzzer"
+ seed_corpus_deps = [
+ ":zucchini_raw_apply_seed",
+ ":zucchini_ztf_apply_seed",
+ ]
+ }
+
+ # For Gen fuzzers seeds can be created from this directory with:
+ # python create_seed_file_pair.py <protoc> <old file> <new file> <out file>
+ # [--imposed=<imposed>]
+
+ # Raw Gen Fuzzer:
+ # <old file>: testdata/old.ztf
+ # <new file>: testdata/new.ztf
+ # <out file>: testdata/raw_or_ztf_gen_fuzzer/seed.asciipb
+ fuzzer_test("zucchini_raw_gen_fuzzer") {
+ sources = [ "raw_gen_fuzzer.cc" ]
+ deps = [
+ ":zucchini_file_pair_proto",
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ "//third_party/libprotobuf-mutator",
+ ]
+ seed_corpus = "testdata/raw_or_ztf_gen_fuzzer"
+ }
+
+ # ZTF Gen Fuzzer:
+ # <old file>: testdata/old.ztf
+ # <new file>: testdata/new.ztf
+ # <out file>: testdata/raw_or_ztf_gen_fuzzer/seed.asciipb
+ fuzzer_test("zucchini_ztf_gen_fuzzer") {
+ sources = [ "ztf_gen_fuzzer.cc" ]
+ deps = [
+ ":zucchini_file_pair_proto",
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ "//third_party/libprotobuf-mutator",
+ ]
+ seed_corpus = "testdata/raw_or_ztf_gen_fuzzer"
+ }
+
+ # Imposed Ensemble Match Fuzzer:
+ # <old file>: testdata/old_imposed_archive.txt
+ # <new file>: testdata/new_imposed_archive.txt
+ # <out file>: testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb
+ # <imposed>: 17+420=388+347,452+420=27+347
+ # This is a mapping of regions old_offset+old_size=new_offset+new_size,...
+ fuzzer_test("zucchini_imposed_ensemble_matcher_fuzzer") {
+ sources = [ "imposed_ensemble_matcher_fuzzer.cc" ]
+ deps = [
+ ":zucchini_file_pair_proto",
+ "//base",
+ "//components/zucchini:zucchini_lib",
+ "//third_party/libprotobuf-mutator",
+ ]
+ seed_corpus = "testdata/imposed_ensemble_matcher_fuzzer"
+ }
+}
diff --git a/fuzzers/apply_fuzzer.cc b/fuzzers/apply_fuzzer.cc
new file mode 100644
index 0000000..baad978
--- /dev/null
+++ b/fuzzers/apply_fuzzer.cc
@@ -0,0 +1,59 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <iostream>
+#include <vector>
+
+#include "base/environment.h"
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/fuzzers/file_pair.pb.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/zucchini.h"
+#include "testing/libfuzzer/proto/lpm_interface.h"
+
+struct Environment {
+ Environment() {
+ logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming.
+ }
+};
+
+Environment* env = new Environment();
+
+DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) {
+ // Dump code for debugging.
+ if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) {
+ std::cout << "Old File: " << file_pair.old_file() << std::endl
+ << "Patch File: " << file_pair.new_or_patch_file() << std::endl;
+ }
+
+ // Prepare data.
+ zucchini::ConstBufferView old_image(
+ reinterpret_cast<const uint8_t*>(file_pair.old_file().data()),
+ file_pair.old_file().size());
+ zucchini::ConstBufferView patch_file(
+ reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()),
+ file_pair.new_or_patch_file().size());
+
+ // Generate a patch reader.
+ auto patch_reader = zucchini::EnsemblePatchReader::Create(patch_file);
+ // Abort if the patch can't be read.
+ if (!patch_reader.has_value())
+ return;
+
+ // Create the underlying new file.
+ size_t new_size = patch_reader->header().new_size;
+ // Reject unreasonably large "new" files that fuzzed patch may specify.
+ if (new_size > 64 * 1024)
+ return;
+ std::vector<uint8_t> new_data(new_size);
+ zucchini::MutableBufferView new_image(new_data.data(), new_size);
+
+ // Fuzz target.
+ zucchini::ApplyBuffer(old_image, *patch_reader, new_image);
+  // No need to check whether output exists, or if so, whether it's valid.
+}
diff --git a/fuzzers/create_seed_file_pair.py b/fuzzers/create_seed_file_pair.py
new file mode 100755
index 0000000..db3843f
--- /dev/null
+++ b/fuzzers/create_seed_file_pair.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Create binary protobuf encoding for fuzzer seeds.
+
+This script is used to generate binary encoded protobuf seeds for fuzzers
+related to Zucchini-gen and -apply, which take pairs of files as arguments. The
+binary protobuf format is faster to parse so it is the preferred method for
+encoding the seeds. For gen related fuzzers this should only need to be run
+once. For any apply related fuzzers this should be rerun whenever the patch
+format is changed.
+"""
+
+import argparse
+import logging
+import os
+import subprocess
+import sys
+
+ABS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__)))
+PROTO_DEFINITION_FILE = 'file_pair.proto'
+
+def parse_args():
+ """Parse commandline args."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument('protoc_path', help='Path to protoc.')
+ parser.add_argument('old_file', help='Old file to generate/apply patch.')
+ parser.add_argument('new_or_patch_file',
+ help='New file to generate or patch to apply.')
+ parser.add_argument('output_file',
+ help='File to write binary protobuf to.')
+ parser.add_argument('--imposed_matches',
+ help='Equivalence matches to impose when generating '
+ 'the patch.')
+ return parser.parse_args()
+
+
+def read_to_proto_escaped_string(filename):
+ """Reads a file and converts it to hex escape sequences."""
+ with open(filename, 'rb') as f:
+ # Note that unicode-escape escapes all non-ASCII printable characters
+ # excluding ", which needs to be manually escaped.
+ return f.read().decode('latin1').encode('unicode-escape').replace(
+ b'"', b'\\"')
+
+
+def main():
+ args = parse_args()
+ # Create an ASCII string representing a protobuf.
+ content = [b'old_file: "%s"' % read_to_proto_escaped_string(args.old_file),
+ b'new_or_patch_file: "%s"' % read_to_proto_escaped_string(
+ args.new_or_patch_file)]
+
+ if args.imposed_matches:
+ content.append(b'imposed_matches: "%s"' %
+ args.imposed_matches.encode('unicode-escape'))
+
+ # Encode the ASCII protobuf as a binary protobuf.
+ ps = subprocess.Popen([args.protoc_path, '--proto_path=%s' % ABS_PATH,
+ '--encode=zucchini.fuzzers.FilePair',
+ os.path.join(ABS_PATH, PROTO_DEFINITION_FILE)],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+ # Write the string to the subprocess. Single line IO is fine as protoc returns
+ # a string.
+ output = ps.communicate(input=b'\n'.join(content))
+ ps.wait()
+ if ps.returncode:
+ logging.error('Binary protobuf encoding failed.')
+ return ps.returncode
+
+ # Write stdout of the subprocess for protoc to the |output_file|.
+ with open(args.output_file, 'wb') as f:
+ f.write(output[0])
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/fuzzers/disassembler_dex_fuzzer.cc b/fuzzers/disassembler_dex_fuzzer.cc
new file mode 100644
index 0000000..ab08696
--- /dev/null
+++ b/fuzzers/disassembler_dex_fuzzer.cc
@@ -0,0 +1,54 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/disassembler_dex.h"
+
+namespace {
+
+struct Environment {
+ Environment() { logging::SetMinLogLevel(logging::LOG_FATAL); }
+};
+
+} // namespace
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ static Environment env;
+ if (!size)
+ return 0;
+ // Prepare data.
+ std::vector<uint8_t> mutable_data(data, data + size);
+ zucchini::ConstBufferView image(mutable_data.data(), mutable_data.size());
+
+ // Create disassembler. Early exit on failure.
+ auto disassembler_dex =
+ zucchini::Disassembler::Make<zucchini::DisassemblerDex>(image);
+ if (!disassembler_dex)
+ return 0;
+ CHECK_LE(disassembler_dex->size(), image.size());
+ zucchini::MutableBufferView mutable_image(mutable_data.data(),
+ disassembler_dex->size());
+
+ std::vector<zucchini::Reference> references;
+ // Read all references in the file.
+ auto groups = disassembler_dex->MakeReferenceGroups();
+ for (const auto& group : groups) {
+ auto reader = group.GetReader(disassembler_dex.get());
+ for (auto ref = reader->GetNext(); ref.has_value();
+ ref = reader->GetNext()) {
+ references.push_back(ref.value());
+ }
+ reader.reset();
+ auto writer = group.GetWriter(mutable_image, disassembler_dex.get());
+ for (const auto& ref : references)
+ writer->PutNext(ref);
+ references.clear();
+ }
+ return 0;
+}
diff --git a/fuzzers/disassembler_elf_fuzzer.cc b/fuzzers/disassembler_elf_fuzzer.cc
new file mode 100644
index 0000000..16c885d
--- /dev/null
+++ b/fuzzers/disassembler_elf_fuzzer.cc
@@ -0,0 +1,45 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/disassembler_elf.h"
+#include "components/zucchini/fuzzers/fuzz_utils.h"
+
+namespace {
+
+struct Environment {
+ Environment() { logging::SetMinLogLevel(logging::LOG_FATAL); }
+};
+
+} // namespace
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ static Environment env;
+ if (!size)
+ return 0;
+ // Prepare data.
+ std::vector<uint8_t> mutable_data(data, data + size);
+ zucchini::ConstBufferView image(mutable_data.data(), mutable_data.size());
+
+ // Create disassembler. Early exit on failure.
+ auto disassembler_elf_x64 =
+ zucchini::Disassembler::Make<zucchini::DisassemblerElfX64>(image);
+ if (disassembler_elf_x64) {
+ zucchini::ReadAndWriteReferences(std::move(disassembler_elf_x64),
+ &mutable_data);
+ return 0;
+ }
+
+ auto disassembler_elf_x86 =
+ zucchini::Disassembler::Make<zucchini::DisassemblerElfX86>(image);
+ if (disassembler_elf_x86)
+ zucchini::ReadAndWriteReferences(std::move(disassembler_elf_x86),
+ &mutable_data);
+ return 0;
+}
diff --git a/fuzzers/disassembler_win32_fuzzer.cc b/fuzzers/disassembler_win32_fuzzer.cc
new file mode 100644
index 0000000..34a3565
--- /dev/null
+++ b/fuzzers/disassembler_win32_fuzzer.cc
@@ -0,0 +1,52 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/disassembler_win32.h"
+#include "components/zucchini/fuzzers/fuzz_utils.h"
+
+namespace {
+
+struct Environment {
+ Environment() {
+ logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming.
+ }
+};
+
+} // namespace
+
+// Entry point for LibFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ static Environment env;
+ if (!size)
+ return 0;
+ // Prepare data.
+ std::vector<uint8_t> mutable_data(data, data + size);
+ zucchini::ConstBufferView image(mutable_data.data(), mutable_data.size());
+
+ // One of x86 or x64 should return a non-nullptr if the data is valid.
+ auto disassembler_win32x86 =
+ zucchini::Disassembler::Make<zucchini::DisassemblerWin32X86>(image);
+ if (disassembler_win32x86) {
+ zucchini::ReadAndWriteReferences(std::move(disassembler_win32x86),
+ &mutable_data);
+ return 0;
+ }
+
+ auto disassembler_win32x64 =
+ zucchini::Disassembler::Make<zucchini::DisassemblerWin32X64>(image);
+ if (disassembler_win32x64)
+ zucchini::ReadAndWriteReferences(std::move(disassembler_win32x64),
+ &mutable_data);
+ return 0;
+}
diff --git a/fuzzers/file_pair.proto b/fuzzers/file_pair.proto
new file mode 100644
index 0000000..7fdc908
--- /dev/null
+++ b/fuzzers/file_pair.proto
@@ -0,0 +1,21 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+syntax = "proto2";
+
+package zucchini.fuzzers;
+
+// NEXT_TAG = 4
+message FilePair {
+ // File to generate patch from or apply patch to.
+ required bytes old_file = 1;
+ // New file to generate patch or the patch to apply.
+ required bytes new_or_patch_file = 2;
+ // Imposed matches to apply to the equivalence matches.
+ // Should be of the format:
+ // "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"),
+ // where "#+#=#+#" encodes a match as 4 unsigned integers:
+ // [offset in "old", size in "old", offset in "new", size in "new"].
+ optional string imposed_matches = 3;
+}
diff --git a/fuzzers/fuzz_utils.cc b/fuzzers/fuzz_utils.cc
new file mode 100644
index 0000000..1fd89fa
--- /dev/null
+++ b/fuzzers/fuzz_utils.cc
@@ -0,0 +1,40 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/fuzzers/fuzz_utils.h"
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "components/zucchini/disassembler.h"
+
+namespace zucchini {
+
+void ReadAndWriteReferences(
+ std::unique_ptr<zucchini::Disassembler> disassembler,
+ std::vector<uint8_t>* mutable_data) {
+ zucchini::MutableBufferView mutable_image(mutable_data->data(),
+ disassembler->size());
+ std::vector<zucchini::Reference> references;
+ auto groups = disassembler->MakeReferenceGroups();
+ std::map<zucchini::PoolTag, std::vector<zucchini::Reference>>
+ references_of_pool;
+ for (const auto& group : groups) {
+ auto reader = group.GetReader(disassembler.get());
+ std::vector<zucchini::Reference>* refs =
+ &references_of_pool[group.pool_tag()];
+ for (auto ref = reader->GetNext(); ref.has_value();
+ ref = reader->GetNext()) {
+ refs->push_back(ref.value());
+ }
+ }
+ for (const auto& group : groups) {
+ auto writer = group.GetWriter(mutable_image, disassembler.get());
+ for (const auto& ref : references_of_pool[group.pool_tag()])
+ writer->PutNext(ref);
+ }
+}
+
+} // namespace zucchini
diff --git a/fuzzers/fuzz_utils.h b/fuzzers/fuzz_utils.h
new file mode 100644
index 0000000..0caaab4
--- /dev/null
+++ b/fuzzers/fuzz_utils.h
@@ -0,0 +1,25 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_FUZZERS_FUZZ_UTILS_H_
+#define COMPONENTS_ZUCCHINI_FUZZERS_FUZZ_UTILS_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "components/zucchini/disassembler.h"
+
+namespace zucchini {
+
+// Helper function that uses |disassembler| to read all references from
+// |mutable_data| and write them back.
+void ReadAndWriteReferences(
+ std::unique_ptr<zucchini::Disassembler> disassembler,
+ std::vector<uint8_t>* mutable_data);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_FUZZERS_FUZZ_UTILS_H_
diff --git a/fuzzers/generate_fuzzer_data.py b/fuzzers/generate_fuzzer_data.py
new file mode 100755
index 0000000..c76cfbc
--- /dev/null
+++ b/fuzzers/generate_fuzzer_data.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script for generating new binary protobuf seeds for fuzzers.
+
+Currently supports creating a single seed binary protobuf of the form
+zucchini.fuzzer.FilePair.
+"""
+
+import argparse
+import hashlib
+import logging
+import os
+import platform
+import subprocess
+import sys
+
+ABS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__)))
+ABS_TESTDATA_PATH = os.path.join(ABS_PATH, 'testdata')
+
+def parse_args():
+ """Parses arguments from command-line."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--raw', help='Whether to use Raw Zucchini.',
+ action='store_true')
+ parser.add_argument('old_file', help='Old file to generate/apply patch.')
+ parser.add_argument('new_file', help='New file to generate patch from.')
+ parser.add_argument('patch_file', help='Patch filename to use.')
+ parser.add_argument('output_file', help='File to write binary protobuf to.')
+ return parser.parse_args()
+
+
+def gen(old_file, new_file, patch_file, output_file, is_raw, is_win):
+ """Generates a new patch and binary encodes a protobuf pair."""
+ # Create output directory if missing.
+ output_dir = os.path.dirname(output_file)
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
+ # Handle Windows executable names.
+ zucchini = 'zucchini'
+ protoc = 'protoc'
+ if is_win:
+ zucchini += '.exe'
+ protoc += '.exe'
+
+ zuc_cmd = [os.path.abspath(zucchini), '-gen']
+ if is_raw:
+ zuc_cmd.append('-raw')
+ # Generate a new patch.
+ ret = subprocess.call(zuc_cmd + [old_file, new_file, patch_file],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if ret:
+ logging.error('Patch generation failed for ({}, {})'.format(old_file,
+ new_file))
+ return ret
+ # Binary encode the protobuf pair.
+ ret = subprocess.call([sys.executable,
+ os.path.join(ABS_PATH, 'create_seed_file_pair.py'),
+ os.path.abspath(protoc), old_file, patch_file,
+ output_file])
+ os.remove(patch_file)
+ return ret
+
+
+def main():
+ args = parse_args()
+ return gen(os.path.join(ABS_TESTDATA_PATH, args.old_file),
+ os.path.join(ABS_TESTDATA_PATH, args.new_file),
+ os.path.abspath(args.patch_file),
+ os.path.abspath(args.output_file),
+ args.raw,
+ platform.system() == 'Windows')
+
+
+if __name__ == '__main__':
+ sys.exit(main())
+
diff --git a/fuzzers/imposed_ensemble_matcher_fuzzer.cc b/fuzzers/imposed_ensemble_matcher_fuzzer.cc
new file mode 100644
index 0000000..0dbcf86
--- /dev/null
+++ b/fuzzers/imposed_ensemble_matcher_fuzzer.cc
@@ -0,0 +1,67 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <iostream>
+#include <memory>
+
+#include "base/environment.h"
+#include "base/logging.h"
+#include "components/zucchini/buffer_sink.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/fuzzers/file_pair.pb.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/zucchini.h"
+#include "testing/libfuzzer/proto/lpm_interface.h"
+
+namespace {
+
+constexpr size_t kMinImageSize = 16;
+constexpr size_t kMaxImageSize = 1024;
+
+} // namespace
+
+struct Environment {
+ Environment() {
+ logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming.
+ }
+};
+
+DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) {
+ static Environment env;
+ // Dump code for debugging.
+ if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) {
+ std::cout << "Imposed Matches: " << file_pair.imposed_matches() << std::endl
+ << "Old File: " << file_pair.old_file() << std::endl
+ << "New File: " << file_pair.new_or_patch_file() << std::endl;
+ }
+
+ // Prepare data.
+ zucchini::ConstBufferView old_image(
+ reinterpret_cast<const uint8_t*>(file_pair.old_file().data()),
+ file_pair.old_file().size());
+ zucchini::ConstBufferView new_image(
+ reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()),
+ file_pair.new_or_patch_file().size());
+
+ // Restrict image sizes to speed up fuzzing.
+ if (old_image.size() < kMinImageSize || old_image.size() > kMaxImageSize ||
+ new_image.size() < kMinImageSize || new_image.size() > kMaxImageSize) {
+ return;
+ }
+
+ // Generate a patch writer.
+ zucchini::EnsemblePatchWriter patch_writer(old_image, new_image);
+
+ // Fuzz Target.
+ zucchini::GenerateBufferImposed(old_image, new_image,
+ file_pair.imposed_matches(), &patch_writer);
+
+ // Write to buffer to avoid IO.
+ size_t patch_size = patch_writer.SerializedSize();
+ std::unique_ptr<uint8_t[]> patch_data(new uint8_t[patch_size]);
+ zucchini::BufferSink patch(patch_data.get(), patch_size);
+ patch_writer.SerializeInto(patch);
+}
diff --git a/fuzzers/patch_fuzzer.cc b/fuzzers/patch_fuzzer.cc
new file mode 100644
index 0000000..83bebcf
--- /dev/null
+++ b/fuzzers/patch_fuzzer.cc
@@ -0,0 +1,19 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/patch_reader.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+// Entry point for LibFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ logging::SetMinLogLevel(3); // Disable console spamming.
+ zucchini::ConstBufferView patch(data, size);
+ absl::optional<zucchini::EnsemblePatchReader> patch_reader =
+ zucchini::EnsemblePatchReader::Create(patch);
+ return 0;
+}
diff --git a/fuzzers/raw_gen_fuzzer.cc b/fuzzers/raw_gen_fuzzer.cc
new file mode 100644
index 0000000..de63d95
--- /dev/null
+++ b/fuzzers/raw_gen_fuzzer.cc
@@ -0,0 +1,71 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <iostream>
+#include <memory>
+
+#include "base/environment.h"
+#include "base/logging.h"
+#include "components/zucchini/buffer_sink.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/fuzzers/file_pair.pb.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/zucchini_gen.h"
+#include "testing/libfuzzer/proto/lpm_interface.h"
+
+namespace {
+
+constexpr size_t kMinImageSize = 16;
+constexpr size_t kMaxImageSize = 1024;
+
+} // namespace
+
+struct Environment {
+ Environment() {
+ logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming.
+ }
+};
+
+Environment* env = new Environment();
+
+DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) {
+ // Dump code for debugging.
+ if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) {
+ std::cout << "Old File: " << file_pair.old_file() << std::endl
+ << "New File: " << file_pair.new_or_patch_file() << std::endl;
+ }
+
+ // Prepare data.
+ zucchini::ConstBufferView old_image(
+ reinterpret_cast<const uint8_t*>(file_pair.old_file().data()),
+ file_pair.old_file().size());
+ zucchini::ConstBufferView new_image(
+ reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()),
+ file_pair.new_or_patch_file().size());
+
+ // Restrict image sizes to speed up fuzzing.
+ if (old_image.size() < kMinImageSize || old_image.size() > kMaxImageSize ||
+ new_image.size() < kMinImageSize || new_image.size() > kMaxImageSize) {
+ return;
+ }
+
+ // Generate a patch writer.
+ zucchini::EnsemblePatchWriter patch_writer(old_image, new_image);
+
+ // Fuzz Target.
+ zucchini::GenerateBufferRaw(old_image, new_image, &patch_writer);
+
+ // Check that the patch size is sane. Crash the fuzzer if this isn't the case
+ // as it is a failure in Zucchini's patch performance that is worth
+ // investigating.
+ size_t patch_size = patch_writer.SerializedSize();
+ CHECK_LE(patch_size, kMaxImageSize * 2);
+
+ // Write to buffer to avoid IO.
+ std::unique_ptr<uint8_t[]> patch_data(new uint8_t[patch_size]);
+ zucchini::BufferSink patch(patch_data.get(), patch_size);
+ patch_writer.SerializeInto(patch);
+}
diff --git a/fuzzers/testdata/.gitignore b/fuzzers/testdata/.gitignore
new file mode 100644
index 0000000..d345889
--- /dev/null
+++ b/fuzzers/testdata/.gitignore
@@ -0,0 +1,4 @@
+# Exclude testdata binaries.
+*.bin
+*.dll
+*.patch
diff --git a/fuzzers/testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb b/fuzzers/testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb
new file mode 100644
index 0000000..abbadd2
--- /dev/null
+++ b/fuzzers/testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb
@@ -0,0 +1,90 @@
+
+ˆABCDEFGHIJKLMNOP
+ZTxt
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{3,4} [4,5] (90,08)
+(1,4)
+[+001, +001]
+References {-004,-003}, <001,001>, [98,78]
+(+01,+00)
+AAAAAAAAA
+
+BLOCK2
+{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+Old bytes live here as this is reasonable.
+txTZ
+Hello, World!
+ZTxt
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{3,4} [4,5] (90,08)
+(1,4)
+[+001, +001]
+References {-004,-003}, <001,001>, [98,78]
+(+01,+00)
+AAAAAAAAA
+
+BLOCK2
+{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+Old bytes live here as this is reasonable.
+txTZ
+Yet another gap for Raw Zucchini
+„ABCDEFGHIJKLMNOPQRSTUVWXYZ
+ZTxt
+BLOCK2
+{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{4,4} [5,8] (90,08)
+(1,4)
+[+001, +001]
+References {-005,-006}, <001,002>, [98,78]
+(+01,+04)
+AAAAAAAAA
+
+Other new bytes.
+
+Old bytes live here as this is reasonable.
+New bytes live here.
+txTZ
+Hello, World!
+ZTxt
+BLOCK2
+{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{4,4} [5,8] (90,08)
+(1,4)
+[+001, +001]
+References {-005,-006}, <001,002>, [98,78]
+(+01,+04)
+AAAAAAAAA
+
+Other new bytes.
+
+Old bytes live here as this is reasonable.
+New bytes live here.
+txTZ
+Yet yet another gap for Raw Zucchini
+17+420=388+347,452+420=27+347 \ No newline at end of file
diff --git a/fuzzers/testdata/new.ztf b/fuzzers/testdata/new.ztf
new file mode 100644
index 0000000..1b1876f
--- /dev/null
+++ b/fuzzers/testdata/new.ztf
@@ -0,0 +1,20 @@
+ZTxt
+BLOCK2
+{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{4,4} [5,8] (90,08)
+(1,4)
+[+001, +001]
+References {-005,-006}, <001,002>, [98,78]
+(+01,+04)
+AAAAAAAAA
+
+Other new bytes.
+
+Old bytes live here as this is reasonable.
+New bytes live here.
+txTZ
diff --git a/fuzzers/testdata/new_eventlog_provider.dll.sha1 b/fuzzers/testdata/new_eventlog_provider.dll.sha1
new file mode 100644
index 0000000..bbf56f9
--- /dev/null
+++ b/fuzzers/testdata/new_eventlog_provider.dll.sha1
@@ -0,0 +1 @@
+89ce67035d2d2dae33cb2d98d4762e955b93df95 \ No newline at end of file
diff --git a/fuzzers/testdata/new_imposed_archive.txt b/fuzzers/testdata/new_imposed_archive.txt
new file mode 100644
index 0000000..5ce6f70
--- /dev/null
+++ b/fuzzers/testdata/new_imposed_archive.txt
@@ -0,0 +1,43 @@
+ABCDEFGHIJKLMNOPQRSTUVWXYZ
+ZTxt
+BLOCK2
+{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{4,4} [5,8] (90,08)
+(1,4)
+[+001, +001]
+References {-005,-006}, <001,002>, [98,78]
+(+01,+04)
+AAAAAAAAA
+
+Other new bytes.
+
+Old bytes live here as this is reasonable.
+New bytes live here.
+txTZ
+Hello, World!
+ZTxt
+BLOCK2
+{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{4,4} [5,8] (90,08)
+(1,4)
+[+001, +001]
+References {-005,-006}, <001,002>, [98,78]
+(+01,+04)
+AAAAAAAAA
+
+Other new bytes.
+
+Old bytes live here as this is reasonable.
+New bytes live here.
+txTZ
+Yet yet another gap for Raw Zucchini
diff --git a/fuzzers/testdata/old.ztf b/fuzzers/testdata/old.ztf
new file mode 100644
index 0000000..12dd536
--- /dev/null
+++ b/fuzzers/testdata/old.ztf
@@ -0,0 +1,21 @@
+ZTxt
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{3,4} [4,5] (90,08)
+(1,4)
+[+001, +001]
+References {-004,-003}, <001,001>, [98,78]
+(+01,+00)
+AAAAAAAAA
+
+BLOCK2
+{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+Old bytes live here as this is reasonable.
+txTZ
diff --git a/fuzzers/testdata/old_eventlog_provider.dll.sha1 b/fuzzers/testdata/old_eventlog_provider.dll.sha1
new file mode 100644
index 0000000..5daf440
--- /dev/null
+++ b/fuzzers/testdata/old_eventlog_provider.dll.sha1
@@ -0,0 +1 @@
+c80fdce994ba043956e192f650d894555460ff9b \ No newline at end of file
diff --git a/fuzzers/testdata/old_imposed_archive.txt b/fuzzers/testdata/old_imposed_archive.txt
new file mode 100644
index 0000000..e4daa3f
--- /dev/null
+++ b/fuzzers/testdata/old_imposed_archive.txt
@@ -0,0 +1,45 @@
+ABCDEFGHIJKLMNOP
+ZTxt
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{3,4} [4,5] (90,08)
+(1,4)
+[+001, +001]
+References {-004,-003}, <001,001>, [98,78]
+(+01,+00)
+AAAAAAAAA
+
+BLOCK2
+{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+Old bytes live here as this is reasonable.
+txTZ
+Hello, World!
+ZTxt
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{3,4} [4,5] (90,08)
+(1,4)
+[+001, +001]
+References {-004,-003}, <001,001>, [98,78]
+(+01,+00)
+AAAAAAAAA
+
+BLOCK2
+{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+Old bytes live here as this is reasonable.
+txTZ
+Yet another gap for Raw Zucchini
diff --git a/fuzzers/testdata/patch_fuzzer/empty.zuc b/fuzzers/testdata/patch_fuzzer/empty.zuc
new file mode 100644
index 0000000..64eacf5
--- /dev/null
+++ b/fuzzers/testdata/patch_fuzzer/empty.zuc
Binary files differ
diff --git a/fuzzers/testdata/raw_or_ztf_gen_fuzzer/seed_proto.bin b/fuzzers/testdata/raw_or_ztf_gen_fuzzer/seed_proto.bin
new file mode 100644
index 0000000..5939c72
--- /dev/null
+++ b/fuzzers/testdata/raw_or_ztf_gen_fuzzer/seed_proto.bin
@@ -0,0 +1,42 @@
+
+¤ZTxt
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+ZucZucZucZucZucZucZucZucZuc
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{3,4} [4,5] (90,08)
+(1,4)
+[+001, +001]
+References {-004,-003}, <001,001>, [98,78]
+(+01,+00)
+AAAAAAAAA
+
+BLOCK2
+{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+Old bytes live here as this is reasonable.
+txTZ
+ÛZTxt
+BLOCK2
+{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum
+<><><><><>{}{}{}{}[][][]()()()()
+[4,1]
+
+BLOCK1
+Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1>
+{4,4} [5,8] (90,08)
+(1,4)
+[+001, +001]
+References {-005,-006}, <001,002>, [98,78]
+(+01,+04)
+AAAAAAAAA
+
+Other new bytes.
+
+Old bytes live here as this is reasonable.
+New bytes live here.
+txTZ
diff --git a/fuzzers/ztf_gen_fuzzer.cc b/fuzzers/ztf_gen_fuzzer.cc
new file mode 100644
index 0000000..ee2d47c
--- /dev/null
+++ b/fuzzers/ztf_gen_fuzzer.cc
@@ -0,0 +1,67 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <iostream>
+#include <memory>
+
+#include "base/environment.h"
+#include "base/logging.h"
+#include "components/zucchini/buffer_sink.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/fuzzers/file_pair.pb.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/zucchini_gen.h"
+#include "testing/libfuzzer/proto/lpm_interface.h"
+
+namespace {
+
+constexpr size_t kMinImageSize = 16;
+constexpr size_t kMaxImageSize = 1024;
+
+} // namespace
+
+struct Environment {
+ Environment() {
+ logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming.
+ }
+};
+
+Environment* env = new Environment();
+
+DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) {
+ // Dump code for debugging.
+ if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) {
+ std::cout << "Old File: " << file_pair.old_file() << std::endl
+ << "New File: " << file_pair.new_or_patch_file() << std::endl;
+ }
+
+ // Prepare data. These are originally Zucchini Text Format (ZTF) files but may
+ // in relatively unlikely circumstances mutate into other formats.
+ zucchini::ConstBufferView old_image(
+ reinterpret_cast<const uint8_t*>(file_pair.old_file().data()),
+ file_pair.old_file().size());
+ zucchini::ConstBufferView new_image(
+ reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()),
+ file_pair.new_or_patch_file().size());
+
+ // Restrict image sizes to speed up fuzzing.
+ if (old_image.size() < kMinImageSize || old_image.size() > kMaxImageSize ||
+ new_image.size() < kMinImageSize || new_image.size() > kMaxImageSize) {
+ return;
+ }
+
+ // Generate a patch writer.
+ zucchini::EnsemblePatchWriter patch_writer(old_image, new_image);
+
+ // Fuzz Target.
+ zucchini::GenerateBuffer(old_image, new_image, &patch_writer);
+
+ // Write to buffer to avoid IO.
+ size_t patch_size = patch_writer.SerializedSize();
+ std::unique_ptr<uint8_t[]> patch_data(new uint8_t[patch_size]);
+ zucchini::BufferSink patch(patch_data.get(), patch_size);
+ patch_writer.SerializeInto(patch);
+}
diff --git a/heuristic_ensemble_matcher.cc b/heuristic_ensemble_matcher.cc
new file mode 100644
index 0000000..2f01d34
--- /dev/null
+++ b/heuristic_ensemble_matcher.cc
@@ -0,0 +1,369 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/heuristic_ensemble_matcher.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/bind.h"
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/strings/stringprintf.h"
+#include "components/zucchini/binary_data_histogram.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+namespace {
+
+/******** Helper Functions ********/
+
+// Uses |detector| to find embedded executables inside |image|, and returns the
+// result on success, or absl::nullopt on failure, which occurs if too many (>
+// |kElementLimit|) elements are found.
+absl::optional<std::vector<Element>> FindEmbeddedElements(
+ ConstBufferView image,
+ const std::string& name,
+ ElementDetector&& detector) {
+ // Maximum number of Elements in a file. This is enforced because our matching
+ // algorithm is O(n^2), which suffices for regular archive files that should
+ // have up to 10's of executable files. An archive containing 100's of
+ // executables is likely pathological, and is rejected to prevent exploits.
+ static constexpr size_t kElementLimit = 256;
+ std::vector<Element> elements;
+ ElementFinder element_finder(image, std::move(detector));
+ for (auto element = element_finder.GetNext();
+ element.has_value() && elements.size() <= kElementLimit;
+ element = element_finder.GetNext()) {
+ elements.push_back(*element);
+ }
+ if (elements.size() >= kElementLimit) {
+ LOG(WARNING) << name << ": Found too many elements.";
+ return absl::nullopt;
+ }
+ LOG(INFO) << name << ": Found " << elements.size() << " elements.";
+ return elements;
+}
+
+// Determines whether a proposed comparison between Elements should be rejected
+// early, to decrease the likelihood of creating false-positive matches, which
+// may be costly for patching. Our heuristic simply prohibits big difference in
+// size (relative and absolute) between matched elements.
+bool UnsafeDifference(const Element& old_element, const Element& new_element) {
+ static constexpr double kMaxBloat = 2.0;
+ static constexpr size_t kMinWorrysomeDifference = 2 << 20; // 2MB
+ size_t lo_size = std::min(old_element.size, new_element.size);
+ size_t hi_size = std::max(old_element.size, new_element.size);
+ if (hi_size - lo_size < kMinWorrysomeDifference)
+ return false;
+ if (hi_size < lo_size * kMaxBloat)
+ return false;
+ return true;
+}
+
+std::ostream& operator<<(std::ostream& stream, const Element& elt) {
+ stream << "(" << CastExecutableTypeToString(elt.exe_type) << ", "
+ << AsHex<8, size_t>(elt.offset) << " +" << AsHex<8, size_t>(elt.size)
+ << ")";
+ return stream;
+}
+
+/******** MatchingInfoOut ********/
+
+// A class to output detailed information during ensemble matching. Extracting
+// the functionality to a separate class decouples formatting and printing logic
+// from matching logic. The base class consists of stubs.
+class MatchingInfoOut {
+ protected:
+ MatchingInfoOut() = default;
+ MatchingInfoOut(const MatchingInfoOut&) = delete;
+ const MatchingInfoOut& operator=(const MatchingInfoOut&) = delete;
+
+ public:
+ virtual ~MatchingInfoOut() = default;
+ virtual void InitSizes(size_t old_size, size_t new_size) {}
+ virtual void DeclareTypeMismatch(int iold, int inew) {}
+ virtual void DeclareUnsafeDistance(int iold, int inew) {}
+ virtual void DeclareCandidate(int iold, int inew) {}
+ virtual void DeclareMatch(int iold,
+ int inew,
+ double dist,
+ bool is_identical) {}
+ virtual void DeclareOutlier(int iold, int inew) {}
+
+ virtual void OutputCompare(const Element& old_element,
+ const Element& new_element,
+ double dist) {}
+
+ virtual void OutputMatch(const Element& best_old_element,
+ const Element& new_element,
+ bool is_identical,
+ double best_dist) {}
+
+ virtual void OutputScores(const std::string& stats) {}
+
+ virtual void OutputTextGrid() {}
+};
+
+/******** MatchingInfoTerse ********/
+
+// A terse MatchingInfoOut that prints only basic information, using LOG().
+class MatchingInfoOutTerse : public MatchingInfoOut {
+ public:
+ MatchingInfoOutTerse() = default;
+ MatchingInfoOutTerse(const MatchingInfoOutTerse&) = delete;
+ const MatchingInfoOutTerse& operator=(const MatchingInfoOutTerse&) = delete;
+ ~MatchingInfoOutTerse() override = default;
+
+ void OutputScores(const std::string& stats) override {
+ LOG(INFO) << "Best dists: " << stats;
+ }
+};
+
+/******** MatchingInfoOutVerbose ********/
+
+// A verbose MatchingInfoOut that prints detailed information using |out_|,
+// including comparison pairs, scores, and a text grid representation of
+// pairwise matching results.
+class MatchingInfoOutVerbose : public MatchingInfoOut {
+ public:
+ explicit MatchingInfoOutVerbose(std::ostream& out) : out_(out) {}
+ MatchingInfoOutVerbose(const MatchingInfoOutVerbose&) = delete;
+ const MatchingInfoOutVerbose& operator=(const MatchingInfoOutVerbose&) =
+ delete;
+ ~MatchingInfoOutVerbose() override = default;
+
+ // Outputs sizes and initializes |text_grid_|.
+ void InitSizes(size_t old_size, size_t new_size) override {
+ out_ << "Comparing old (" << old_size << " elements) and new (" << new_size
+ << " elements)" << std::endl;
+ text_grid_.assign(new_size, std::string(old_size, '-'));
+ best_dist_.assign(new_size, -1.0);
+ }
+
+ // Functions to update match status in text grid representation.
+
+ void DeclareTypeMismatch(int iold, int inew) override {
+ text_grid_[inew][iold] = 'T';
+ }
+ void DeclareUnsafeDistance(int iold, int inew) override {
+ text_grid_[inew][iold] = 'U';
+ }
+ void DeclareCandidate(int iold, int inew) override {
+ text_grid_[inew][iold] = 'C'; // Provisional.
+ }
+ void DeclareMatch(int iold,
+ int inew,
+ double dist,
+ bool is_identical) override {
+ text_grid_[inew][iold] = is_identical ? 'I' : 'M';
+ best_dist_[inew] = dist;
+ }
+ void DeclareOutlier(int iold, int inew) override {
+ text_grid_[inew][iold] = 'O';
+ }
+
+ // Functions to print detailed information.
+
+ void OutputCompare(const Element& old_element,
+ const Element& new_element,
+ double dist) override {
+ out_ << "Compare old" << old_element << " to new" << new_element << " --> "
+ << base::StringPrintf("%.5f", dist) << std::endl;
+ }
+
+ void OutputMatch(const Element& best_old_element,
+ const Element& new_element,
+ bool is_identical,
+ double best_dist) override {
+ if (is_identical) {
+ out_ << "Skipped old" << best_old_element << " - identical to new"
+ << new_element;
+ } else {
+ out_ << "Matched old" << best_old_element << " to new" << new_element
+ << " --> " << base::StringPrintf("%.5f", best_dist);
+ }
+ out_ << std::endl;
+ }
+
+ void OutputScores(const std::string& stats) override {
+ out_ << "Best dists: " << stats << std::endl;
+ }
+
+ void OutputTextGrid() override {
+ int new_size = static_cast<int>(text_grid_.size());
+ for (int inew = 0; inew < new_size; ++inew) {
+ const std::string& line = text_grid_[inew];
+ out_ << " ";
+ for (char ch : line) {
+ char prefix = (ch == 'I' || ch == 'M') ? '(' : ' ';
+ char suffix = (ch == 'I' || ch == 'M') ? ')' : ' ';
+ out_ << prefix << ch << suffix;
+ }
+ if (best_dist_[inew] >= 0)
+ out_ << " " << base::StringPrintf("%.5f", best_dist_[inew]);
+ out_ << std::endl;
+ }
+ if (!text_grid_.empty()) {
+ out_ << " Legend: I = identical, M = matched, T = type mismatch, "
+ "U = unsafe distance, C = candidate, O = outlier, - = skipped."
+ << std::endl;
+ }
+ }
+
+ private:
+ std::ostream& out_;
+
+ // Text grid representation of matches. Rows correspond to "old" and columns
+ // correspond to "new".
+ std::vector<std::string> text_grid_;
+
+ // For each "new" element, distance of best match. -1 denotes no match.
+ std::vector<double> best_dist_;
+};
+
+} // namespace
+
+/******** HeuristicEnsembleMatcher ********/
+
+HeuristicEnsembleMatcher::HeuristicEnsembleMatcher(std::ostream* out)
+ : out_(out) {}
+
+HeuristicEnsembleMatcher::~HeuristicEnsembleMatcher() = default;
+
+bool HeuristicEnsembleMatcher::RunMatch(ConstBufferView old_image,
+ ConstBufferView new_image) {
+ DCHECK(matches_.empty());
+ LOG(INFO) << "Start matching.";
+
+ // Find all elements in "old" and "new".
+ absl::optional<std::vector<Element>> old_elements =
+ FindEmbeddedElements(old_image, "Old file",
+ base::BindRepeating(DetectElementFromDisassembler));
+ if (!old_elements.has_value())
+ return false;
+ absl::optional<std::vector<Element>> new_elements =
+ FindEmbeddedElements(new_image, "New file",
+ base::BindRepeating(DetectElementFromDisassembler));
+ if (!new_elements.has_value())
+ return false;
+
+ std::unique_ptr<MatchingInfoOut> info_out;
+ if (out_)
+ info_out = std::make_unique<MatchingInfoOutVerbose>(*out_);
+ else
+ info_out = std::make_unique<MatchingInfoOutTerse>();
+
+ const int num_new_elements = base::checked_cast<int>(new_elements->size());
+ const int num_old_elements = base::checked_cast<int>(old_elements->size());
+ info_out->InitSizes(num_old_elements, num_new_elements);
+
+ // For each "new" element, match it with the "old" element that's nearest to
+ // it, with distance determined by BinaryDataHistogram. The resulting
+ // "old"-"new" pairs are stored into |results|. Possibilities:
+ // - Type mismatch: No match.
+ // - UnsafeDifference() heuristics fail: No match.
+ // - Identical match: Skip "new" since this is a trivial case.
+ // - Non-identical match: Match "new" with "old" with min distance.
+ // - No match: Skip "new".
+ struct Results {
+ int iold;
+ int inew;
+ double dist;
+ };
+ std::vector<Results> results;
+
+ // Precompute histograms for "old" since they get reused.
+ std::vector<BinaryDataHistogram> old_his(num_old_elements);
+ for (int iold = 0; iold < num_old_elements; ++iold) {
+ ConstBufferView sub_image(old_image[(*old_elements)[iold]]);
+ old_his[iold].Compute(sub_image);
+    // ProgramDetector should have imposed a minimal size limit on |sub_image|.
+    // Therefore the resulting histograms are expected to be valid.
+ CHECK(old_his[iold].IsValid());
+ }
+
+ const int kUninitIold = num_old_elements;
+ for (int inew = 0; inew < num_new_elements; ++inew) {
+ const Element& cur_new_element = (*new_elements)[inew];
+ ConstBufferView cur_new_sub_image(new_image[cur_new_element.region()]);
+ BinaryDataHistogram new_his;
+ new_his.Compute(cur_new_sub_image);
+ CHECK(new_his.IsValid());
+
+ double best_dist = HUGE_VAL;
+ int best_iold = kUninitIold;
+ bool is_identical = false;
+
+ for (int iold = 0; iold < num_old_elements; ++iold) {
+ const Element& cur_old_element = (*old_elements)[iold];
+ if (cur_old_element.exe_type != cur_new_element.exe_type) {
+ info_out->DeclareTypeMismatch(iold, inew);
+ continue;
+ }
+ if (UnsafeDifference(cur_old_element, cur_new_element)) {
+ info_out->DeclareUnsafeDistance(iold, inew);
+ continue;
+ }
+ double dist = old_his[iold].Distance(new_his);
+ info_out->DeclareCandidate(iold, inew);
+ info_out->OutputCompare(cur_old_element, cur_new_element, dist);
+ if (best_dist > dist) { // Tie resolution: First-one, first-serve.
+ best_iold = iold;
+ best_dist = dist;
+ if (best_dist == 0) {
+ ConstBufferView sub_image(old_image[cur_old_element.region()]);
+ if (sub_image.equals(cur_new_sub_image)) {
+ is_identical = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (best_iold != kUninitIold) {
+ const Element& best_old_element = (*old_elements)[best_iold];
+ info_out->DeclareMatch(best_iold, inew, best_dist, is_identical);
+ if (is_identical) // Skip "new" if identical match is found.
+ ++num_identical_;
+ else
+ results.push_back({best_iold, inew, best_dist});
+ info_out->OutputMatch(best_old_element, cur_new_element, is_identical,
+ best_dist);
+ }
+ }
+
+  // Populate |matches_| from |results|. To reduce the chance of false-positive
+ // matches, statistics on dists are computed. If a match's |dist| is an
+ // outlier then it is rejected.
+ if (results.size() > 0) {
+ OutlierDetector detector;
+ for (const auto& result : results) {
+ if (result.dist > 0)
+ detector.Add(result.dist);
+ }
+ detector.Prepare();
+ info_out->OutputScores(detector.RenderStats());
+ for (const Results& result : results) {
+ if (detector.DecideOutlier(result.dist) > 0) {
+ info_out->DeclareOutlier(result.iold, result.inew);
+ } else {
+ matches_.push_back(
+ {(*old_elements)[result.iold], (*new_elements)[result.inew]});
+ }
+ }
+ info_out->OutputTextGrid();
+ }
+
+ Trim();
+ return true;
+}
+
+} // namespace zucchini
diff --git a/heuristic_ensemble_matcher.h b/heuristic_ensemble_matcher.h
new file mode 100644
index 0000000..ec40787
--- /dev/null
+++ b/heuristic_ensemble_matcher.h
@@ -0,0 +1,39 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_
+#define COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_
+
+#include <ostream>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/ensemble_matcher.h"
+
+namespace zucchini {
+
+// An ensemble matcher that:
+// - Detects embedded elements in "old" and "new" archive files.
+// - Applies heuristics to create matched pairs.
+// It is desired to have matched pairs that:
+// - Have "reasonable" size difference (see UnsafeDifference() in the .cc file).
+// - Have "minimal distance" among other potential matched pairs.
+class HeuristicEnsembleMatcher : public EnsembleMatcher {
+ public:
+ explicit HeuristicEnsembleMatcher(std::ostream* out);
+ HeuristicEnsembleMatcher(const HeuristicEnsembleMatcher&) = delete;
+ const HeuristicEnsembleMatcher& operator=(const HeuristicEnsembleMatcher&) =
+ delete;
+ ~HeuristicEnsembleMatcher() override;
+
+ // EnsembleMatcher:
+ bool RunMatch(ConstBufferView old_image, ConstBufferView new_image) override;
+
+ private:
+ // Optional stream to print detailed information during matching.
+ std::ostream* out_ = nullptr;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_
diff --git a/image_index.cc b/image_index.cc
new file mode 100644
index 0000000..1efe5d8
--- /dev/null
+++ b/image_index.cc
@@ -0,0 +1,78 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/image_index.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/disassembler.h"
+
+namespace zucchini {
+
+ImageIndex::ImageIndex(ConstBufferView image)
+ : image_(image), type_tags_(image.size(), kNoTypeTag) {}
+
+ImageIndex::ImageIndex(ImageIndex&&) = default;
+
+ImageIndex::~ImageIndex() = default;
+
+bool ImageIndex::Initialize(Disassembler* disasm) {
+ std::vector<ReferenceGroup> ref_groups = disasm->MakeReferenceGroups();
+ for (const auto& group : ref_groups) {
+ // Build pool-to-type mapping.
+ DCHECK_NE(kNoPoolTag, group.pool_tag());
+ TargetPool& target_pool = target_pools_[group.pool_tag()];
+ target_pool.AddType(group.type_tag());
+ target_pool.InsertTargets(std::move(*group.GetReader(disasm)));
+ }
+ for (const auto& group : ref_groups) {
+ // Find and store all references for each type, returns false on finding
+ // any overlap, to signal error.
+ if (!InsertReferences(group.traits(),
+ std::move(*group.GetReader(disasm)))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool ImageIndex::IsToken(offset_t location) const {
+ TypeTag type = LookupType(location);
+
+ // |location| points into raw data.
+ if (type == kNoTypeTag)
+ return true;
+
+ // |location| points into a Reference.
+ Reference reference = refs(type).at(location);
+ // Only the first byte of a reference is a token.
+ return location == reference.location;
+}
+
+bool ImageIndex::InsertReferences(const ReferenceTypeTraits& traits,
+ ReferenceReader&& ref_reader) {
+ // Store ReferenceSet for current type (of |group|).
+ DCHECK_NE(kNoTypeTag, traits.type_tag);
+ auto result = reference_sets_.emplace(
+ traits.type_tag, ReferenceSet(traits, pool(traits.pool_tag)));
+ DCHECK(result.second);
+
+ result.first->second.InitReferences(std::move(ref_reader));
+ for (auto ref : reference_sets_.at(traits.type_tag)) {
+ DCHECK(RangeIsBounded(ref.location, traits.width, size()));
+ auto cur_type_tag = type_tags_.begin() + ref.location;
+
+ // Check for overlap with existing reference. If found, then invalidate.
+ if (std::any_of(cur_type_tag, cur_type_tag + traits.width,
+ [](TypeTag type) { return type != kNoTypeTag; })) {
+ return false;
+ }
+ std::fill(cur_type_tag, cur_type_tag + traits.width, traits.type_tag);
+ }
+ return true;
+}
+
+} // namespace zucchini
diff --git a/image_index.h b/image_index.h
new file mode 100644
index 0000000..b5acee1
--- /dev/null
+++ b/image_index.h
@@ -0,0 +1,116 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_
+#define COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <vector>
+
+#include "base/check_op.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/reference_set.h"
+#include "components/zucchini/target_pool.h"
+
+namespace zucchini {
+
+class Disassembler;
+
+// A class that holds annotations of an image, allowing quick access to its raw
+// and reference content. The memory overhead of storing all references is
+// relatively high, so this is only used during patch generation.
+class ImageIndex {
+ public:
+ explicit ImageIndex(ConstBufferView image);
+ ImageIndex(const ImageIndex&) = delete;
+ ImageIndex(ImageIndex&&);
+ ~ImageIndex();
+
+ // Inserts all references read from |disasm|. This should be called exactly
+ // once. If overlap between any two references of any type is encountered,
+ // returns false and leaves the object in an invalid state. Otherwise,
+ // returns true.
+ // TODO(huangs): Refactor ReaderFactory and WriterFactory so
+ // |const Disassembler&| can be used here.
+ bool Initialize(Disassembler* disasm);
+
+ // Returns the array size needed to accommodate all reference type values.
+ size_t TypeCount() const {
+ if (reference_sets_.empty())
+ return 0U;
+ return reference_sets_.rbegin()->first.value() + 1; // Max key + 1.
+ }
+
+ // Returns the array size needed to accommodate all pool values.
+ size_t PoolCount() const {
+ if (target_pools_.empty())
+ return 0U;
+ return target_pools_.rbegin()->first.value() + 1; // Max key + 1.
+ }
+
+ // Returns true if |image_[location]| is either:
+ // - A raw value.
+ // - The first byte of a reference.
+ bool IsToken(offset_t location) const;
+
+ // Returns true if |image_[location]| is part of a reference.
+ bool IsReference(offset_t location) const {
+ return LookupType(location) != kNoTypeTag;
+ }
+
+ // Returns the type tag of the reference covering |location|, or kNoTypeTag if
+ // |location| is not part of a reference.
+ TypeTag LookupType(offset_t location) const {
+ DCHECK_LT(location, size());
+ return type_tags_[location];
+ }
+
+ // Returns the raw value at |location|.
+ uint8_t GetRawValue(offset_t location) const {
+ DCHECK_LT(location, size());
+ return image_[location];
+ }
+
+ const std::map<PoolTag, TargetPool>& target_pools() const {
+ return target_pools_;
+ }
+ const std::map<TypeTag, ReferenceSet>& reference_sets() const {
+ return reference_sets_;
+ }
+
+ const TargetPool& pool(PoolTag pool_tag) const {
+ return target_pools_.at(pool_tag);
+ }
+ const ReferenceSet& refs(TypeTag type_tag) const {
+ return reference_sets_.at(type_tag);
+ }
+
+ // Returns the size of the image.
+ size_t size() const { return image_.size(); }
+
+ private:
+ // Inserts to |*this| index, all references described by |traits| read from
+ // |ref_reader|, which gets consumed. This should be called exactly once for
+ // each reference type. If overlap between any two references of any type is
+ // encountered, returns false and leaves the object in an invalid state.
+ // Otherwise, returns true.
+ bool InsertReferences(const ReferenceTypeTraits& traits,
+ ReferenceReader&& ref_reader);
+
+ const ConstBufferView image_;
+
+ // Used for random access lookup of reference type, for each byte in |image_|.
+ std::vector<TypeTag> type_tags_;
+
+ std::map<PoolTag, TargetPool> target_pools_;
+ std::map<TypeTag, ReferenceSet> reference_sets_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_
diff --git a/image_index_unittest.cc b/image_index_unittest.cc
new file mode 100644
index 0000000..cf6f8a7
--- /dev/null
+++ b/image_index_unittest.cc
@@ -0,0 +1,131 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/image_index.h"
+
+#include <stddef.h>
+
+#include <numeric>
+#include <vector>
+
+#include "base/test/gtest_util.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/test_disassembler.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+class ImageIndexTest : public testing::Test {
+ protected:
+ ImageIndexTest()
+ : buffer_(20),
+ image_index_(ConstBufferView(buffer_.data(), buffer_.size())) {
+ std::iota(buffer_.begin(), buffer_.end(), 0);
+ }
+
+ void InitializeWithDefaultTestData() {
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)},
+ {{1, 0}, {8, 1}, {10, 2}},
+ {4, TypeTag(1), PoolTag(0)}, {{3, 3}},
+ {3, TypeTag(2), PoolTag(1)}, {{12, 4}, {17, 5}});
+ EXPECT_TRUE(image_index_.Initialize(&disasm));
+ }
+
+ std::vector<uint8_t> buffer_;
+ ImageIndex image_index_;
+};
+
+TEST_F(ImageIndexTest, TypeAndPool) {
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, {},
+ {4, TypeTag(1), PoolTag(0)}, {},
+ {3, TypeTag(2), PoolTag(1)}, {});
+ EXPECT_TRUE(image_index_.Initialize(&disasm));
+
+ EXPECT_EQ(3U, image_index_.TypeCount());
+ EXPECT_EQ(2U, image_index_.PoolCount());
+
+ EXPECT_EQ(TypeTag(0), image_index_.refs(TypeTag(0)).type_tag());
+ EXPECT_EQ(TypeTag(1), image_index_.refs(TypeTag(1)).type_tag());
+ EXPECT_EQ(TypeTag(2), image_index_.refs(TypeTag(2)).type_tag());
+
+ EXPECT_EQ(PoolTag(0), image_index_.refs(TypeTag(0)).pool_tag());
+ EXPECT_EQ(PoolTag(0), image_index_.refs(TypeTag(1)).pool_tag());
+ EXPECT_EQ(PoolTag(1), image_index_.refs(TypeTag(2)).pool_tag());
+}
+
+TEST_F(ImageIndexTest, InvalidInitialize1) {
+ // Overlap within the same group.
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, {{1, 0}, {2, 0}},
+ {4, TypeTag(1), PoolTag(0)}, {},
+ {3, TypeTag(2), PoolTag(1)}, {});
+ EXPECT_FALSE(image_index_.Initialize(&disasm));
+}
+
+TEST_F(ImageIndexTest, InvalidInitialize2) {
+ // Overlap across different readers.
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)},
+ {{1, 0}, {8, 1}, {10, 2}},
+ {4, TypeTag(1), PoolTag(0)}, {{3, 3}},
+ {3, TypeTag(2), PoolTag(1)}, {{11, 0}});
+ EXPECT_FALSE(image_index_.Initialize(&disasm));
+}
+
+TEST_F(ImageIndexTest, LookupType) {
+ InitializeWithDefaultTestData();
+
+ std::vector<int> expected = {
+ -1, // raw
+ 0, 0, // ref 0
+ 1, 1, 1, 1, // ref 1
+ -1, // raw
+ 0, 0, // ref 0
+ 0, 0, // ref 0
+ 2, 2, 2, // ref 2
+ -1, -1, // raw
+ 2, 2, 2, // ref 2
+ };
+
+ for (offset_t i = 0; i < image_index_.size(); ++i)
+ EXPECT_EQ(TypeTag(expected[i]), image_index_.LookupType(i));
+}
+
+TEST_F(ImageIndexTest, IsToken) {
+ InitializeWithDefaultTestData();
+
+ std::vector<bool> expected = {
+ 1, // raw
+ 1, 0, // ref 0
+ 1, 0, 0, 0, // ref 1
+ 1, // raw
+ 1, 0, // ref 0
+ 1, 0, // ref 0
+ 1, 0, 0, // ref 2
+ 1, 1, // raw
+ 1, 0, 0, // ref 2
+ };
+
+ for (offset_t i = 0; i < image_index_.size(); ++i)
+ EXPECT_EQ(expected[i], image_index_.IsToken(i));
+}
+
+TEST_F(ImageIndexTest, IsReference) {
+ InitializeWithDefaultTestData();
+
+ std::vector<bool> expected = {
+ 0, // raw
+ 1, 1, // ref 0
+ 1, 1, 1, 1, // ref 1
+ 0, // raw
+ 1, 1, // ref 0
+ 1, 1, // ref 0
+ 1, 1, 1, // ref 2
+ 0, 0, // raw
+ 1, 1, 1, // ref 2
+ };
+
+ for (offset_t i = 0; i < image_index_.size(); ++i)
+ EXPECT_EQ(expected[i], image_index_.IsReference(i));
+}
+
+} // namespace zucchini
diff --git a/image_utils.h b/image_utils.h
new file mode 100644
index 0000000..748e20b
--- /dev/null
+++ b/image_utils.h
@@ -0,0 +1,225 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
+#define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "base/format_macros.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/strings/stringprintf.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/typed_value.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+// offset_t is used to describe an offset in an image.
+// Files bigger than 4GB are not supported.
+using offset_t = uint32_t;
+// Divide by 2 since label marking uses the most significant bit.
+constexpr offset_t kOffsetBound = static_cast<offset_t>(-1) / 2;
+// Use 0xFFFFFFF*E*, since 0xFFFFFFF*F* is a sentinel value for Dex references.
+constexpr offset_t kInvalidOffset = static_cast<offset_t>(-2);
+
+// key_t is used to identify an offset in a table.
+using key_t = uint32_t;
+
+enum Bitness : uint8_t {
+ // The numerical values are intended to simplify WidthOf() below.
+ kBit32 = 4,
+ kBit64 = 8
+};
+
+inline uint32_t WidthOf(Bitness bitness) {
+ return static_cast<uint32_t>(bitness);
+}
+
+// Used to uniquely identify a reference type.
+// Strongly typed objects are used to avoid ambiguities with PoolTag.
+struct TypeTag : public TypedValue<TypeTag, uint8_t> {
+ // inheriting constructor:
+ using TypedValue<TypeTag, uint8_t>::TypedValue;
+};
+
+// Used to uniquely identify a pool.
+struct PoolTag : public TypedValue<PoolTag, uint8_t> {
+ // inheriting constructor:
+ using TypedValue<PoolTag, uint8_t>::TypedValue;
+};
+
+constexpr TypeTag kNoTypeTag(0xFF); // Typically used to identify raw data.
+constexpr PoolTag kNoPoolTag(0xFF);
+
+// Specification of references in an image file.
+struct ReferenceTypeTraits {
+ constexpr ReferenceTypeTraits(offset_t width_in,
+ TypeTag type_tag_in,
+ PoolTag pool_tag_in)
+ : width(width_in), type_tag(type_tag_in), pool_tag(pool_tag_in) {}
+
+ // |width| specifies number of bytes covered by the reference's binary
+ // encoding.
+ const offset_t width;
+ // |type_tag| identifies the reference type being described.
+ const TypeTag type_tag;
+ // |pool_tag| identifies the pool this type belongs to.
+ const PoolTag pool_tag;
+};
+
+// There is no need to store |type| because references of the same type are
+// always aggregated into the same container, and so during iteration we'd have
+// |type| already.
+struct Reference {
+ offset_t location;
+ offset_t target;
+};
+
+inline bool operator==(const Reference& a, const Reference& b) {
+ return a.location == b.location && a.target == b.target;
+}
+
+// Interface for extracting References through member function GetNext().
+// This is used by Disassemblers to extract references from an image file.
+// Typically, a Reader lazily extracts values and does not hold any storage.
+class ReferenceReader {
+ public:
+ virtual ~ReferenceReader() = default;
+
+ // Returns the next available Reference, or nullopt_t if exhausted.
+ // Extracted References must be ordered by their location in the image.
+ virtual absl::optional<Reference> GetNext() = 0;
+};
+
+// Interface for writing References through member function
+// PutNext(reference). This is used by Disassemblers to write new References
+// in the image file.
+class ReferenceWriter {
+ public:
+ virtual ~ReferenceWriter() = default;
+
+ // Writes |reference| in the underlying image file. This operation always
+ // succeeds.
+ virtual void PutNext(Reference reference) = 0;
+};
+
+// An Equivalence is a block of length |length| that approximately match in
+// |old_image| at an offset of |src_offset| and in |new_image| at an offset of
+// |dst_offset|.
+struct Equivalence {
+ offset_t src_offset;
+ offset_t dst_offset;
+ offset_t length;
+
+ offset_t src_end() const { return src_offset + length; }
+ offset_t dst_end() const { return dst_offset + length; }
+};
+
+inline bool operator==(const Equivalence& a, const Equivalence& b) {
+ return a.src_offset == b.src_offset && a.dst_offset == b.dst_offset &&
+ a.length == b.length;
+}
+
+// Same as Equivalence, but with a similarity score. This is only used when
+// generating the patch.
+struct EquivalenceCandidate {
+ Equivalence eq;
+ double similarity;
+};
+
+template <size_t N>
+inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) {
+ static_assert(N == 5, "Expected ExeType of length 4 + 1 null byte.");
+ return (exe_type[3] << 24) | (exe_type[2] << 16) | (exe_type[1] << 8) |
+ exe_type[0];
+}
+
+// Enumerations for supported executables. Values in this enum must be distinct.
+// Once present, values should never be altered or removed to ensure backwards
+// compatibility and patch type collision avoidance.
+enum ExecutableType : uint32_t {
+ kExeTypeUnknown = UINT32_MAX,
+ kExeTypeNoOp = ExeTypeToUint32("NoOp"),
+ kExeTypeWin32X86 = ExeTypeToUint32("Px86"),
+ kExeTypeWin32X64 = ExeTypeToUint32("Px64"),
+ kExeTypeElfX86 = ExeTypeToUint32("Ex86"),
+ kExeTypeElfX64 = ExeTypeToUint32("Ex64"),
+ kExeTypeElfAArch32 = ExeTypeToUint32("EA32"),
+ kExeTypeElfAArch64 = ExeTypeToUint32("EA64"),
+ kExeTypeDex = ExeTypeToUint32("DEX "),
+ kExeTypeZtf = ExeTypeToUint32("ZTF "),
+};
+
+constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) {
+ switch (static_cast<ExecutableType>(possible_exe_type)) {
+ case kExeTypeNoOp: // Falls through.
+ case kExeTypeWin32X86: // Falls through.
+ case kExeTypeWin32X64: // Falls through.
+ case kExeTypeElfX86: // Falls through.
+ case kExeTypeElfX64: // Falls through.
+ case kExeTypeElfAArch32: // Falls through.
+ case kExeTypeElfAArch64: // Falls through.
+ case kExeTypeDex: // Falls through.
+ case kExeTypeZtf: // Falls through.
+ case kExeTypeUnknown:
+ return static_cast<ExecutableType>(possible_exe_type);
+ default:
+ return kExeTypeUnknown;
+ }
+}
+
+inline std::string CastExecutableTypeToString(ExecutableType exe_type) {
+ uint32_t v = static_cast<uint32_t>(exe_type);
+ char result[] = {static_cast<char>(v), static_cast<char>(v >> 8),
+ static_cast<char>(v >> 16), static_cast<char>(v >> 24), 0};
+ return result;
+}
+
+// A region in an image with associated executable type |exe_type|. If
+// |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data.
+struct Element : public BufferRegion {
+ Element() = default;
+ constexpr Element(const BufferRegion& region_in, ExecutableType exe_type_in)
+ : BufferRegion(region_in), exe_type(exe_type_in) {}
+ constexpr explicit Element(const BufferRegion& region_in)
+ : BufferRegion(region_in), exe_type(kExeTypeNoOp) {}
+
+ // Similar to lo() and hi(), but returns values in offset_t.
+ offset_t BeginOffset() const { return base::checked_cast<offset_t>(lo()); }
+ offset_t EndOffset() const { return base::checked_cast<offset_t>(hi()); }
+
+ BufferRegion region() const { return {offset, size}; }
+
+ friend bool operator==(const Element& a, const Element& b) {
+ return a.exe_type == b.exe_type && a.offset == b.offset && a.size == b.size;
+ }
+
+ ExecutableType exe_type;
+};
+
+// A matched pair of Elements.
+struct ElementMatch {
+ bool IsValid() const { return old_element.exe_type == new_element.exe_type; }
+ ExecutableType exe_type() const { return old_element.exe_type; }
+
+ // Represents match as "#+#=#+#", where "#" denotes the integers:
+ // [offset in "old", size in "old", offset in "new", size in "new"].
+ // Note that element type is omitted.
+ std::string ToString() const {
+ return base::StringPrintf("%" PRIuS "+%" PRIuS "=%" PRIuS "+%" PRIuS "",
+ old_element.offset, old_element.size,
+ new_element.offset, new_element.size);
+ }
+
+ Element old_element;
+ Element new_element;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
diff --git a/image_utils_unittest.cc b/image_utils_unittest.cc
new file mode 100644
index 0000000..2cf6455
--- /dev/null
+++ b/image_utils_unittest.cc
@@ -0,0 +1,33 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/image_utils.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+TEST(ImageUtilsTest, Bitness) {
+ EXPECT_EQ(4U, WidthOf(kBit32));
+ EXPECT_EQ(8U, WidthOf(kBit64));
+}
+
+TEST(ImageUtilsTest, CastExecutableTypeToString) {
+ EXPECT_EQ("NoOp", CastExecutableTypeToString(kExeTypeNoOp));
+ EXPECT_EQ("Px86", CastExecutableTypeToString(kExeTypeWin32X86));
+ EXPECT_EQ("EA64", CastExecutableTypeToString(kExeTypeElfAArch64));
+ EXPECT_EQ("DEX ", CastExecutableTypeToString(kExeTypeDex));
+}
+
+TEST(ImageUtilsTest, ElementMatchToString) {
+ constexpr ExecutableType kAnyType = kExeTypeWin32X86;
+ EXPECT_EQ("1+2=3+4",
+ (ElementMatch{{{1, 2}, kAnyType}, {{3, 4}, kAnyType}}).ToString());
+ EXPECT_EQ(
+ "1000000000+1=0+1000000000",
+ (ElementMatch{{{1000000000, 1}, kAnyType}, {{0, 1000000000}, kAnyType}})
+ .ToString());
+}
+
+} // namespace zucchini
diff --git a/imposed_ensemble_matcher.cc b/imposed_ensemble_matcher.cc
new file mode 100644
index 0000000..1c1301b
--- /dev/null
+++ b/imposed_ensemble_matcher.cc
@@ -0,0 +1,143 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/imposed_ensemble_matcher.h"
+
+#include <algorithm>
+#include <sstream>
+#include <utility>
+
+#include "base/bind.h"
+#include "base/logging.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+/******** ImposedMatchParser ********/
+
+ImposedMatchParser::ImposedMatchParser() = default;
+
+ImposedMatchParser::~ImposedMatchParser() = default;
+
+ImposedMatchParser::Status ImposedMatchParser::Parse(
+ std::string imposed_matches,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ ElementDetector&& detector) {
+ CHECK(matches_.empty());
+ CHECK(bad_matches_.empty());
+
+ // Parse |imposed_matches| and check bounds.
+ std::istringstream iss(std::move(imposed_matches));
+ bool first = true;
+ iss.peek(); // Makes empty |iss| realize EOF is reached.
+ while (iss && !iss.eof()) {
+ // Eat delimiter.
+ if (first) {
+ first = false;
+ } else if (!(iss >> EatChar(','))) {
+ return kInvalidDelimiter;
+ }
+ // Extract parameters for one imposed match.
+ offset_t old_offset = 0U;
+ size_t old_size = 0U;
+ offset_t new_offset = 0U;
+ size_t new_size = 0U;
+ if (!(iss >> StrictUInt<offset_t>(old_offset) >> EatChar('+') >>
+ StrictUInt<size_t>(old_size) >> EatChar('=') >>
+ StrictUInt<offset_t>(new_offset) >> EatChar('+') >>
+ StrictUInt<size_t>(new_size))) {
+ return kParseError;
+ }
+ // Check bounds.
+ if (old_size == 0 || new_size == 0 ||
+ !old_image.covers({old_offset, old_size}) ||
+ !new_image.covers({new_offset, new_size})) {
+ return kOutOfBound;
+ }
+ matches_.push_back(
+ {{{old_offset, old_size}, kExeTypeUnknown}, // Assign type later.
+ {{new_offset, new_size}, kExeTypeUnknown}}); // Assign type later.
+ }
+ // Sort matches by "new" file offsets. This helps with overlap checks.
+ std::sort(matches_.begin(), matches_.end(),
+ [](const ElementMatch& match_a, const ElementMatch& match_b) {
+ return match_a.new_element.offset < match_b.new_element.offset;
+ });
+
+ // Check for overlaps in "new" file.
+ if (std::adjacent_find(
+ matches_.begin(), matches_.end(),
+ [](const ElementMatch& match1, const ElementMatch& match2) {
+ return match1.new_element.hi() > match2.new_element.lo();
+ }) != matches_.end()) {
+ return kOverlapInNew;
+ }
+
+ // Compute types and verify consistency. Remove identical matches and matches
+ // where any sub-image has an unknown type.
+ size_t write_idx = 0;
+ for (size_t read_idx = 0; read_idx < matches_.size(); ++read_idx) {
+ ConstBufferView old_sub_image(
+ old_image[matches_[read_idx].old_element.region()]);
+ ConstBufferView new_sub_image(
+ new_image[matches_[read_idx].new_element.region()]);
+ // Remove identical match.
+ if (old_sub_image.equals(new_sub_image)) {
+ ++num_identical_;
+ continue;
+ }
+ // Check executable types of sub-images.
+ absl::optional<Element> old_element = detector.Run(old_sub_image);
+ absl::optional<Element> new_element = detector.Run(new_sub_image);
+ if (!old_element || !new_element) {
+ // Skip unknown types, including those mixed with known types.
+ bad_matches_.push_back(matches_[read_idx]);
+ continue;
+ } else if (old_element->exe_type != new_element->exe_type) {
+ // Error if types are known, but inconsistent.
+ return kTypeMismatch;
+ }
+
+ // Keep match and remove gaps.
+ matches_[read_idx].old_element.exe_type = old_element->exe_type;
+ matches_[read_idx].new_element.exe_type = new_element->exe_type;
+ if (write_idx < read_idx)
+ matches_[write_idx] = matches_[read_idx];
+ ++write_idx;
+ }
+ matches_.resize(write_idx);
+ return kSuccess;
+}
+
+/******** ImposedEnsembleMatcher ********/
+
+ImposedEnsembleMatcher::ImposedEnsembleMatcher(
+ const std::string& imposed_matches)
+ : imposed_matches_(imposed_matches) {}
+
+ImposedEnsembleMatcher::~ImposedEnsembleMatcher() = default;
+
+bool ImposedEnsembleMatcher::RunMatch(ConstBufferView old_image,
+ ConstBufferView new_image) {
+ DCHECK(matches_.empty());
+ LOG(INFO) << "Start matching.";
+ ImposedMatchParser parser;
+ ImposedMatchParser::Status status =
+ parser.Parse(std::move(imposed_matches_), old_image, new_image,
+ base::BindRepeating(DetectElementFromDisassembler));
+ // Print all warnings first.
+ for (const ElementMatch& bad_match : *parser.mutable_bad_matches())
+ LOG(WARNING) << "Skipped match with unknown type: " << bad_match.ToString();
+ if (status != ImposedMatchParser::kSuccess) {
+ LOG(ERROR) << "Imposed match failed with error code " << status << ".";
+ return false;
+ }
+ num_identical_ = parser.num_identical();
+ matches_ = std::move(*parser.mutable_matches());
+ Trim();
+ return true;
+}
+
+} // namespace zucchini
diff --git a/imposed_ensemble_matcher.h b/imposed_ensemble_matcher.h
new file mode 100644
index 0000000..39b0df5
--- /dev/null
+++ b/imposed_ensemble_matcher.h
@@ -0,0 +1,83 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_
+#define COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_
+
+#include <stddef.h>
+
+#include <string>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/ensemble_matcher.h"
+
+namespace zucchini {
+
+// A class to parse imposed match format, which is either an empty string (no
+// imposed patch), or a string formatted as:
+// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"),
+// where "#+#=#+#" encodes a match as 4 unsigned integers:
+// [offset in "old", size in "old", offset in "new", size in "new"].
+class ImposedMatchParser {
+ public:
+ enum Status {
+ kSuccess,
+ kInvalidDelimiter,
+ kParseError,
+ kOutOfBound,
+ kOverlapInNew,
+ kTypeMismatch,
+ };
+
+ ImposedMatchParser();
+ ImposedMatchParser(const ImposedMatchParser&) = delete;
+ const ImposedMatchParser& operator=(const ImposedMatchParser&) = delete;
+ ~ImposedMatchParser();
+
+ // Parses |imposed_matches| and writes the results to member variables.
+ // |old_image| and |new_image| are used for validation. Returns a Status value
+ // to signal success or various error modes. |detector| is used to validate
+ // Element types for matched pairs. This should only be called once for each
+ // instance.
+ Status Parse(std::string imposed_matches,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ ElementDetector&& detector);
+
+ size_t num_identical() const { return num_identical_; }
+ std::vector<ElementMatch>* mutable_matches() { return &matches_; }
+ std::vector<ElementMatch>* mutable_bad_matches() { return &bad_matches_; }
+
+ private:
+ size_t num_identical_ = 0;
+ std::vector<ElementMatch> matches_;
+ // Stores "forgiven" bad matches, so the caller can impose matches for
+ // unsupported image types (which will simply be ignored). Note that imposing
+ // matches for known but incompatible image types would result in error.
+ std::vector<ElementMatch> bad_matches_;
+};
+
+// An ensemble matcher that parses a format string that describes matches.
+class ImposedEnsembleMatcher : public EnsembleMatcher {
+ public:
+  // |imposed_matches| specifies imposed matches, using the format described above.
+ // Validation is performed in RunMatch().
+ explicit ImposedEnsembleMatcher(const std::string& imposed_matches);
+ ImposedEnsembleMatcher(const ImposedEnsembleMatcher&) = delete;
+ const ImposedEnsembleMatcher& operator=(const ImposedEnsembleMatcher&) =
+ delete;
+ ~ImposedEnsembleMatcher() override;
+
+ // EnsembleMatcher:
+ bool RunMatch(ConstBufferView old_image, ConstBufferView new_image) override;
+
+ private:
+ const std::string imposed_matches_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_
diff --git a/imposed_ensemble_matcher_unittest.cc b/imposed_ensemble_matcher_unittest.cc
new file mode 100644
index 0000000..9a6dc7d
--- /dev/null
+++ b/imposed_ensemble_matcher_unittest.cc
@@ -0,0 +1,214 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/imposed_ensemble_matcher.h"
+
+#include "base/bind.h"
+#include "base/callback_helpers.h"
+#include "base/check_op.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+namespace {
+
+// This test uses a mock archive format where regions are determined by their
+// consecutive byte values rather than parsing real executables. In fact, since
+// elements are imposed, only the first byte of the element is used to specify
+// executable type of the mock data:
+// - 'W' and 'w' specify kExeTypeWin32X86.
+// - 'E' and 'e' specify kExeTypeElfX86.
+// - Everything else specify kExeTypeUnknown.
+class TestElementDetector {
+ public:
+ TestElementDetector() {}
+
+ absl::optional<Element> Run(ConstBufferView image) const {
+ DCHECK_GT(image.size(), 0U);
+ char first_char = *image.begin();
+ if (first_char == 'W' || first_char == 'w')
+ return Element(image.local_region(), kExeTypeWin32X86);
+ if (first_char == 'E' || first_char == 'e')
+ return Element(image.local_region(), kExeTypeElfX86);
+ return absl::nullopt;
+ }
+};
+
+} // namespace
+
+TEST(ImposedMatchParserTest, ImposedMatchParser) {
+ std::vector<uint8_t> old_data;
+ std::vector<uint8_t> new_data;
+ auto populate = [](const std::string& s, std::vector<uint8_t>* data) {
+ for (char ch : s)
+ data->push_back(static_cast<uint8_t>(ch));
+ };
+ // Pos: 11111111
+ // 012345678901234567
+ populate("1WW222EEEE", &old_data);
+ populate("33eee2222222wwww44", &new_data);
+
+ ConstBufferView old_image(&old_data[0], old_data.size());
+ ConstBufferView new_image(&new_data[0], new_data.size());
+
+ TestElementDetector detector;
+
+ // Reusable output values.
+ std::string prev_imposed_matches;
+ ImposedMatchParser::Status status;
+ size_t num_identical;
+ std::vector<ElementMatch> matches;
+ std::vector<ElementMatch> bad_matches;
+
+ auto run_test = [&](const std::string& imposed_matches) -> bool {
+ prev_imposed_matches = imposed_matches;
+ status = ImposedMatchParser::kSuccess;
+ num_identical = 0;
+ matches.clear();
+ bad_matches.clear();
+ ImposedMatchParser parser;
+ status = parser.Parse(imposed_matches, old_image, new_image,
+ base::BindRepeating(&TestElementDetector::Run,
+ base::Unretained(&detector)));
+ num_identical = parser.num_identical();
+ matches = std::move(*parser.mutable_matches());
+ bad_matches = std::move(*parser.mutable_bad_matches());
+ return status == ImposedMatchParser::kSuccess;
+ };
+
+ auto run_check = [&](const ElementMatch& match, ExecutableType exe_type,
+ offset_t old_offset, size_t old_size,
+ offset_t new_offset, size_t new_size) {
+ EXPECT_EQ(exe_type, match.exe_type()) << prev_imposed_matches;
+ EXPECT_EQ(exe_type, match.old_element.exe_type) << prev_imposed_matches;
+ EXPECT_EQ(old_offset, match.old_element.offset) << prev_imposed_matches;
+ EXPECT_EQ(old_size, match.old_element.size) << prev_imposed_matches;
+ EXPECT_EQ(exe_type, match.new_element.exe_type) << prev_imposed_matches;
+ EXPECT_EQ(new_offset, match.new_element.offset) << prev_imposed_matches;
+ EXPECT_EQ(new_size, match.new_element.size) << prev_imposed_matches;
+ };
+
+ // Empty string: Vacuous but valid.
+ EXPECT_TRUE(run_test(""));
+ EXPECT_EQ(0U, num_identical);
+ EXPECT_EQ(0U, matches.size());
+ EXPECT_EQ(0U, bad_matches.size());
+
+ // Full matches. Different permutations give same result.
+ for (const std::string& imposed_matches :
+ {"1+2=12+4,4+2=5+2,6+4=2+3", "1+2=12+4,6+4=2+3,4+2=5+2",
+ "4+2=5+2,1+2=12+4,6+4=2+3", "4+2=5+2,6+4=2+3,1+2=12+4",
+ "6+4=2+3,1+2=12+4,4+2=5+2", "6+4=2+3,1+2=12+4,4+2=5+2"}) {
+ EXPECT_TRUE(run_test(imposed_matches));
+ EXPECT_EQ(1U, num_identical); // "4+2=5+2"
+ EXPECT_EQ(2U, matches.size());
+ // Results are sorted by "new" offsets.
+ run_check(matches[0], kExeTypeElfX86, 6, 4, 2, 3);
+ run_check(matches[1], kExeTypeWin32X86, 1, 2, 12, 4);
+ EXPECT_EQ(0U, bad_matches.size());
+ }
+
+ // Single subregion match.
+ EXPECT_TRUE(run_test("1+2=12+4"));
+ EXPECT_EQ(0U, num_identical);
+ EXPECT_EQ(1U, matches.size());
+ run_check(matches[0], kExeTypeWin32X86, 1, 2, 12, 4);
+ EXPECT_EQ(0U, bad_matches.size());
+
+ // Single subregion match. We're lax with redundant 0.
+ EXPECT_TRUE(run_test("6+04=02+10"));
+ EXPECT_EQ(0U, num_identical);
+ EXPECT_EQ(1U, matches.size());
+ run_check(matches[0], kExeTypeElfX86, 6, 4, 2, 10);
+ EXPECT_EQ(0U, bad_matches.size());
+
+ // Successive elements, no overlap.
+ EXPECT_TRUE(run_test("1+1=12+1,2+1=13+1"));
+ EXPECT_EQ(0U, num_identical);
+ EXPECT_EQ(2U, matches.size());
+ run_check(matches[0], kExeTypeWin32X86, 1, 1, 12, 1);
+ run_check(matches[1], kExeTypeWin32X86, 2, 1, 13, 1);
+ EXPECT_EQ(0U, bad_matches.size());
+
+ // Overlap in "old" file is okay.
+ EXPECT_TRUE(run_test("1+2=12+2,1+2=14+2"));
+ EXPECT_EQ(0U, num_identical);
+ EXPECT_EQ(2U, matches.size());
+ run_check(matches[0], kExeTypeWin32X86, 1, 2, 12, 2);
+ run_check(matches[1], kExeTypeWin32X86, 1, 2, 14, 2);
+ EXPECT_EQ(0U, bad_matches.size());
+
+ // Entire files: Have unknown type, so are recognized as such, and ignored.
+ EXPECT_TRUE(run_test("0+10=0+18"));
+ EXPECT_EQ(0U, num_identical);
+ EXPECT_EQ(0U, matches.size());
+ EXPECT_EQ(1U, bad_matches.size());
+ run_check(bad_matches[0], kExeTypeUnknown, 0, 10, 0, 18);
+
+ // Forgive matches that mix known type with unknown type.
+ EXPECT_TRUE(run_test("1+2=0+18"));
+ EXPECT_EQ(0U, num_identical);
+ EXPECT_EQ(0U, matches.size());
+ EXPECT_EQ(1U, bad_matches.size());
+ run_check(bad_matches[0], kExeTypeUnknown, 1, 2, 0, 18);
+
+ EXPECT_TRUE(run_test("0+10=12+4"));
+ EXPECT_EQ(0U, num_identical);
+ EXPECT_EQ(0U, matches.size());
+ EXPECT_EQ(1U, bad_matches.size());
+ run_check(bad_matches[0], kExeTypeUnknown, 0, 10, 12, 4);
+
+ // Test invalid delimiter.
+ for (const std::string& imposed_matches :
+ {"1+2=12+4,4+2=5+2x", "1+2=12+4 4+2=5+2", "1+2=12+4,4+2=5+2 ",
+ "1+2=12+4 "}) {
+ EXPECT_FALSE(run_test(imposed_matches));
+ EXPECT_EQ(ImposedMatchParser::kInvalidDelimiter, status);
+ }
+
+ // Test parse errors, including uint32_t overflow.
+ for (const std::string& imposed_matches :
+ {"x1+2=12+4,4+2=5+2,6+4=2+3", "x1+2=12+4,4+2=5+2,6+4=2+3x", ",", " ",
+ "+2=12+4", "1+2+12+4", "1=2+12+4", " 1+2=12+4", "1+2= 12+4", "1", "1+2",
+ "1+2=", "1+2=12", "1+2=12+", "4294967296+2=12+4"}) {
+ EXPECT_FALSE(run_test(imposed_matches));
+ EXPECT_EQ(ImposedMatchParser::kParseError, status);
+ }
+
+  // Test bound errors, including 0-size.
+ for (const std::string& imposed_matches :
+ {"1+10=12+4", "1+2=12+7", "0+11=0+18", "0+12=0+17", "10+1=0+18",
+ "0+10=18+1", "0+0=0+18", "0+10=0+0", "1000000000+1=0+1000000000"}) {
+ EXPECT_FALSE(run_test(imposed_matches));
+ EXPECT_EQ(ImposedMatchParser::kOutOfBound, status);
+ }
+
+ // Test overlap errors. Matches that get ignored are still tested.
+ for (const std::string& imposed_matches :
+ {"1+2=12+4,4+2=5+2,6+4=2+4", "0+10=0+18,1+2=12+4", "6+4=2+10,3+2=5+2"}) {
+ EXPECT_FALSE(run_test(imposed_matches));
+ EXPECT_EQ(ImposedMatchParser::kOverlapInNew, status);
+ }
+
+ // Test type mismatch errors.
+ EXPECT_FALSE(run_test("1+2=2+3"));
+ EXPECT_EQ(ImposedMatchParser::kTypeMismatch, status);
+
+ EXPECT_FALSE(run_test("6+4=12+4"));
+ EXPECT_EQ(ImposedMatchParser::kTypeMismatch, status);
+}
+
+} // namespace zucchini
diff --git a/integration_test.cc b/integration_test.cc
new file mode 100644
index 0000000..1baccc3
--- /dev/null
+++ b/integration_test.cc
@@ -0,0 +1,103 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "base/files/file_path.h"
+#include "base/files/memory_mapped_file.h"
+#include "base/path_service.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/zucchini.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+base::FilePath MakeTestPath(const std::string& filename) {
+ base::FilePath path;
+ DCHECK(base::PathService::Get(base::DIR_SOURCE_ROOT, &path));
+ return path.AppendASCII("components")
+ .AppendASCII("zucchini")
+ .AppendASCII("testdata")
+ .AppendASCII(filename);
+}
+
+void TestGenApply(const std::string& old_filename,
+ const std::string& new_filename,
+ bool raw) {
+ base::FilePath old_path = MakeTestPath(old_filename);
+ base::FilePath new_path = MakeTestPath(new_filename);
+
+ base::MemoryMappedFile old_file;
+ ASSERT_TRUE(old_file.Initialize(old_path));
+
+ base::MemoryMappedFile new_file;
+ ASSERT_TRUE(new_file.Initialize(new_path));
+
+ ConstBufferView old_region(old_file.data(), old_file.length());
+ ConstBufferView new_region(new_file.data(), new_file.length());
+
+ EnsemblePatchWriter patch_writer(old_region, new_region);
+
+ // Generate patch from "old" to "new".
+ ASSERT_EQ(status::kStatusSuccess,
+ raw ? GenerateBufferRaw(old_region, new_region, &patch_writer)
+ : GenerateBuffer(old_region, new_region, &patch_writer));
+
+ size_t patch_size = patch_writer.SerializedSize();
+ EXPECT_GE(patch_size, 80U); // Minimum size is empty patch.
+ // TODO(etiennep): Add check on maximum expected size.
+
+ std::vector<uint8_t> patch_buffer(patch_writer.SerializedSize());
+ patch_writer.SerializeInto({patch_buffer.data(), patch_buffer.size()});
+
+ // Read back generated patch.
+ absl::optional<EnsemblePatchReader> patch_reader =
+ EnsemblePatchReader::Create({patch_buffer.data(), patch_buffer.size()});
+ ASSERT_TRUE(patch_reader.has_value());
+
+ // Check basic properties.
+ EXPECT_TRUE(patch_reader->CheckOldFile(old_region));
+ EXPECT_TRUE(patch_reader->CheckNewFile(new_region));
+ EXPECT_EQ(old_file.length(), patch_reader->header().old_size);
+ // If new_size doesn't match expectation, the function is aborted.
+ ASSERT_EQ(new_file.length(), patch_reader->header().new_size);
+
+ // Apply patch to "old" to get "patched new", ensure it's identical to "new".
+ std::vector<uint8_t> patched_new_buffer(new_region.size());
+ ASSERT_EQ(status::kStatusSuccess, ApplyBuffer(old_region, *patch_reader,
+ {patched_new_buffer.data(),
+ patched_new_buffer.size()}));
+
+ // Note that |new_region| and |patched_new_buffer| are the same size.
+ EXPECT_TRUE(std::equal(new_region.begin(), new_region.end(),
+ patched_new_buffer.begin()));
+}
+
+TEST(EndToEndTest, GenApplyRaw) {
+ TestGenApply("setup1.exe", "setup2.exe", true);
+ TestGenApply("chrome64_1.exe", "chrome64_2.exe", true);
+}
+
+TEST(EndToEndTest, GenApplyIdentity) {
+ TestGenApply("setup1.exe", "setup1.exe", false);
+}
+
+TEST(EndToEndTest, GenApplySimple) {
+ TestGenApply("setup1.exe", "setup2.exe", false);
+ TestGenApply("setup2.exe", "setup1.exe", false);
+ TestGenApply("chrome64_1.exe", "chrome64_2.exe", false);
+}
+
+TEST(EndToEndTest, GenApplyCross) {
+ TestGenApply("setup1.exe", "chrome64_1.exe", false);
+}
+
+} // namespace zucchini
diff --git a/io_utils.cc b/io_utils.cc
new file mode 100644
index 0000000..aa493d0
--- /dev/null
+++ b/io_utils.cc
@@ -0,0 +1,52 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/io_utils.h"
+
+#include <iostream>
+
+namespace zucchini {
+
+/******** LimitedOutputStream::StreamBuf ********/
+
+LimitedOutputStream::StreamBuf::StreamBuf(std::ostream& os, int limit)
+ : os_(os), limit_(limit) {}
+
+LimitedOutputStream::StreamBuf::~StreamBuf() {
+ // Display warning in case we forget to flush data with std::endl.
+ if (!str().empty()) {
+ std::cerr << "Warning: LimitedOutputStream has " << str().length()
+ << " bytes of unflushed output." << std::endl;
+ }
+}
+
+int LimitedOutputStream::StreamBuf::sync() {
+ if (full()) {
+ str("");
+ return 0;
+ }
+ os_ << str();
+ str("");
+ if (++counter_ >= limit_)
+ os_ << "(Additional output suppressed)\n";
+ os_.flush();
+ return 0;
+}
+
+/******** LimitedOutputStream ********/
+
+LimitedOutputStream::LimitedOutputStream(std::ostream& os, int limit)
+ : std::ostream(&buf_), buf_(os, limit) {}
+
+/******** PrefixSep ********/
+
+std::ostream& operator<<(std::ostream& ostr, PrefixSep& obj) {
+ if (obj.first_)
+ obj.first_ = false;
+ else
+ ostr << obj.sep_str_;
+ return ostr;
+}
+
+} // namespace zucchini
diff --git a/io_utils.h b/io_utils.h
new file mode 100644
index 0000000..63eeec8
--- /dev/null
+++ b/io_utils.h
@@ -0,0 +1,144 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_IO_UTILS_H_
+#define COMPONENTS_ZUCCHINI_IO_UTILS_H_
+
+#include <stdint.h>
+
+#include <cctype>
+#include <istream>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+namespace zucchini {
+
+// An std::ostream wrapper that limits the number of std::endl lines to output,
+// useful for preventing excessive debug message output. Usage requires some
+// work by the caller. Sample:
+// static LimitedOutputStream los(std::cerr, 10);
+// if (!los.full()) {
+// ... // Prepare message. Block may be skipped so don't do other work!
+// los << message;
+// los << std::endl; // Important!
+// }
+class LimitedOutputStream : public std::ostream {
+ private:
+ class StreamBuf : public std::stringbuf {
+ public:
+ StreamBuf(std::ostream& os, int limit);
+ ~StreamBuf() override;
+
+ int sync() override;
+ bool full() const { return counter_ >= limit_; }
+
+ private:
+ std::ostream& os_;
+ const int limit_;
+ int counter_ = 0;
+ };
+
+ public:
+ LimitedOutputStream(std::ostream& os, int limit);
+ LimitedOutputStream(const LimitedOutputStream&) = delete;
+ const LimitedOutputStream& operator=(const LimitedOutputStream&) = delete;
+ bool full() const { return buf_.full(); }
+
+ private:
+ StreamBuf buf_;
+};
+
+// A class to render hexadecimal numbers for std::ostream with 0-padding. This
+// is more concise and flexible than stateful STL manipulator alternatives; so:
+// std::ios old_fmt(nullptr);
+// old_fmt.copyfmt(std::cout);
+// std::cout << std::uppercase << std::hex;
+// std::cout << std::setfill('0') << std::setw(8) << int_data << std::endl;
+// std::cout.copyfmt(old_fmt);
+// can be expressed as:
+//   std::cout << AsHex<8>(int_data) << std::endl;
+template <int N, typename T = uint32_t>
+struct AsHex {
+ explicit AsHex(T value_in) : value(value_in) {}
+ T value;
+};
+
+template <int N, typename T>
+std::ostream& operator<<(std::ostream& os, const AsHex<N, T>& as_hex) {
+ char buf[N + 1];
+ buf[N] = '\0';
+ T value = as_hex.value;
+ for (int i = N - 1; i >= 0; --i, value >>= 4)
+ buf[i] = "0123456789ABCDEF"[static_cast<int>(value & 0x0F)];
+ if (value)
+ os << "..."; // To indicate data truncation, or negative values.
+ os << buf;
+ return os;
+}
+
+// An output manipulator to simplify printing list separators. Sample usage:
+// PrefixSep sep(",");
+// for (int i : {3, 1, 4, 1, 5, 9})
+// std::cout << sep << i;
+// std::cout << std::endl; // Outputs "3,1,4,1,5,9\n".
+class PrefixSep {
+ public:
+ explicit PrefixSep(const std::string& sep_str) : sep_str_(sep_str) {}
+ PrefixSep(const PrefixSep&) = delete;
+ const PrefixSep& operator=(const PrefixSep&) = delete;
+
+ friend std::ostream& operator<<(std::ostream& ostr, PrefixSep& obj);
+
+ private:
+ std::string sep_str_;
+ bool first_ = true;
+};
+
+// An input manipulator that dictates the expected next character in
+// |std::istream|, and invalidates the stream if expectation is not met.
+class EatChar {
+ public:
+ explicit EatChar(char ch) : ch_(ch) {}
+ EatChar(const EatChar&) = delete;
+ const EatChar& operator=(const EatChar&) = delete;
+
+ friend inline std::istream& operator>>(std::istream& istr,
+ const EatChar& obj) {
+ if (!istr.fail() && istr.get() != obj.ch_)
+ istr.setstate(std::ios_base::failbit);
+ return istr;
+ }
+
+ private:
+ char ch_;
+};
+
+// An input manipulator that reads an unsigned integer from |std::istream|,
+// and invalidates the stream on failure. Intolerant of leading white spaces.
+template <typename T>
+class StrictUInt {
+ public:
+ explicit StrictUInt(T& var) : var_(var) {}
+ StrictUInt(const StrictUInt&) = default;
+
+ friend std::istream& operator>>(std::istream& istr, StrictUInt<T> obj) {
+ if (!istr.fail() && !::isdigit(istr.peek())) {
+ istr.setstate(std::ios_base::failbit);
+ return istr;
+ }
+ return istr >> obj.var_;
+ }
+
+ private:
+ T& var_;
+};
+
+// Stub out uint8_t: istream treats it as char, and value won't be read as int!
+template <>
+struct StrictUInt<uint8_t> {};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_IO_UTILS_H_
diff --git a/io_utils_unittest.cc b/io_utils_unittest.cc
new file mode 100644
index 0000000..521e7ce
--- /dev/null
+++ b/io_utils_unittest.cc
@@ -0,0 +1,160 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/io_utils.h"
+
+#include <stdint.h>
+
+#include <sstream>
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+TEST(IOUtilsTest, LimitedOutputStream) {
+ std::ostringstream oss;
+ LimitedOutputStream los(oss, 3);
+ EXPECT_FALSE(los.full());
+ EXPECT_EQ("", oss.str());
+ // Line 1.
+ los << "a" << 1 << "b" << 2 << "c" << 3 << std::endl;
+ EXPECT_FALSE(los.full());
+ EXPECT_EQ("a1b2c3\n", oss.str());
+ // Line 2.
+ oss.str("");
+ los << "\r\r\n\n" << std::endl; // Manual new lines don't count.
+ EXPECT_FALSE(los.full());
+ EXPECT_EQ("\r\r\n\n\n", oss.str());
+ // Line 3.
+ oss.str("");
+ los << "blah" << 137;
+ EXPECT_FALSE(los.full());
+ los << std::endl;
+ EXPECT_TRUE(los.full());
+ EXPECT_EQ("blah137\n(Additional output suppressed)\n", oss.str());
+ // Not testing adding more lines: the behavior is undefined since we rely on
+ // caller suppressing output if |los.full()| is true.
+}
+
+TEST(IOUtilsTest, AsHex) {
+ std::ostringstream oss;
+ // Helper for single-line tests. Eats dummy std::ostream& from operator<<().
+ auto extract = [&oss](std::ostream&) -> std::string {
+ std::string ret = oss.str();
+ oss.str("");
+ return ret;
+ };
+
+ EXPECT_EQ("00000000", extract(oss << AsHex<8>(0)));
+ EXPECT_EQ("12345678", extract(oss << AsHex<8>(0x12345678U)));
+ EXPECT_EQ("9ABCDEF0", extract(oss << AsHex<8>(0x9ABCDEF0U)));
+ EXPECT_EQ("(00000064)", extract(oss << "(" << AsHex<8>(100) << ")"));
+ EXPECT_EQ("00FFFF", extract(oss << AsHex<6>(0xFFFFU)));
+ EXPECT_EQ("FFFF", extract(oss << AsHex<4>(0xFFFFU)));
+ EXPECT_EQ("...FF", extract(oss << AsHex<2>(0xFFFFU)));
+ EXPECT_EQ("...00", extract(oss << AsHex<2>(0x100U)));
+ EXPECT_EQ("FF\n", extract(oss << AsHex<2>(0xFFU) << std::endl));
+ EXPECT_EQ("132457689BACDEF0",
+ extract(oss << AsHex<16, uint64_t>(0x132457689BACDEF0LLU)));
+ EXPECT_EQ("000000000001", extract(oss << AsHex<12, uint8_t>(1)));
+ EXPECT_EQ("00000089", extract(oss << AsHex<8, int32_t>(137)));
+ EXPECT_EQ("...FFFFFFFF", extract(oss << AsHex<8, int32_t>(-1)));
+ EXPECT_EQ("7FFF", extract(oss << AsHex<4, int16_t>(0x7FFFU)));
+ EXPECT_EQ("...8000", extract(oss << AsHex<4, int16_t>(0x8000U)));
+ EXPECT_EQ("8000", extract(oss << AsHex<4, uint16_t>(0x8000U)));
+}
+
+TEST(IOUtilsTest, PrefixSep) {
+ std::ostringstream oss;
+ PrefixSep sep(",");
+ oss << sep << 3;
+ EXPECT_EQ("3", oss.str());
+ oss << sep << 1;
+ EXPECT_EQ("3,1", oss.str());
+ oss << sep << 4 << sep << 1 << sep << "59";
+ EXPECT_EQ("3,1,4,1,59", oss.str());
+}
+
+TEST(IOUtilsTest, PrefixSepAlt) {
+ std::ostringstream oss;
+ PrefixSep sep(" ");
+ oss << sep << 3;
+ EXPECT_EQ("3", oss.str());
+ oss << sep << 1;
+ EXPECT_EQ("3 1", oss.str());
+ oss << sep << 4 << sep << 1 << sep << "59";
+ EXPECT_EQ("3 1 4 1 59", oss.str());
+}
+
+TEST(IOUtilsTest, EatChar) {
+ std::istringstream main_iss;
+ // Helper for single-line tests.
+ auto iss = [&main_iss](const std::string s) -> std::istringstream& {
+ main_iss.clear();
+ main_iss.str(s);
+ return main_iss;
+ };
+
+ EXPECT_TRUE(iss("a,1") >> EatChar('a') >> EatChar(',') >> EatChar('1'));
+ EXPECT_FALSE(iss("a,a") >> EatChar('a') >> EatChar(',') >> EatChar('1'));
+ EXPECT_FALSE(iss("a") >> EatChar('a') >> EatChar(',') >> EatChar('1'));
+ EXPECT_FALSE(iss("x") >> EatChar('X'));
+ EXPECT_TRUE(iss("_\n") >> EatChar('_') >> EatChar('\n'));
+}
+
+TEST(IOUtilsTest, StrictUInt) {
+ std::istringstream main_iss;
+ // Helper for single-line tests.
+ auto iss = [&main_iss](const std::string& s) -> std::istringstream& {
+ main_iss.clear();
+ main_iss.str(s);
+ return main_iss;
+ };
+
+ uint32_t u32 = 0;
+ EXPECT_TRUE(iss("1234") >> StrictUInt<uint32_t>(u32));
+ EXPECT_EQ(uint32_t(1234), u32);
+ EXPECT_TRUE(iss("001234") >> StrictUInt<uint32_t>(u32));
+ EXPECT_EQ(uint32_t(1234), u32);
+ EXPECT_FALSE(iss("blahblah") >> StrictUInt<uint32_t>(u32));
+ EXPECT_EQ(uint32_t(1234), u32); // No overwrite on failure.
+ EXPECT_TRUE(iss("137suffix") >> StrictUInt<uint32_t>(u32));
+ EXPECT_EQ(uint32_t(137), u32);
+ EXPECT_FALSE(iss(" 1234") >> StrictUInt<uint32_t>(u32));
+ EXPECT_FALSE(iss("-1234") >> StrictUInt<uint32_t>(u32));
+
+ uint16_t u16 = 0;
+ EXPECT_TRUE(iss("65535") >> StrictUInt<uint16_t>(u16));
+ EXPECT_EQ(uint16_t(65535), u16);
+ EXPECT_FALSE(iss("65536") >> StrictUInt<uint16_t>(u16)); // Overflow.
+
+ uint64_t u64 = 0;
+ EXPECT_TRUE(iss("1000000000001") >> StrictUInt<uint64_t>(u64));
+ EXPECT_EQ(uint64_t(1000000000001LL), u64);
+
+ // uint8_t is stubbed out, so no tests for it.
+}
+
+TEST(IOUtilsTest, ParseSimpleEquations) {
+ std::istringstream iss("123+456=579,4-3=1");
+ uint32_t a = 0;
+ uint32_t b = 0;
+ uint32_t c = 0;
+ EXPECT_TRUE(iss >> StrictUInt<uint32_t>(a) >> EatChar('+') >>
+ StrictUInt<uint32_t>(b) >> EatChar('=') >>
+ StrictUInt<uint32_t>(c));
+ EXPECT_EQ(uint32_t(123), a);
+ EXPECT_EQ(uint32_t(456), b);
+ EXPECT_EQ(uint32_t(579), c);
+ EXPECT_TRUE(iss >> EatChar(','));
+ EXPECT_TRUE(iss >> StrictUInt<uint32_t>(a) >> EatChar('-') >>
+ StrictUInt<uint32_t>(b) >> EatChar('=') >>
+ StrictUInt<uint32_t>(c));
+ EXPECT_EQ(uint32_t(4), a);
+ EXPECT_EQ(uint32_t(3), b);
+ EXPECT_EQ(uint32_t(1), c);
+}
+
+} // namespace zucchini
diff --git a/main_utils.cc b/main_utils.cc
new file mode 100644
index 0000000..8c47c91
--- /dev/null
+++ b/main_utils.cc
@@ -0,0 +1,255 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/main_utils.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "base/command_line.h"
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/logging.h"
+#include "base/process/process_handle.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_split.h"
+#include "base/strings/string_util.h"
+#include "base/time/time.h"
+#include "build/build_config.h"
+#include "components/zucchini/io_utils.h"
+#include "components/zucchini/zucchini_commands.h"
+
+#if defined(OS_WIN)
+#include <windows.h> // This include must come first.
+
+#include <psapi.h>
+#endif
+
+namespace {
+
+/******** Command ********/
+
+// Specifications for a Zucchini command.
+struct Command {
+ constexpr Command(const char* name_in,
+ const char* usage_in,
+ int num_args_in,
+ CommandFunction command_function_in)
+ : name(name_in),
+ usage(usage_in),
+ num_args(num_args_in),
+ command_function(command_function_in) {}
+ Command(const Command&) = default;
+ ~Command() = default;
+
+ // Unique name of command. |-name| is used to select from command-line.
+ const char* const name;
+
+ // Usage help text of command.
+ const char* const usage;
+
+ // Number of arguments (assumed to be filenames) used by the command.
+ const int num_args;
+
+ // Main function to run for the command.
+ const CommandFunction command_function;
+};
+
+/******** List of Zucchini commands ********/
+
+constexpr Command kCommands[] = {
+ {"gen",
+ "-gen <old_file> <new_file> <patch_file> [-raw] [-keep]"
+ " [-impose=#+#=#+#,#+#=#+#,...]",
+ 3, &MainGen},
+ {"apply", "-apply <old_file> <patch_file> <new_file> [-keep]", 3,
+ &MainApply},
+ {"read", "-read <exe> [-dump]", 1, &MainRead},
+ {"detect", "-detect <archive_file>", 1, &MainDetect},
+ {"match", "-match <old_file> <new_file> [-impose=#+#=#+#,#+#=#+#,...]", 2,
+ &MainMatch},
+ {"crc32", "-crc32 <file>", 1, &MainCrc32},
+};
+
+/******** GetPeakMemoryMetrics ********/
+
+#if defined(OS_LINUX) || defined(OS_CHROMEOS)
+// Linux does not have an exact mapping to the values used on Windows so use a
+// close approximation:
+// peak_virtual_memory ~= peak_page_file_usage
+// resident_set_size_hwm (high water mark) ~= peak_working_set_size
+//
+// On failure the input values will be set to 0.
+void GetPeakMemoryMetrics(size_t* peak_virtual_memory,
+ size_t* resident_set_size_hwm) {
+ *peak_virtual_memory = 0;
+ *resident_set_size_hwm = 0;
+ auto status_path =
+ base::FilePath("/proc")
+ .Append(base::NumberToString(base::GetCurrentProcessHandle()))
+ .Append("status");
+ std::string contents_string;
+ base::ReadFileToString(status_path, &contents_string);
+ std::vector<base::StringPiece> lines = base::SplitStringPiece(
+ contents_string, "\n", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
+
+ for (const auto& line : lines) {
+ // Tokens should generally be of the form "Metric: <val> kB"
+ std::vector<base::StringPiece> tokens = base::SplitStringPiece(
+ line, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
+ if (tokens.size() < 2)
+ continue;
+
+ if (tokens[0] == "VmPeak:") {
+ if (base::StringToSizeT(tokens[1], peak_virtual_memory)) {
+ *peak_virtual_memory *= 1024; // in kiB
+ if (*resident_set_size_hwm)
+ return;
+ }
+ } else if (tokens[0] == "VmHWM:") {
+ if (base::StringToSizeT(tokens[1], resident_set_size_hwm)) {
+ *resident_set_size_hwm *= 1024; // in kiB
+ if (*peak_virtual_memory)
+ return;
+ }
+ }
+ }
+}
+#endif // defined(OS_LINUX) || defined(OS_CHROMEOS)
+
+#if defined(OS_WIN)
+// On failure the input values will be set to 0.
+void GetPeakMemoryMetrics(size_t* peak_page_file_usage,
+ size_t* peak_working_set_size) {
+ *peak_page_file_usage = 0;
+ *peak_working_set_size = 0;
+ PROCESS_MEMORY_COUNTERS pmc;
+ if (::GetProcessMemoryInfo(::GetCurrentProcess(), &pmc, sizeof(pmc))) {
+ *peak_page_file_usage = pmc.PeakPagefileUsage;
+ *peak_working_set_size = pmc.PeakWorkingSetSize;
+ }
+}
+#endif // defined(OS_WIN)
+
+/******** ScopedResourceUsageTracker ********/
+
+// A class to track and log system resource usage.
+class ScopedResourceUsageTracker {
+ public:
+ // Initializes states for tracking.
+ ScopedResourceUsageTracker() {
+ start_time_ = base::TimeTicks::Now();
+
+#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN)
+ GetPeakMemoryMetrics(&start_peak_page_file_usage_,
+ &start_peak_working_set_size_);
+#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN)
+ }
+
+ // Computes and prints usage.
+ ~ScopedResourceUsageTracker() {
+ base::TimeTicks end_time = base::TimeTicks::Now();
+
+#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN)
+ size_t cur_peak_page_file_usage = 0;
+ size_t cur_peak_working_set_size = 0;
+ GetPeakMemoryMetrics(&cur_peak_page_file_usage, &cur_peak_working_set_size);
+
+ LOG(INFO) << "Zucchini.PeakPagefileUsage "
+ << cur_peak_page_file_usage / 1024 << " KiB";
+ LOG(INFO) << "Zucchini.PeakPagefileUsageChange "
+ << (cur_peak_page_file_usage - start_peak_page_file_usage_) / 1024
+ << " KiB";
+ LOG(INFO) << "Zucchini.PeakWorkingSetSize "
+ << cur_peak_working_set_size / 1024 << " KiB";
+ LOG(INFO) << "Zucchini.PeakWorkingSetSizeChange "
+ << (cur_peak_working_set_size - start_peak_working_set_size_) /
+ 1024
+ << " KiB";
+#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN)
+
+ LOG(INFO) << "Zucchini.TotalTime " << (end_time - start_time_).InSecondsF()
+ << " s";
+ }
+
+ private:
+ base::TimeTicks start_time_;
+#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN)
+ size_t start_peak_page_file_usage_ = 0;
+ size_t start_peak_working_set_size_ = 0;
+#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN)
+};
+
+/******** Helper functions ********/
+
+// Translates |command_line| arguments to a vector of base::FilePath (expecting
+// exactly |expected_count|). On success, writes the results to |paths| and
+// returns true. Otherwise returns false.
+bool CheckAndGetFilePathParams(const base::CommandLine& command_line,
+ size_t expected_count,
+ std::vector<base::FilePath>* paths) {
+ const base::CommandLine::StringVector& args = command_line.GetArgs();
+ if (args.size() != expected_count)
+ return false;
+
+ paths->clear();
+ paths->reserve(args.size());
+ for (const auto& arg : args)
+ paths->emplace_back(arg);
+ return true;
+}
+
+// Prints main Zucchini usage text.
+void PrintUsage(std::ostream& err) {
+ err << "Usage:" << std::endl;
+ for (const Command& command : kCommands)
+ err << " zucchini " << command.usage << std::endl;
+}
+
+} // namespace
+
+/******** Exported Functions ********/
+
+zucchini::status::Code RunZucchiniCommand(const base::CommandLine& command_line,
+ std::ostream& out,
+ std::ostream& err) {
+ // Look for a command with name that matches input.
+ const Command* command_use = nullptr;
+ for (const Command& command : kCommands) {
+ if (command_line.HasSwitch(command.name)) {
+ if (command_use) { // Too many commands found.
+ command_use = nullptr; // Set to null to flag error.
+ break;
+ }
+ command_use = &command;
+ }
+ }
+
+ // Expect exactly 1 matching command. If 0 or >= 2, print usage and quit.
+ if (!command_use) {
+ err << "Must have exactly one of:" << std::endl;
+ err << " [";
+ zucchini::PrefixSep sep(", ");
+ for (const Command& command : kCommands)
+ err << sep << "-" << command.name;
+ err << "]" << std::endl;
+ PrintUsage(err);
+ return zucchini::status::kStatusInvalidParam;
+ }
+
+ // Try to parse filename arguments. On failure, print usage and quit.
+ std::vector<base::FilePath> paths;
+ if (!CheckAndGetFilePathParams(command_line, command_use->num_args, &paths)) {
+ err << command_use->usage << std::endl;
+ PrintUsage(err);
+ return zucchini::status::kStatusInvalidParam;
+ }
+
+ ScopedResourceUsageTracker resource_usage_tracker;
+ return command_use->command_function({command_line, paths, out, err});
+}
diff --git a/main_utils.h b/main_utils.h
new file mode 100644
index 0000000..6c97aad
--- /dev/null
+++ b/main_utils.h
@@ -0,0 +1,34 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_MAIN_UTILS_H_
+#define COMPONENTS_ZUCCHINI_MAIN_UTILS_H_
+
+#include <iosfwd>
+
+#include "components/zucchini/zucchini.h"
+
+// Utilities to run Zucchini command based on command-line input, and to print
+// help messages.
+
+namespace base {
+
+class CommandLine;
+
+} // namespace base
+
+// To add a new Zucchini command:
+//  1. Declare the command's main function in zucchini_commands.h. Its signature
+//     must match CommandFunction.
+//  2. Define the command's main function in zucchini_commands.cc.
+// 3. Add a new entry into |kCommands| in main_utils.cc.
+
+// Searches |command_line| for Zucchini commands. If a unique command is found,
+// runs it (passes |out| and |err|), and logs resource usage. Otherwise prints
+// help message to |err|. Returns Zucchini status code for error handling.
+zucchini::status::Code RunZucchiniCommand(const base::CommandLine& command_line,
+ std::ostream& out,
+ std::ostream& err);
+
+#endif // COMPONENTS_ZUCCHINI_MAIN_UTILS_H_
diff --git a/mapped_file.cc b/mapped_file.cc
new file mode 100644
index 0000000..a742414
--- /dev/null
+++ b/mapped_file.cc
@@ -0,0 +1,69 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/mapped_file.h"
+
+#include <utility>
+
+#include "base/files/file_util.h"
+#include "build/build_config.h"
+
+namespace zucchini {
+
+MappedFileReader::MappedFileReader(base::File file) {
+ if (!file.IsValid()) {
+ error_ = "Invalid file.";
+ return; // |buffer_| will be uninitialized, and therefore invalid.
+ }
+ if (!buffer_.Initialize(std::move(file))) {
+ error_ = "Can't map file to memory.";
+ }
+}
+
+MappedFileWriter::MappedFileWriter(const base::FilePath& file_path,
+ base::File file,
+ size_t length)
+ : file_path_(file_path), delete_behavior_(kManualDeleteOnClose) {
+ if (!file.IsValid()) {
+ error_ = "Invalid file.";
+ return; // |buffer_| will be uninitialized, and therefore invalid.
+ }
+
+#if defined(OS_WIN)
+ file_handle_ = file.Duplicate();
+ // Tell the OS to delete the file when all handles are closed.
+ if (file_handle_.DeleteOnClose(true)) {
+ delete_behavior_ = kAutoDeleteOnClose;
+ } else {
+ error_ = "Failed to mark file for delete-on-close.";
+ }
+#endif // defined(OS_WIN)
+
+ bool is_ok = buffer_.Initialize(std::move(file), {0, length},
+ base::MemoryMappedFile::READ_WRITE_EXTEND);
+ if (!is_ok) {
+ error_ = "Can't map file to memory.";
+ }
+}
+
+MappedFileWriter::~MappedFileWriter() {
+ if (!HasError() && delete_behavior_ == kManualDeleteOnClose &&
+ !file_path_.empty() && !base::DeleteFile(file_path_)) {
+ error_ = "Failed to delete file.";
+ }
+}
+
+bool MappedFileWriter::Keep() {
+#if defined(OS_WIN)
+ if (delete_behavior_ == kAutoDeleteOnClose &&
+ !file_handle_.DeleteOnClose(false)) {
+ error_ = "Failed to prevent deletion of file.";
+ return false;
+ }
+#endif // defined(OS_WIN)
+ delete_behavior_ = kKeep;
+ return true;
+}
+
+} // namespace zucchini
diff --git a/mapped_file.h b/mapped_file.h
new file mode 100644
index 0000000..f15e09a
--- /dev/null
+++ b/mapped_file.h
@@ -0,0 +1,82 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_MAPPED_FILE_H_
+#define COMPONENTS_ZUCCHINI_MAPPED_FILE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "base/files/file.h"
+#include "base/files/file_path.h"
+#include "base/files/memory_mapped_file.h"
+#include "components/zucchini/buffer_view.h"
+
+namespace zucchini {
+
+// A file reader wrapper.
+class MappedFileReader {
+ public:
+ // Maps |file| to memory for reading. Also validates |file|. Errors are
+ // available via HasError() and error().
+ explicit MappedFileReader(base::File file);
+ MappedFileReader(const MappedFileReader&) = delete;
+ const MappedFileReader& operator=(const MappedFileReader&) = delete;
+
+ const uint8_t* data() const { return buffer_.data(); }
+ size_t length() const { return buffer_.length(); }
+ zucchini::ConstBufferView region() const { return {data(), length()}; }
+
+ bool HasError() { return !error_.empty() || !buffer_.IsValid(); }
+ const std::string& error() { return error_; }
+
+ private:
+ std::string error_;
+ base::MemoryMappedFile buffer_;
+};
+
+// A file writer wrapper. The target file is deleted on destruction unless
+// Keep() is called.
+class MappedFileWriter {
+ public:
+ // Maps |file| to memory for writing. |file_path| is needed for auto delete on
+ // UNIX systems, but can be empty if auto delete is not needed. Errors are
+ // available via HasError() and error().
+ MappedFileWriter(const base::FilePath& file_path,
+ base::File file,
+ size_t length);
+ MappedFileWriter(const MappedFileWriter&) = delete;
+ const MappedFileWriter& operator=(const MappedFileWriter&) = delete;
+ ~MappedFileWriter();
+
+ uint8_t* data() { return buffer_.data(); }
+ size_t length() const { return buffer_.length(); }
+ zucchini::MutableBufferView region() { return {data(), length()}; }
+
+ bool HasError() { return !error_.empty() || !buffer_.IsValid(); }
+ const std::string& error() { return error_; }
+
+ // Indicates that the file should not be deleted on destruction. Returns true
+ // iff the operation succeeds.
+ bool Keep();
+
+ private:
+ enum OnCloseDeleteBehavior {
+ kKeep,
+ kAutoDeleteOnClose,
+ kManualDeleteOnClose
+ };
+
+ std::string error_;
+ base::FilePath file_path_;
+ base::File file_handle_;
+ base::MemoryMappedFile buffer_;
+ OnCloseDeleteBehavior delete_behavior_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_MAPPED_FILE_H_
diff --git a/mapped_file_unittest.cc b/mapped_file_unittest.cc
new file mode 100644
index 0000000..e3ee6dc
--- /dev/null
+++ b/mapped_file_unittest.cc
@@ -0,0 +1,61 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/mapped_file.h"
+
+#include <utility>
+
+#include "base/files/file.h"
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/files/scoped_temp_dir.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+class MappedFileWriterTest : public testing::Test {
+ protected:
+ MappedFileWriterTest() = default;
+ void SetUp() override {
+ ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
+ file_path_ = temp_dir_.GetPath().AppendASCII("test-file");
+ }
+
+ base::FilePath file_path_;
+
+ private:
+ base::ScopedTempDir temp_dir_;
+};
+
+TEST_F(MappedFileWriterTest, Keep) {
+ EXPECT_FALSE(base::PathExists(file_path_));
+ {
+ using base::File;
+ File file(file_path_, File::FLAG_CREATE_ALWAYS | File::FLAG_READ |
+ File::FLAG_WRITE | File::FLAG_SHARE_DELETE |
+ File::FLAG_CAN_DELETE_ON_CLOSE);
+ MappedFileWriter file_writer(file_path_, std::move(file), 10);
+ EXPECT_FALSE(file_writer.HasError());
+ EXPECT_TRUE(file_writer.Keep());
+ EXPECT_FALSE(file_writer.HasError());
+ EXPECT_TRUE(file_writer.error().empty());
+ }
+ EXPECT_TRUE(base::PathExists(file_path_));
+}
+
+TEST_F(MappedFileWriterTest, DeleteOnClose) {
+ EXPECT_FALSE(base::PathExists(file_path_));
+ {
+ using base::File;
+ File file(file_path_, File::FLAG_CREATE_ALWAYS | File::FLAG_READ |
+ File::FLAG_WRITE | File::FLAG_SHARE_DELETE |
+ File::FLAG_CAN_DELETE_ON_CLOSE);
+ MappedFileWriter file_writer(file_path_, std::move(file), 10);
+ EXPECT_FALSE(file_writer.HasError());
+ EXPECT_TRUE(file_writer.error().empty());
+ }
+ EXPECT_FALSE(base::PathExists(file_path_));
+}
+
+} // namespace zucchini
diff --git a/patch_read_write_unittest.cc b/patch_read_write_unittest.cc
new file mode 100644
index 0000000..627513c
--- /dev/null
+++ b/patch_read_write_unittest.cc
@@ -0,0 +1,730 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/patch_writer.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Used for initialization of raw test data.
+using ByteVector = std::vector<uint8_t>;
+
+// Helper function that creates an object of type |T| and initializes it from
+// data in |buffer|. Ensures initialization is successful. |buffer| is passed as
+// pointer to avoid passing a temporary, which can cause dangling references.
+template <class T>
+T TestInitialize(const ByteVector* buffer) {
+ T value;
+ BufferSource buffer_source(buffer->data(), buffer->size());
+ EXPECT_TRUE(value.Initialize(&buffer_source));
+ EXPECT_TRUE(buffer_source.empty()); // Make sure all data has been consumed
+ return value;
+}
+
+// Helper function that creates an object of type |T| and tries to initialize it
+// from invalid data in |buffer|, expecting the operation to fail. |buffer| is
+// passed as pointer to avoid passing a temporary, which can cause dangling
+// references.
+template <class T>
+void TestInvalidInitialize(const ByteVector* buffer) {
+ T value;
+ BufferSource buffer_source(buffer->data(), buffer->size());
+ EXPECT_FALSE(value.Initialize(&buffer_source));
+}
+
+// Helper function that serializes |value| into a buffer. Ensures that
+// serialization is successful and that the result matches |expected|.
+template <class T>
+void TestSerialize(const ByteVector& expected, const T& value) {
+ size_t size = value.SerializedSize();
+ EXPECT_EQ(expected.size(), size);
+ ByteVector buffer(size);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_TRUE(value.SerializeInto(&buffer_sink));
+ EXPECT_EQ(expected, buffer);
+}
+
+ByteVector CreatePatchElement() {
+ return {
+ // PatchElementHeader
+ 0x01, 0, 0, 0, // old_offset
+ 0x51, 0, 0, 0, // old_length
+ 0x03, 0, 0, 0, // new_offset
+ 0x13, 0, 0, 0, // new_length
+ 'P', 'x', '8', '6', // exe_type = EXE_TYPE_WIN32_X86
+ // EquivalenceSource
+ 1, 0, 0, 0, // src_skip size
+ 0x10, // src_skip content
+ 1, 0, 0, 0, // dst_skip size
+ 0x00, // dst_skip content
+ 1, 0, 0, 0, // copy_count size
+ 0x12, // copy_count content
+ // ExtraDataSource
+ 1, 0, 0, 0, // extra_data size
+ 0x13, // extra_data content
+ // RawDeltaSource
+ 1, 0, 0, 0, // raw_delta_skip size
+ 0x14, // raw_delta_skip content
+ 1, 0, 0, 0, // raw_delta_diff size
+ 0x15, // raw_delta_diff content
+ // ReferenceDeltaSource
+ 1, 0, 0, 0, // reference_delta size
+ 0x16, // reference_delta content
+ // PatchElementReader
+ 2, 0, 0, 0, // pool count
+ 0, // pool_tag
+ 1, 0, 0, 0, // extra_targets size
+ 0x17, // extra_targets content
+ 2, // pool_tag
+ 1, 0, 0, 0, // extra_targets size
+ 0x18, // extra_targets content
+ };
+}
+
+ByteVector CreateElementMatch() {
+ return {
+ // PatchElementHeader
+ 0x01, 0, 0, 0, // old_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x03, 0, 0, 0, // new_offset
+ 0x04, 0, 0, 0, // new_length
+ 'D', 'E', 'X', ' ', // exe_type = kExeTypeDex
+ };
+}
+
+// Helper to mutate test |data| (e.g., from CreatePatchElement()) at |idx| from
+// |from_val| (as sanity check) to |to_val|.
+void ModifyByte(size_t idx,
+ uint8_t from_val,
+ uint8_t to_val,
+ std::vector<uint8_t>* data) {
+ ASSERT_EQ(from_val, (*data)[idx]);
+ (*data)[idx] = to_val;
+}
+
+} // namespace
+
+bool operator==(const ByteVector& a, ConstBufferView b) {
+ return a == ByteVector(b.begin(), b.end());
+}
+
+TEST(PatchTest, ParseSerializeElementMatch) {
+ ByteVector data = CreateElementMatch();
+ BufferSource buffer_source(data.data(), data.size());
+ ElementMatch element_match = {};
+ EXPECT_TRUE(patch::ParseElementMatch(&buffer_source, &element_match));
+ EXPECT_EQ(kExeTypeDex, element_match.exe_type());
+ EXPECT_EQ(kExeTypeDex, element_match.old_element.exe_type);
+ EXPECT_EQ(kExeTypeDex, element_match.new_element.exe_type);
+ EXPECT_EQ(0x1U, element_match.old_element.offset);
+ EXPECT_EQ(0x2U, element_match.old_element.size);
+ EXPECT_EQ(0x3U, element_match.new_element.offset);
+ EXPECT_EQ(0x4U, element_match.new_element.size);
+
+ size_t size = patch::SerializedElementMatchSize(element_match);
+ EXPECT_EQ(data.size(), size);
+ ByteVector buffer(size);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_TRUE(patch::SerializeElementMatch(element_match, &buffer_sink));
+ EXPECT_EQ(data, buffer);
+}
+
+TEST(PatchTest, ParseElementMatchTooSmall) {
+ ByteVector data = {4};
+ BufferSource buffer_source(data.data(), data.size());
+ ElementMatch element_match = {};
+ EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match));
+}
+
+TEST(PatchTest, ParseElementMatchNoLength) {
+ // Set old_length to 0 to trigger an error.
+ {
+ ByteVector data = CreateElementMatch();
+ // old_length := 0.
+ ModifyByte(offsetof(PatchElementHeader, old_length), 0x02, 0x00, &data);
+ BufferSource buffer_source(data.data(), data.size());
+ ElementMatch element_match = {};
+ EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match));
+ }
+ // Set new_length to 0 to trigger an error.
+ {
+ ByteVector data = CreateElementMatch();
+ // new_length := 0.
+ ModifyByte(offsetof(PatchElementHeader, new_length), 0x04, 0x00, &data);
+ BufferSource buffer_source(data.data(), data.size());
+ ElementMatch element_match = {};
+ EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match));
+ }
+ // Set both new_length and old_length to 0 to trigger an error.
+ {
+ ByteVector data = CreateElementMatch();
+ // old_length := 0.
+ ModifyByte(offsetof(PatchElementHeader, old_length), 0x02, 0x00, &data);
+ // new_length := 0.
+ ModifyByte(offsetof(PatchElementHeader, new_length), 0x04, 0x00, &data);
+ BufferSource buffer_source(data.data(), data.size());
+ ElementMatch element_match = {};
+ EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match));
+ }
+}
+
+TEST(PatchTest, ParseSerializeElementMatchExeMismatch) {
+ ByteVector buffer(28);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_FALSE(patch::SerializeElementMatch(
+ ElementMatch{{{1, 2}, kExeTypeNoOp}, {{3, 4}, kExeTypeWin32X86}},
+ &buffer_sink));
+}
+
+TEST(PatchTest, SerializeElementMatchTooSmall) {
+ ByteVector buffer(4);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_FALSE(patch::SerializeElementMatch(
+ ElementMatch{{{1, 2}, kExeTypeDex}, {{3, 4}, kExeTypeDex}},
+ &buffer_sink));
+}
+
+TEST(PatchTest, ParseSerializeBuffer) {
+ auto TestSerialize = [](const ByteVector& expected, const ByteVector& value) {
+ size_t size = patch::SerializedBufferSize(value);
+ EXPECT_EQ(expected.size(), size);
+ ByteVector buffer(size);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_TRUE(patch::SerializeBuffer(value, &buffer_sink));
+ EXPECT_EQ(expected, buffer);
+ };
+
+  // |data| is passed as pointer to avoid passing a temporary, which can cause
+  // dangling references.
+ auto TestParse = [](const ByteVector* data) {
+ BufferSource value;
+ BufferSource buffer_source(data->data(), data->size());
+ EXPECT_TRUE(patch::ParseBuffer(&buffer_source, &value));
+ // Make sure all data has been consumed.
+ EXPECT_TRUE(buffer_source.empty());
+ return value;
+ };
+
+ ByteVector data = {
+ 0, 0, 0, 0, // size
+ };
+ BufferSource buffer = TestParse(&data);
+ EXPECT_TRUE(buffer.empty());
+ TestSerialize(data, ByteVector({}));
+
+ data = {
+ 3, 0, 0, 0, // size
+ 1, 2, 3 // content
+ };
+ buffer = TestParse(&data);
+ EXPECT_EQ(3U, buffer.size());
+ EXPECT_EQ(ByteVector({1, 2, 3}), ByteVector(buffer.begin(), buffer.end()));
+ TestSerialize(data, ByteVector({1, 2, 3}));
+
+ // Ill-formed input.
+ data = {
+ 3, 0, 0, 0, // size
+ 1, 2 // insufficient content
+ };
+ BufferSource value;
+ BufferSource buffer_source(data.data(), data.size());
+ EXPECT_FALSE(patch::ParseBuffer(&buffer_source, &value));
+ EXPECT_TRUE(value.empty());
+}
+
+TEST(PatchTest, SerializeBufferTooSmall) {
+ ByteVector buffer(3);
+ BufferSink buffer_sink(buffer.data(), buffer.size());
+ EXPECT_FALSE(patch::SerializeBuffer(ByteVector(), &buffer_sink));
+}
+
+TEST(EquivalenceSinkSourceTest, Empty) {
+ ByteVector data = {
+ // EquivalenceSource
+ 0, 0, 0, 0, // src_skip size
+ 0, 0, 0, 0, // dst_skip size
+ 0, 0, 0, 0, // copy_count size
+ };
+ EquivalenceSource equivalence_source =
+ TestInitialize<EquivalenceSource>(&data);
+
+ EXPECT_FALSE(equivalence_source.GetNext());
+ EXPECT_TRUE(equivalence_source.Done());
+
+ TestSerialize(data, EquivalenceSink());
+}
+
+TEST(EquivalenceSourceSinkTest, Normal) {
+ ByteVector data = {
+ // EquivalenceSource
+ 2, 0, 0, 0, // src_skip size
+ 6, 7, // src_skip content
+ 2, 0, 0, 0, // dst_skip size
+ 7, 1, // dst_skip content
+ 2, 0, 0, 0, // copy_count size
+ 2, 1 // copy_count content
+ };
+ EquivalenceSource equivalence_source =
+ TestInitialize<EquivalenceSource>(&data);
+ auto equivalence = equivalence_source.GetNext();
+ EXPECT_FALSE(equivalence_source.Done());
+ EXPECT_TRUE(equivalence.has_value());
+ EXPECT_EQ(offset_t(3), equivalence->src_offset);
+ EXPECT_EQ(offset_t(7), equivalence->dst_offset);
+ EXPECT_EQ(offset_t(2), equivalence->length);
+
+ equivalence = equivalence_source.GetNext();
+ EXPECT_TRUE(equivalence_source.Done());
+ EXPECT_TRUE(equivalence.has_value());
+ EXPECT_EQ(offset_t(1), equivalence->src_offset);
+ EXPECT_EQ(offset_t(10), equivalence->dst_offset);
+ EXPECT_EQ(offset_t(1), equivalence->length);
+
+ equivalence = equivalence_source.GetNext();
+ EXPECT_FALSE(equivalence.has_value());
+
+ EquivalenceSink equivalence_sink;
+ equivalence_sink.PutNext(Equivalence{3, 7, 2});
+ equivalence_sink.PutNext(Equivalence{1, 10, 1});
+ TestSerialize(data, equivalence_sink);
+}
+
+TEST(ExtraDataSourceSinkTest, Empty) {
+ ByteVector data = {
+ // ExtraDataSource
+ 0, 0, 0, 0, // extra_data size
+ };
+ ExtraDataSource extra_data_source = TestInitialize<ExtraDataSource>(&data);
+
+ EXPECT_FALSE(extra_data_source.GetNext(2));
+ EXPECT_TRUE(extra_data_source.Done());
+
+ TestSerialize(data, ExtraDataSink());
+}
+
+TEST(ExtraDataSourceSinkTest, Normal) {
+ ByteVector data = {
+ // ExtraDataSource
+ 5, 0, 0, 0, // extra_data size
+ 1, 2, 3, 4, 5, // extra_data content
+ };
+ ExtraDataSource extra_data_source = TestInitialize<ExtraDataSource>(&data);
+ EXPECT_FALSE(extra_data_source.Done());
+
+ auto extra_data = extra_data_source.GetNext(3);
+ EXPECT_FALSE(extra_data_source.Done());
+ EXPECT_TRUE(extra_data.has_value());
+ EXPECT_EQ(size_t(3), extra_data->size());
+ EXPECT_EQ(ByteVector({1, 2, 3}),
+ ByteVector(extra_data->begin(), extra_data->end()));
+
+ extra_data = extra_data_source.GetNext(2);
+ EXPECT_TRUE(extra_data_source.Done());
+ EXPECT_TRUE(extra_data.has_value());
+ EXPECT_EQ(ByteVector({4, 5}),
+ ByteVector(extra_data->begin(), extra_data->end()));
+
+ extra_data = extra_data_source.GetNext(2);
+ EXPECT_FALSE(extra_data.has_value());
+
+ ExtraDataSink extra_data_sink;
+
+ ByteVector content = {1, 2, 3};
+ extra_data_sink.PutNext({content.data(), content.size()});
+ content = {4, 5};
+ extra_data_sink.PutNext({content.data(), content.size()});
+ TestSerialize(data, extra_data_sink);
+}
+
+TEST(RawDeltaSourceSinkTest, Empty) {
+ ByteVector data = {
+ // RawDeltaSource
+ 0, 0, 0, 0, // raw_delta_skip size
+ 0, 0, 0, 0, // raw_delta_diff size
+ };
+ RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data);
+
+ EXPECT_FALSE(raw_delta_source.GetNext());
+ EXPECT_TRUE(raw_delta_source.Done());
+
+ TestSerialize(data, RawDeltaSink());
+}
+
+TEST(RawDeltaSinkSourceSinkTest, Normal) {
+ ByteVector data = {
+ // RawDeltaSource
+ 3, 0, 0, 0, // raw_delta_skip size
+ 1, 3, 0, // raw_delta_skip content
+ 3, 0, 0, 0, // raw_delta_diff size
+ 42, 24, 235, // raw_delta_diff content
+ };
+ RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data);
+ EXPECT_FALSE(raw_delta_source.Done());
+
+ auto raw_delta = raw_delta_source.GetNext();
+ EXPECT_FALSE(raw_delta_source.Done());
+ EXPECT_TRUE(raw_delta.has_value());
+ EXPECT_EQ(1U, raw_delta->copy_offset);
+ EXPECT_EQ(42, raw_delta->diff);
+
+ raw_delta = raw_delta_source.GetNext();
+ EXPECT_FALSE(raw_delta_source.Done());
+ EXPECT_TRUE(raw_delta.has_value());
+ EXPECT_EQ(5U, raw_delta->copy_offset);
+ EXPECT_EQ(24, raw_delta->diff);
+
+ raw_delta = raw_delta_source.GetNext();
+ EXPECT_TRUE(raw_delta_source.Done());
+ EXPECT_TRUE(raw_delta.has_value());
+ EXPECT_EQ(6U, raw_delta->copy_offset);
+ EXPECT_EQ(-21, raw_delta->diff);
+
+ EXPECT_FALSE(raw_delta_source.GetNext());
+ EXPECT_TRUE(raw_delta_source.Done());
+
+ RawDeltaSink raw_delta_sink;
+ raw_delta_sink.PutNext({1, 42});
+ raw_delta_sink.PutNext({5, 24});
+ raw_delta_sink.PutNext({6, -21});
+ TestSerialize(data, raw_delta_sink);
+}
+
+TEST(RawDeltaSourceSinkTest, InvalidContent) {
+ ByteVector data = {
+ // RawDeltaSource
+ 2, 0, 0, 0, // raw_delta_skip size
+ 1, 3, // raw_delta_skip content
+ 2, 0, 0, 0, // raw_delta_diff size
+ 0, 4, // raw_delta_diff content
+ };
+ RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data);
+ EXPECT_FALSE(raw_delta_source.GetNext());
+ EXPECT_FALSE(raw_delta_source.Done());
+}
+
+TEST(ReferenceDeltaSourceSinkTest, Empty) {
+ ByteVector data = {
+ // ReferenceDeltaSource
+ 0, 0, 0, 0, // reference_delta size
+ };
+ ReferenceDeltaSource reference_delta_source =
+ TestInitialize<ReferenceDeltaSource>(&data);
+
+ EXPECT_FALSE(reference_delta_source.GetNext());
+ EXPECT_TRUE(reference_delta_source.Done());
+
+ TestSerialize(data, ReferenceDeltaSink());
+}
+
+TEST(ReferenceDeltaSourceSinkTest, Normal) {
+ ByteVector data = {
+ // ReferenceDeltaSource
+ 2, 0, 0, 0, // reference_delta size
+ 84, 47, // reference_delta content
+ };
+ ReferenceDeltaSource reference_delta_source =
+ TestInitialize<ReferenceDeltaSource>(&data);
+ EXPECT_FALSE(reference_delta_source.Done());
+
+ auto delta = reference_delta_source.GetNext();
+ EXPECT_FALSE(reference_delta_source.Done());
+ EXPECT_TRUE(delta.has_value());
+ EXPECT_EQ(42, *delta);
+
+ delta = reference_delta_source.GetNext();
+ EXPECT_TRUE(reference_delta_source.Done());
+ EXPECT_TRUE(delta.has_value());
+ EXPECT_EQ(-24, *delta);
+
+ EXPECT_FALSE(reference_delta_source.GetNext());
+ EXPECT_TRUE(reference_delta_source.Done());
+
+ ReferenceDeltaSink reference_delta;
+ reference_delta.PutNext(42);
+ reference_delta.PutNext(-24);
+ TestSerialize(data, reference_delta);
+}
+
+TEST(TargetSourceSinkTest, Empty) {
+ ByteVector data = {
+ // TargetSource
+ 0, 0, 0, 0, // extra_targets size
+ };
+ TargetSource target_source = TestInitialize<TargetSource>(&data);
+
+ EXPECT_FALSE(target_source.GetNext());
+ EXPECT_TRUE(target_source.Done());
+
+ TestSerialize(data, TargetSink());
+}
+
+TEST(TargetSourceSinkTest, Normal) {
+ ByteVector data = {
+ // TargetSource
+ 2, 0, 0, 0, // extra_targets size
+ 3, 1, // extra_targets content
+ };
+ TargetSource target_source = TestInitialize<TargetSource>(&data);
+ EXPECT_FALSE(target_source.Done());
+
+ auto target = target_source.GetNext();
+ EXPECT_FALSE(target_source.Done());
+ EXPECT_TRUE(target.has_value());
+ EXPECT_EQ(3U, *target);
+
+ target = target_source.GetNext();
+ EXPECT_TRUE(target_source.Done());
+ EXPECT_TRUE(target.has_value());
+ EXPECT_EQ(5U, *target);
+
+ EXPECT_FALSE(target_source.GetNext());
+ EXPECT_TRUE(target_source.Done());
+
+ TargetSink target_sink;
+ target_sink.PutNext(3);
+ target_sink.PutNext(5);
+ TestSerialize(data, target_sink);
+}
+
+TEST(PatchElementTest, Normal) {
+ ByteVector data = CreatePatchElement();
+
+ PatchElementReader patch_element_reader =
+ TestInitialize<PatchElementReader>(&data);
+
+ ElementMatch element_match = patch_element_reader.element_match();
+ EXPECT_EQ(kExeTypeWin32X86, element_match.exe_type());
+ EXPECT_EQ(kExeTypeWin32X86, element_match.old_element.exe_type);
+ EXPECT_EQ(kExeTypeWin32X86, element_match.new_element.exe_type);
+ EXPECT_EQ(0x1U, element_match.old_element.offset);
+ EXPECT_EQ(0x51U, element_match.old_element.size);
+ EXPECT_EQ(0x3U, element_match.new_element.offset);
+ EXPECT_EQ(0x13U, element_match.new_element.size);
+
+ EquivalenceSource equivalence_source =
+ patch_element_reader.GetEquivalenceSource();
+ EXPECT_EQ(ByteVector({0x10}), equivalence_source.src_skip());
+ EXPECT_EQ(ByteVector({0x00}), equivalence_source.dst_skip());
+ EXPECT_EQ(ByteVector({0x12}), equivalence_source.copy_count());
+
+ ExtraDataSource extra_data_source = patch_element_reader.GetExtraDataSource();
+ EXPECT_EQ(ByteVector({0x13}), extra_data_source.extra_data());
+
+ RawDeltaSource raw_delta_source = patch_element_reader.GetRawDeltaSource();
+ EXPECT_EQ(ByteVector({0x14}), raw_delta_source.raw_delta_skip());
+ EXPECT_EQ(ByteVector({0x15}), raw_delta_source.raw_delta_diff());
+
+ ReferenceDeltaSource reference_delta_source =
+ patch_element_reader.GetReferenceDeltaSource();
+ EXPECT_EQ(ByteVector({0x16}), reference_delta_source.reference_delta());
+
+ TargetSource target_source1 =
+ patch_element_reader.GetExtraTargetSource(PoolTag(0));
+ EXPECT_EQ(ByteVector({0x17}), target_source1.extra_targets());
+ TargetSource target_source2 =
+ patch_element_reader.GetExtraTargetSource(PoolTag(1));
+ EXPECT_EQ(ByteVector({}), target_source2.extra_targets());
+ TargetSource target_source3 =
+ patch_element_reader.GetExtraTargetSource(PoolTag(2));
+ EXPECT_EQ(ByteVector({0x18}), target_source3.extra_targets());
+
+ PatchElementWriter patch_element_writer(element_match);
+
+ patch_element_writer.SetEquivalenceSink(
+ EquivalenceSink({0x10}, {0x00}, {0x12}));
+ patch_element_writer.SetExtraDataSink(ExtraDataSink({0x13}));
+ patch_element_writer.SetRawDeltaSink(RawDeltaSink({0x14}, {0x15}));
+ patch_element_writer.SetReferenceDeltaSink(ReferenceDeltaSink({0x16}));
+ patch_element_writer.SetTargetSink(PoolTag(0), TargetSink({0x17}));
+ patch_element_writer.SetTargetSink(PoolTag(2), TargetSink({0x18}));
+ TestSerialize(data, patch_element_writer);
+}
+
+TEST(PatchElementTest, BadEquivalence) {
+ // If the "old" element is too small then the test should fail.
+ {
+ ByteVector data = CreatePatchElement();
+ // old_length := 0x4 (too small).
+ ModifyByte(offsetof(PatchElementHeader, old_length), 0x51, 0x04, &data);
+ TestInvalidInitialize<PatchElementReader>(&data);
+ }
+
+ // If the "new" element is too small then the test should fail.
+ {
+ ByteVector data = CreatePatchElement();
+ // new_length := 0x5 (too small).
+ ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x05, &data);
+ TestInvalidInitialize<PatchElementReader>(&data);
+ }
+}
+
+TEST(PatchElementTest, WrongExtraData) {
+ // Make "new" too large so insufficient extra data exists to cover the image.
+ {
+ ByteVector data = CreatePatchElement();
+ // new_length := 0x14 (too large).
+ ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x14, &data);
+ TestInvalidInitialize<PatchElementReader>(&data);
+ }
+ // Make "new" too small so there is too much extra data.
+ {
+ ByteVector data = CreatePatchElement();
+ // new_length := 0x12 (too small).
+ ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x12, &data);
+ TestInvalidInitialize<PatchElementReader>(&data);
+ }
+}
+
+TEST(EnsemblePatchTest, RawPatch) {
+ ByteVector data = {
+ // PatchHeader
+ 0x5A, 0x75, 0x63, 0x00, // magic
+ 0x10, 0x32, 0x54, 0x76, // old_size
+ 0x00, 0x11, 0x22, 0x33, // old_crc
+ 0x01, 0, 0, 0, // new_size
+ 0x44, 0x55, 0x66, 0x77, // new_crc
+
+ 1, 0, 0, 0, // number of element
+
+ // PatchElementHeader
+ 0x01, 0, 0, 0, // old_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x00, 0, 0, 0, // new_offset
+ 0x01, 0, 0, 0, // new_length
+ 'P', 'x', '8', '6', // exe_type = EXE_TYPE_WIN32_X86
+ // EquivalenceSource
+ 0, 0, 0, 0, // src_skip size
+ 0, 0, 0, 0, // dst_skip size
+ 0, 0, 0, 0, // copy_count size
+ // ExtraDataSource
+ 0x01, 0, 0, 0, // extra_data size
+ 0x04, // extra_data content
+ // RawDeltaSource
+ 0, 0, 0, 0, // raw_delta_skip size
+ 0, 0, 0, 0, // raw_delta_diff size
+ // ReferenceDeltaSource
+ 0, 0, 0, 0, // reference_delta size
+ // PatchElementReader
+ 0, 0, 0, 0, // pool count
+ };
+
+ EnsemblePatchReader ensemble_patch_reader =
+ TestInitialize<EnsemblePatchReader>(&data);
+
+ PatchHeader header = ensemble_patch_reader.header();
+ EXPECT_EQ(PatchHeader::kMagic, header.magic);
+ EXPECT_EQ(0x76543210U, header.old_size);
+ EXPECT_EQ(0x33221100U, header.old_crc);
+ EXPECT_EQ(0x01U, header.new_size);
+ EXPECT_EQ(0x77665544U, header.new_crc);
+
+ const std::vector<PatchElementReader>& elements =
+ ensemble_patch_reader.elements();
+ EXPECT_EQ(size_t(1), elements.size());
+
+ EnsemblePatchWriter ensemble_patch_writer(header);
+ PatchElementWriter patch_element_writer(elements[0].element_match());
+ patch_element_writer.SetEquivalenceSink({});
+ patch_element_writer.SetExtraDataSink(ExtraDataSink({0x04}));
+ patch_element_writer.SetRawDeltaSink({});
+ patch_element_writer.SetReferenceDeltaSink({});
+ ensemble_patch_writer.AddElement(std::move(patch_element_writer));
+
+ TestSerialize(data, ensemble_patch_writer);
+}
+
+TEST(EnsemblePatchTest, CheckFile) {
+ ByteVector data = {
+ // PatchHeader
+ 0x5A, 0x75, 0x63, 0x00, // magic
+ 0x05, 0x00, 0x00, 0x00, // old_size
+ 0xDF, 0x13, 0xE4, 0x10, // old_crc
+ 0x03, 0x00, 0x00, 0x00, // new_size
+ 0xDC, 0xF7, 0x00, 0x40, // new_crc
+
+ 1, 0, 0, 0, // number of element
+
+ // PatchElementHeader
+ 0x01, 0, 0, 0, // old_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x00, 0, 0, 0, // new_offset
+ 0x03, 0, 0, 0, // new_length
+ 'P', 'x', '8', '6', // exe_type = EXE_TYPE_WIN32_X86
+ // EquivalenceSource
+ 0, 0, 0, 0, // src_skip size
+ 0, 0, 0, 0, // dst_skip size
+ 0, 0, 0, 0, // copy_count size
+ // ExtraDataSource
+ 0x03, 0, 0, 0, // extra_data size
+ 'A', 'B', 'C', // extra_data content
+ // RawDeltaSource
+ 0, 0, 0, 0, // raw_delta_skip size
+ 0, 0, 0, 0, // raw_delta_diff size
+ // ReferenceDeltaSource
+ 0, 0, 0, 0, // reference_delta size
+ // PatchElementReader
+ 0, 0, 0, 0, // pool count
+ };
+
+ EnsemblePatchReader ensemble_patch_reader =
+ TestInitialize<EnsemblePatchReader>(&data);
+
+ ByteVector old_file = {0x10, 0x32, 0x54, 0x76, 0x98};
+ ByteVector new_file = {0xBA, 0xDC, 0xFE};
+
+ ConstBufferView old_image(old_file.data(), old_file.size());
+ ConstBufferView new_image(new_file.data(), new_file.size());
+
+ EXPECT_TRUE(ensemble_patch_reader.CheckOldFile(old_image));
+ EXPECT_TRUE(ensemble_patch_reader.CheckNewFile(new_image));
+ EXPECT_FALSE(ensemble_patch_reader.CheckOldFile(new_image));
+ EXPECT_FALSE(ensemble_patch_reader.CheckNewFile(old_image));
+}
+
+TEST(EnsemblePatchTest, InvalidMagic) {
+ ByteVector data = {
+ // PatchHeader
+ 0x42, 0x42, 0x42, 0x00, // magic
+ 0x10, 0x32, 0x54, 0x76, // old_size
+ 0x00, 0x11, 0x22, 0x33, // old_crc
+ 0x03, 0x00, 0x00, 0x00, // new_size
+ 0x44, 0x55, 0x66, 0x77, // new_crc
+
+ 1, 0, 0, 0, // number of element
+
+ // PatchElementHeader
+ 0x01, 0, 0, 0, // old_offset
+ 0x02, 0, 0, 0, // old_length
+ 0x00, 0, 0, 0, // new_offset
+ 0x03, 0, 0, 0, // new_length
+ 'P', 'x', '8', '6', // exe_type = EXE_TYPE_WIN32_X86
+ // EquivalenceSource
+ 0, 0, 0, 0, // src_skip size
+ 0, 0, 0, 0, // dst_skip size
+ 0, 0, 0, 0, // copy_count size
+ // ExtraDataSource
+ 0, 0, 0, 0, // extra_data size
+ // RawDeltaSource
+ 0, 0, 0, 0, // raw_delta_skip size
+ 0, 0, 0, 0, // raw_delta_diff size
+ // ReferenceDeltaSource
+ 0, 0, 0, 0, // reference_delta size
+ // PatchElementReader
+ 0, 0, 0, 0, // pool count
+ };
+
+ TestInvalidInitialize<EnsemblePatchReader>(&data);
+}
+
+} // namespace zucchini
diff --git a/patch_reader.cc b/patch_reader.cc
new file mode 100644
index 0000000..99951da
--- /dev/null
+++ b/patch_reader.cc
@@ -0,0 +1,388 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/patch_reader.h"
+
+#include <type_traits>
+#include <utility>
+
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/crc32.h"
+
+namespace zucchini {
+
+namespace patch {
+
+bool ParseElementMatch(BufferSource* source, ElementMatch* element_match) {
+ PatchElementHeader unsafe_element_header;
+ if (!source->GetValue(&unsafe_element_header)) {
+ LOG(ERROR) << "Impossible to read ElementMatch from source.";
+ return false;
+ }
+ ExecutableType exe_type =
+ CastToExecutableType(unsafe_element_header.exe_type);
+ if (exe_type == kExeTypeUnknown) {
+ LOG(ERROR) << "Invalid ExecutableType found.";
+ return false;
+ }
+ if (!unsafe_element_header.old_length || !unsafe_element_header.new_length) {
+ LOG(ERROR) << "Empty patch element found.";
+ return false;
+ }
+ // |unsafe_element_header| is now considered to be safe as it has a valid
+ // |exe_type| and the length fields are of sufficient size.
+ const auto& element_header = unsafe_element_header;
+
+ // Caveat: Element offsets and lengths can still be invalid (e.g., exceeding
+ // archive bounds), but this will be checked later.
+ element_match->old_element.offset = element_header.old_offset;
+ element_match->old_element.size = element_header.old_length;
+ element_match->new_element.offset = element_header.new_offset;
+ element_match->new_element.size = element_header.new_length;
+ element_match->old_element.exe_type = exe_type;
+ element_match->new_element.exe_type = exe_type;
+ return true;
+}
+
+bool ParseBuffer(BufferSource* source, BufferSource* buffer) {
+ uint32_t unsafe_size = 0; // Bytes.
+ static_assert(sizeof(size_t) >= sizeof(unsafe_size),
+ "size_t is expected to be larger than uint32_t.");
+ if (!source->GetValue(&unsafe_size)) {
+ LOG(ERROR) << "Impossible to read buffer size from source.";
+ return false;
+ }
+ if (!source->GetRegion(static_cast<size_t>(unsafe_size), buffer)) {
+ LOG(ERROR) << "Impossible to read buffer content from source.";
+ return false;
+ }
+ // Caveat: |buffer| is considered to be safe as it was possible to extract it
+ // from the patch. However, this does not mean its contents are safe and when
+ // parsed must be validated if possible.
+ return true;
+}
+
+} // namespace patch
+
+/******** EquivalenceSource ********/
+
+EquivalenceSource::EquivalenceSource() = default;
+EquivalenceSource::EquivalenceSource(const EquivalenceSource&) = default;
+EquivalenceSource::~EquivalenceSource() = default;
+
+bool EquivalenceSource::Initialize(BufferSource* source) {
+ return patch::ParseBuffer(source, &src_skip_) &&
+ patch::ParseBuffer(source, &dst_skip_) &&
+ patch::ParseBuffer(source, &copy_count_);
+}
+
+absl::optional<Equivalence> EquivalenceSource::GetNext() {
+ if (src_skip_.empty() || dst_skip_.empty() || copy_count_.empty())
+ return absl::nullopt;
+
+ Equivalence equivalence = {};
+
+ uint32_t length = 0;
+ if (!patch::ParseVarUInt<uint32_t>(&copy_count_, &length))
+ return absl::nullopt;
+ equivalence.length = base::strict_cast<offset_t>(length);
+
+ int32_t src_offset_diff = 0; // Intentionally signed.
+ if (!patch::ParseVarInt<int32_t>(&src_skip_, &src_offset_diff))
+ return absl::nullopt;
+ base::CheckedNumeric<offset_t> src_offset =
+ previous_src_offset_ + src_offset_diff;
+ if (!src_offset.IsValid())
+ return absl::nullopt;
+
+ equivalence.src_offset = src_offset.ValueOrDie();
+ previous_src_offset_ = src_offset + equivalence.length;
+ if (!previous_src_offset_.IsValid())
+ return absl::nullopt;
+
+ uint32_t dst_offset_diff = 0; // Intentionally unsigned.
+ if (!patch::ParseVarUInt<uint32_t>(&dst_skip_, &dst_offset_diff))
+ return absl::nullopt;
+ base::CheckedNumeric<offset_t> dst_offset =
+ previous_dst_offset_ + dst_offset_diff;
+ if (!dst_offset.IsValid())
+ return absl::nullopt;
+
+ equivalence.dst_offset = dst_offset.ValueOrDie();
+ previous_dst_offset_ = equivalence.dst_offset + equivalence.length;
+ if (!previous_dst_offset_.IsValid())
+ return absl::nullopt;
+
+ // Caveat: |equivalence| is assumed to be safe only once the
+ // ValidateEquivalencesAndExtraData() method has returned true. Prior to this
+ // any equivalence returned is assumed to be unsafe.
+ return equivalence;
+}
+
+/******** ExtraDataSource ********/
+
+ExtraDataSource::ExtraDataSource() = default;
+ExtraDataSource::ExtraDataSource(const ExtraDataSource&) = default;
+ExtraDataSource::~ExtraDataSource() = default;
+
+bool ExtraDataSource::Initialize(BufferSource* source) {
+ return patch::ParseBuffer(source, &extra_data_);
+}
+
+absl::optional<ConstBufferView> ExtraDataSource::GetNext(offset_t size) {
+ ConstBufferView buffer;
+ if (!extra_data_.GetRegion(size, &buffer))
+ return absl::nullopt;
+ // |buffer| is assumed to always be safe/valid.
+ return buffer;
+}
+
+/******** RawDeltaSource ********/
+
+RawDeltaSource::RawDeltaSource() = default;
+RawDeltaSource::RawDeltaSource(const RawDeltaSource&) = default;
+RawDeltaSource::~RawDeltaSource() = default;
+
+bool RawDeltaSource::Initialize(BufferSource* source) {
+ return patch::ParseBuffer(source, &raw_delta_skip_) &&
+ patch::ParseBuffer(source, &raw_delta_diff_);
+}
+
+absl::optional<RawDeltaUnit> RawDeltaSource::GetNext() {
+ if (raw_delta_skip_.empty() || raw_delta_diff_.empty())
+ return absl::nullopt;
+
+ RawDeltaUnit raw_delta = {};
+ uint32_t copy_offset_diff = 0;
+ if (!patch::ParseVarUInt<uint32_t>(&raw_delta_skip_, &copy_offset_diff))
+ return absl::nullopt;
+ base::CheckedNumeric<offset_t> copy_offset =
+ copy_offset_diff + copy_offset_compensation_;
+ if (!copy_offset.IsValid())
+ return absl::nullopt;
+ raw_delta.copy_offset = copy_offset.ValueOrDie();
+
+ if (!raw_delta_diff_.GetValue<int8_t>(&raw_delta.diff))
+ return absl::nullopt;
+
+ // A 0 value for a delta.diff is considered invalid since it has no meaning.
+ if (!raw_delta.diff)
+ return absl::nullopt;
+
+ // We keep track of the compensation needed for next offset, taking into
+ // account delta encoding and bias of -1.
+ copy_offset_compensation_ = copy_offset + 1;
+ if (!copy_offset_compensation_.IsValid())
+ return absl::nullopt;
+ // |raw_delta| is assumed to always be safe/valid.
+ return raw_delta;
+}
+
+/******** ReferenceDeltaSource ********/
+
+ReferenceDeltaSource::ReferenceDeltaSource() = default;
+ReferenceDeltaSource::ReferenceDeltaSource(const ReferenceDeltaSource&) =
+ default;
+ReferenceDeltaSource::~ReferenceDeltaSource() = default;
+
+bool ReferenceDeltaSource::Initialize(BufferSource* source) {
+ return patch::ParseBuffer(source, &source_);
+}
+
+absl::optional<int32_t> ReferenceDeltaSource::GetNext() {
+ if (source_.empty())
+ return absl::nullopt;
+ int32_t ref_delta = 0;
+ if (!patch::ParseVarInt<int32_t>(&source_, &ref_delta))
+ return absl::nullopt;
+ // |ref_delta| is assumed to always be safe/valid.
+ return ref_delta;
+}
+
+/******** TargetSource ********/
+
+TargetSource::TargetSource() = default;
+TargetSource::TargetSource(const TargetSource&) = default;
+TargetSource::~TargetSource() = default;
+
+bool TargetSource::Initialize(BufferSource* source) {
+ return patch::ParseBuffer(source, &extra_targets_);
+}
+
+absl::optional<offset_t> TargetSource::GetNext() {
+ if (extra_targets_.empty())
+ return absl::nullopt;
+
+ uint32_t target_diff = 0;
+ if (!patch::ParseVarUInt<uint32_t>(&extra_targets_, &target_diff))
+ return absl::nullopt;
+ base::CheckedNumeric<offset_t> target = target_diff + target_compensation_;
+ if (!target.IsValid())
+ return absl::nullopt;
+
+ // We keep track of the compensation needed for next target, taking into
+ // account delta encoding and bias of -1.
+ target_compensation_ = target + 1;
+ if (!target_compensation_.IsValid())
+ return absl::nullopt;
+ // Caveat: |target| will be a valid offset_t, but it's up to the caller to
+ // check whether it's a valid offset for an image.
+ return offset_t(target.ValueOrDie());
+}
+
+/******** PatchElementReader ********/
+
+PatchElementReader::PatchElementReader() = default;
+PatchElementReader::PatchElementReader(PatchElementReader&&) = default;
+PatchElementReader::~PatchElementReader() = default;
+
+bool PatchElementReader::Initialize(BufferSource* source) {
+ bool ok =
+ patch::ParseElementMatch(source, &element_match_) &&
+ equivalences_.Initialize(source) && extra_data_.Initialize(source) &&
+ ValidateEquivalencesAndExtraData() && raw_delta_.Initialize(source) &&
+ reference_delta_.Initialize(source);
+ if (!ok)
+ return false;
+ uint32_t pool_count = 0;
+ if (!source->GetValue(&pool_count)) {
+ LOG(ERROR) << "Impossible to read pool_count from source.";
+ return false;
+ }
+ for (uint32_t i = 0; i < pool_count; ++i) {
+ uint8_t pool_tag_value = 0;
+ if (!source->GetValue(&pool_tag_value)) {
+ LOG(ERROR) << "Impossible to read pool_tag from source.";
+ return false;
+ }
+ PoolTag pool_tag(pool_tag_value);
+ if (pool_tag == kNoPoolTag) {
+ LOG(ERROR) << "Invalid pool_tag encountered in ExtraTargetList.";
+ return false;
+ }
+ auto insert_result = extra_targets_.insert({pool_tag, {}});
+ if (!insert_result.second) { // Element already present.
+ LOG(ERROR) << "Multiple ExtraTargetList found for the same pool_tag.";
+ return false;
+ }
+ if (!insert_result.first->second.Initialize(source))
+ return false;
+ }
+ return true;
+}
+
+bool PatchElementReader::ValidateEquivalencesAndExtraData() {
+ EquivalenceSource equivalences_copy = equivalences_;
+
+ const size_t old_region_size = element_match_.old_element.size;
+ const size_t new_region_size = element_match_.new_element.size;
+
+ base::CheckedNumeric<uint32_t> total_length = 0;
+ // Validate that each |equivalence| falls within the bounds of the
+ // |element_match_| and are in order.
+ offset_t prev_dst_end = 0;
+ for (auto equivalence = equivalences_copy.GetNext(); equivalence.has_value();
+ equivalence = equivalences_copy.GetNext()) {
+ if (!RangeIsBounded(equivalence->src_offset, equivalence->length,
+ old_region_size) ||
+ !RangeIsBounded(equivalence->dst_offset, equivalence->length,
+ new_region_size)) {
+ LOG(ERROR) << "Out of bounds equivalence detected.";
+ return false;
+ }
+ if (prev_dst_end > equivalence->dst_end()) {
+ LOG(ERROR) << "Out of order equivalence detected.";
+ return false;
+ }
+ prev_dst_end = equivalence->dst_end();
+ total_length += equivalence->length;
+ }
+ if (!total_length.IsValid() ||
+ element_match_.new_element.region().size < total_length.ValueOrDie() ||
+ extra_data_.extra_data().size() !=
+ element_match_.new_element.region().size -
+ static_cast<size_t>(total_length.ValueOrDie())) {
+ LOG(ERROR) << "Incorrect amount of extra_data.";
+ return false;
+ }
+ return true;
+}
+
+/******** EnsemblePatchReader ********/
+
+absl::optional<EnsemblePatchReader> EnsemblePatchReader::Create(
+ ConstBufferView buffer) {
+ BufferSource source(buffer);
+ EnsemblePatchReader patch;
+ if (!patch.Initialize(&source))
+ return absl::nullopt;
+ return patch;
+}
+
+EnsemblePatchReader::EnsemblePatchReader() = default;
+EnsemblePatchReader::EnsemblePatchReader(EnsemblePatchReader&&) = default;
+EnsemblePatchReader::~EnsemblePatchReader() = default;
+
+bool EnsemblePatchReader::Initialize(BufferSource* source) {
+ if (!source->GetValue(&header_)) {
+ LOG(ERROR) << "Impossible to read header from source.";
+ return false;
+ }
+ if (header_.magic != PatchHeader::kMagic) {
+ LOG(ERROR) << "Patch contains invalid magic.";
+ return false;
+ }
+ // |header_| is assumed to be safe from this point forward.
+
+ uint32_t element_count = 0;
+ if (!source->GetValue(&element_count)) {
+ LOG(ERROR) << "Impossible to read element_count from source.";
+ return false;
+ }
+
+ offset_t current_dst_offset = 0;
+ for (uint32_t i = 0; i < element_count; ++i) {
+ PatchElementReader element_patch;
+ if (!element_patch.Initialize(source))
+ return false;
+
+ if (!element_patch.old_element().FitsIn(header_.old_size) ||
+ !element_patch.new_element().FitsIn(header_.new_size)) {
+ LOG(ERROR) << "Invalid element encountered.";
+ return false;
+ }
+
+ if (element_patch.new_element().offset != current_dst_offset) {
+ LOG(ERROR) << "Invalid element encountered.";
+ return false;
+ }
+ current_dst_offset = element_patch.new_element().EndOffset();
+
+ elements_.push_back(std::move(element_patch));
+ }
+ if (current_dst_offset != header_.new_size) {
+ LOG(ERROR) << "Patch elements don't fully cover new image file.";
+ return false;
+ }
+
+ if (!source->empty()) {
+ LOG(ERROR) << "Patch was not fully consumed.";
+ return false;
+ }
+
+ return true;
+}
+
+bool EnsemblePatchReader::CheckOldFile(ConstBufferView old_image) const {
+ return old_image.size() == header_.old_size &&
+ CalculateCrc32(old_image.begin(), old_image.end()) == header_.old_crc;
+}
+
+bool EnsemblePatchReader::CheckNewFile(ConstBufferView new_image) const {
+ return new_image.size() == header_.new_size &&
+ CalculateCrc32(new_image.begin(), new_image.end()) == header_.new_crc;
+}
+
+} // namespace zucchini
diff --git a/patch_reader.h b/patch_reader.h
new file mode 100644
index 0000000..93d64b0
--- /dev/null
+++ b/patch_reader.h
@@ -0,0 +1,285 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_PATCH_READER_H_
+#define COMPONENTS_ZUCCHINI_PATCH_READER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <vector>
+
+#include "base/debug/stack_trace.h"
+#include "base/logging.h"
+#include "base/numerics/checked_math.h"
+#include "components/zucchini/buffer_source.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/patch_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+namespace patch {
+
+// The Parse*() functions below attempt to extract data of a specific type from
+// the beginning of |source|. A parse function: On success, consumes the used
+// portion of |source|, writes data into the output parameter, and returns
+// true. Otherwise returns false and does not consume |source|.
+
+// Parses |source| for the next ElementMatch.
+bool ParseElementMatch(BufferSource* source, ElementMatch* element_match);
+
+// Parses |source| for the next embedded BufferSource.
+bool ParseBuffer(BufferSource* source, BufferSource* buffer);
+
+// Parses |source| for the next VarUInt.
+template <class T>
+bool ParseVarUInt(BufferSource* source, T* value) {
+ auto bytes_read = DecodeVarUInt(source->begin(), source->end(), value);
+ if (!bytes_read) {
+ LOG(ERROR) << "Impossible to read VarUInt from source.";
+ LOG(ERROR) << base::debug::StackTrace().ToString();
+ return false;
+ }
+ // Advance |source| beyond the VarUInt value.
+ source->Skip(bytes_read);
+ return true;
+}
+
+// Parses |source| for the next VarInt.
+template <class T>
+bool ParseVarInt(BufferSource* source, T* value) {
+ auto bytes_read = DecodeVarInt(source->begin(), source->end(), value);
+ if (!bytes_read) {
+ LOG(ERROR) << "Impossible to read VarInt from source.";
+ LOG(ERROR) << base::debug::StackTrace().ToString();
+ return false;
+ }
+ // Advance |source| beyond the VarInt value.
+ source->Skip(bytes_read);
+ return true;
+}
+
+} // namespace patch
+
+// The *Source classes below are light-weight (i.e., allows copying) visitors to
+// read patch data. Each of them has an associated "main type", and performs the
+// following:
+// - Consumes portions of a BufferSource (required to remain valid for the
+// lifetime of the object).
+// - Decodes consumed data, which represent a list of items with "main type".
+// - Dispenses "main type" elements (hence "Source" in the name).
+//
+// Common "core functions" implemented by *Source classes are:
+// - bool Initialize(BufferSource* source): Consumes data from BufferSource and
+// initializes internal states. Returns true if successful, and false
+// otherwise (|source| may be partially consumed).
+// - absl::optional<MAIN_TYPE> GetNext(OPT_PARAMS): Decodes consumed data and
+// returns the next item as absl::optional (returns absl::nullopt on failure).
+// - bool Done() const: Returns true if no more items remain; otherwise false.
+//
+// Usages of *Source instances don't mix, and GetNext() methods have dissimilar
+// interfaces. Therefore we do not use inheritance to relate *Source classes,
+// and simply implement "core functions" with matching names.
+
+// Source for Equivalences.
+class EquivalenceSource {
+ public:
+ EquivalenceSource();
+ EquivalenceSource(const EquivalenceSource&);
+ ~EquivalenceSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ absl::optional<Equivalence> GetNext();
+ bool Done() const {
+ return src_skip_.empty() && dst_skip_.empty() && copy_count_.empty();
+ }
+
+ // Accessors for unittest.
+ BufferSource src_skip() const { return src_skip_; }
+ BufferSource dst_skip() const { return dst_skip_; }
+ BufferSource copy_count() const { return copy_count_; }
+
+ private:
+ BufferSource src_skip_;
+ BufferSource dst_skip_;
+ BufferSource copy_count_;
+
+ base::CheckedNumeric<offset_t> previous_src_offset_ = 0;
+ base::CheckedNumeric<offset_t> previous_dst_offset_ = 0;
+};
+
+// Source for extra data.
+class ExtraDataSource {
+ public:
+ ExtraDataSource();
+ ExtraDataSource(const ExtraDataSource&);
+ ~ExtraDataSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ // |size| is the size in bytes of the buffer requested.
+ absl::optional<ConstBufferView> GetNext(offset_t size);
+ bool Done() const { return extra_data_.empty(); }
+
+ // Accessors for unittest.
+ BufferSource extra_data() const { return extra_data_; }
+
+ private:
+ BufferSource extra_data_;
+};
+
+// Source for raw delta.
+class RawDeltaSource {
+ public:
+ RawDeltaSource();
+ RawDeltaSource(const RawDeltaSource&);
+ ~RawDeltaSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ absl::optional<RawDeltaUnit> GetNext();
+ bool Done() const {
+ return raw_delta_skip_.empty() && raw_delta_diff_.empty();
+ }
+
+ // Accessors for unittest.
+ BufferSource raw_delta_skip() const { return raw_delta_skip_; }
+ BufferSource raw_delta_diff() const { return raw_delta_diff_; }
+
+ private:
+ BufferSource raw_delta_skip_;
+ BufferSource raw_delta_diff_;
+
+ base::CheckedNumeric<offset_t> copy_offset_compensation_ = 0;
+};
+
+// Source for reference delta.
+class ReferenceDeltaSource {
+ public:
+ ReferenceDeltaSource();
+ ReferenceDeltaSource(const ReferenceDeltaSource&);
+ ~ReferenceDeltaSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ absl::optional<int32_t> GetNext();
+ bool Done() const { return source_.empty(); }
+
+ // Accessors for unittest.
+ BufferSource reference_delta() const { return source_; }
+
+ private:
+ BufferSource source_;
+};
+
+// Source for additional targets.
+class TargetSource {
+ public:
+ TargetSource();
+ TargetSource(const TargetSource&);
+ ~TargetSource();
+
+ // Core functions.
+ bool Initialize(BufferSource* source);
+ absl::optional<offset_t> GetNext();
+ bool Done() const { return extra_targets_.empty(); }
+
+ // Accessors for unittest.
+ BufferSource extra_targets() const { return extra_targets_; }
+
+ private:
+ BufferSource extra_targets_;
+
+ base::CheckedNumeric<offset_t> target_compensation_ = 0;
+};
+
+// Following are utility classes providing a structured view on data forming a
+// patch.
+
+// Utility to read a patch element. A patch element contains all the information
+// necessary to patch a single element. This class provides access
+// to the multiple streams of data forming the patch element.
+class PatchElementReader {
+ public:
+ PatchElementReader();
+ PatchElementReader(PatchElementReader&&);
+ ~PatchElementReader();
+
+ // If data read from |source| is well-formed, initialize cached sources to
+ // read from it, and returns true. Otherwise returns false.
+ bool Initialize(BufferSource* source);
+
+ const ElementMatch& element_match() const { return element_match_; }
+ const Element& old_element() const { return element_match_.old_element; }
+ const Element& new_element() const { return element_match_.new_element; }
+
+ // The Get*() functions below return copies of cached sources. Callers may
+ // assume the following:
+ // - Equivalences satisfy basic boundary constraints
+ // - "Old" / "new" blocks lie entirely in "old" / "new" images.
+ // - "New" blocks are sorted.
+ EquivalenceSource GetEquivalenceSource() const { return equivalences_; }
+ ExtraDataSource GetExtraDataSource() const { return extra_data_; }
+ RawDeltaSource GetRawDeltaSource() const { return raw_delta_; }
+ ReferenceDeltaSource GetReferenceDeltaSource() const {
+ return reference_delta_;
+ }
+ TargetSource GetExtraTargetSource(PoolTag tag) const {
+ auto pos = extra_targets_.find(tag);
+ return pos != extra_targets_.end() ? pos->second : TargetSource();
+ }
+
+ private:
+ // Checks that "old" and "new" blocks of each item in |equivalences_| satisfy
+ // basic order and image bound constraints (using |element_match_| data). Also
+ // validates that the amount of extra data is correct. Returns true if
+ // successful.
+ bool ValidateEquivalencesAndExtraData();
+
+ ElementMatch element_match_;
+
+ // Cached sources.
+ EquivalenceSource equivalences_;
+ ExtraDataSource extra_data_;
+ RawDeltaSource raw_delta_;
+ ReferenceDeltaSource reference_delta_;
+ std::map<PoolTag, TargetSource> extra_targets_;
+};
+
+// Utility to read a Zucchini ensemble patch. An ensemble patch is the
+// concatenation of a patch header with a vector of patch elements.
+class EnsemblePatchReader {
+ public:
+ // If data read from |buffer| is well-formed, initializes and returns
+ // an instance of EnsemblePatchReader. Otherwise returns absl::nullopt.
+ static absl::optional<EnsemblePatchReader> Create(ConstBufferView buffer);
+
+ EnsemblePatchReader();
+ EnsemblePatchReader(EnsemblePatchReader&&);
+ ~EnsemblePatchReader();
+
+ // If data read from |source| is well-formed, initialize internal state to
+ // read from it, and returns true. Otherwise returns false.
+ bool Initialize(BufferSource* source);
+
+ // Check old / new image file validity, comparing against expected size and
+ // CRC32. Return true if file matches expectations, false otherwise.
+ bool CheckOldFile(ConstBufferView old_image) const;
+ bool CheckNewFile(ConstBufferView new_image) const;
+
+ const PatchHeader& header() const { return header_; }
+ const std::vector<PatchElementReader>& elements() const { return elements_; }
+
+ private:
+ PatchHeader header_;
+ std::vector<PatchElementReader> elements_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_PATCH_READER_H_
diff --git a/patch_utils.h b/patch_utils.h
new file mode 100644
index 0000000..5f49195
--- /dev/null
+++ b/patch_utils.h
@@ -0,0 +1,135 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_PATCH_UTILS_H_
+#define COMPONENTS_ZUCCHINI_PATCH_UTILS_H_
+
+#include <stdint.h>
+
+#include <iterator>
+#include <type_traits>
+
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A Zucchini 'ensemble' patch is the concatenation of a patch header with a
+// list of patch 'elements', each containing data for patching individual
+// elements.
+
+// Supported by MSVC, g++, and clang++. Ensures no gaps in packing.
+#pragma pack(push, 1)
+
+// Header for a Zucchini patch, found at the beginning of an ensemble patch.
+struct PatchHeader {
+ // Magic signature at the beginning of a Zucchini patch file.
+ enum : uint32_t { kMagic = 'Z' | ('u' << 8) | ('c' << 16) };
+
+ uint32_t magic = 0;
+ uint32_t old_size = 0;
+ uint32_t old_crc = 0;
+ uint32_t new_size = 0;
+ uint32_t new_crc = 0;
+};
+
+// Sanity check.
+static_assert(sizeof(PatchHeader) == 20, "PatchHeader must be 20 bytes");
+
+// Header for a patch element, found at the beginning of every patch element.
+struct PatchElementHeader {
+ uint32_t old_offset;
+ uint32_t old_length;
+ uint32_t new_offset;
+ uint32_t new_length;
+ uint32_t exe_type; // ExecutableType.
+};
+
+// Sanity check.
+static_assert(sizeof(PatchElementHeader) == 20,
+ "PatchElementHeader must be 20 bytes");
+
+#pragma pack(pop)
+
+// Describes a raw FIX operation.
+struct RawDeltaUnit {
+ offset_t copy_offset; // Offset in copy regions.
+ int8_t diff; // Bytewise difference.
+};
+
+// A Zucchini patch contains data streams encoded using varint format to reduce
+// uncompressed size.
+
+// Writes |value| as a varint in |dst| and returns an iterator pointing beyond
+// the written region. |dst| is assumed to hold enough space. Typically, this
+// will write to a vector using back insertion, e.g.:
+// EncodeVarUInt(value, std::back_inserter(vector));
+template <class T, class It>
+It EncodeVarUInt(T value, It dst) {
+ static_assert(std::is_unsigned<T>::value, "Value type must be unsigned");
+
+ while (value >= 0x80) {
+ *dst++ = static_cast<uint8_t>(value) | 0x80;
+ value >>= 7;
+ }
+ *dst++ = static_cast<uint8_t>(value);
+ return dst;
+}
+
+// Same as EncodeVarUInt(), but for signed values.
+template <class T, class It>
+It EncodeVarInt(T value, It dst) {
+ static_assert(std::is_signed<T>::value, "Value type must be signed");
+
+ using unsigned_value_type = typename std::make_unsigned<T>::type;
+ if (value < 0)
+ return EncodeVarUInt((unsigned_value_type(~value) << 1) | 1, dst);
+ else
+ return EncodeVarUInt(unsigned_value_type(value) << 1, dst);
+}
+
+// Tries to read a varint unsigned integer from |[first, last)|. If
+// successful, writes result into |value| and returns the number of bytes
+// read from |[first, last)|. Otherwise returns 0.
+template <class T, class It>
+typename std::iterator_traits<It>::difference_type DecodeVarUInt(It first,
+ It last,
+ T* value) {
+ static_assert(std::is_unsigned<T>::value, "Value type must be unsigned");
+
+ uint8_t sh = 0;
+ T val = 0;
+ for (auto it = first; it != last;) {
+ val |= T(*it & 0x7F) << sh;
+ if (*(it++) < 0x80) {
+ *value = val;
+ return it - first;
+ }
+ sh += 7;
+ if (sh >= sizeof(T) * 8) // Overflow!
+ return 0;
+ }
+ return 0;
+}
+
+// Same as DecodeVarUInt(), but for signed values.
+template <class T, class It>
+typename std::iterator_traits<It>::difference_type DecodeVarInt(It first,
+ It last,
+ T* value) {
+ static_assert(std::is_signed<T>::value, "Value type must be signed");
+
+ typename std::make_unsigned<T>::type tmp = 0;
+ auto res = DecodeVarUInt(first, last, &tmp);
+ if (res) {
+ if (tmp & 1)
+ *value = ~static_cast<T>(tmp >> 1);
+ else
+ *value = static_cast<T>(tmp >> 1);
+ }
+ return res;
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_PATCH_UTILS_H_
diff --git a/patch_utils_unittest.cc b/patch_utils_unittest.cc
new file mode 100644
index 0000000..81e4e38
--- /dev/null
+++ b/patch_utils_unittest.cc
@@ -0,0 +1,169 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/patch_utils.h"
+
+#include <stdint.h>
+
+#include <iterator>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+template <class T>
+void TestEncodeDecodeVarUInt(const std::vector<T>& data) {
+ std::vector<uint8_t> buffer;
+
+ std::vector<T> values;
+ for (T basis : data) {
+ // For variety, test the neighborhood values for each case in |data|. Some
+ // test cases may result in overflow when computing |value|, but we don't
+ // care about that.
+ for (int delta = -4; delta <= 4; ++delta) {
+ T value = delta + basis;
+ EncodeVarUInt<T>(value, std::back_inserter(buffer));
+ values.push_back(value);
+
+ value = delta - basis;
+ EncodeVarUInt<T>(value, std::back_inserter(buffer));
+ values.push_back(value);
+ }
+ }
+
+ auto it = buffer.begin();
+ for (T expected : values) {
+ T value = T(-1);
+ auto res = DecodeVarUInt(it, buffer.end(), &value);
+ EXPECT_NE(0, res);
+ EXPECT_EQ(expected, value);
+ it += res;
+ }
+ EXPECT_EQ(it, buffer.end());
+
+ T value = T(-1);
+ auto res = DecodeVarUInt(it, buffer.end(), &value);
+ EXPECT_EQ(0, res);
+ EXPECT_EQ(T(-1), value);
+}
+
+template <class T>
+void TestEncodeDecodeVarInt(const std::vector<T>& data) {
+ std::vector<uint8_t> buffer;
+
+ std::vector<T> values;
+ for (T basis : data) {
+ // For variety, test the neighborhood values for each case in |data|. Some
+ // test cases may result in overflow when computing |value|, but we don't
+ // care about that.
+ for (int delta = -4; delta <= 4; ++delta) {
+ T value = delta + basis;
+ EncodeVarInt(value, std::back_inserter(buffer));
+ values.push_back(value);
+
+ value = delta - basis;
+ EncodeVarInt(value, std::back_inserter(buffer));
+ values.push_back(value);
+ }
+ }
+
+ auto it = buffer.begin();
+ for (T expected : values) {
+ T value = T(-1);
+ auto res = DecodeVarInt(it, buffer.end(), &value);
+ EXPECT_NE(0, res);
+ EXPECT_EQ(expected, value);
+ it += res;
+ }
+ EXPECT_EQ(it, buffer.end());
+
+ T value = T(-1);
+ auto res = DecodeVarInt(it, buffer.end(), &value);
+ EXPECT_EQ(0, res);
+ EXPECT_EQ(T(-1), value);
+}
+
+TEST(PatchUtilsTest, EncodeDecodeVarUInt32) {
+ TestEncodeDecodeVarUInt<uint32_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21,
+ 1 << 22, 1 << 27, 1 << 28, 0x7FFFFFFFU,
+ UINT32_MAX});
+}
+
+TEST(PatchUtilsTest, EncodeDecodeVarInt32) {
+ TestEncodeDecodeVarInt<int32_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21,
+ 1 << 22, 1 << 27, 1 << 28, -1, INT32_MIN,
+ INT32_MAX});
+}
+
+TEST(PatchUtilsTest, EncodeDecodeVarUInt64) {
+ TestEncodeDecodeVarUInt<uint64_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21,
+ 1 << 22, 1ULL << 55, 1ULL << 56,
+ 0x7FFFFFFFFFFFFFFFULL, UINT64_MAX});
+}
+
+TEST(PatchUtilsTest, EncodeDecodeVarInt64) {
+ TestEncodeDecodeVarInt<int64_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21,
+ 1 << 22, 1LL << 55, 1LL << 56, -1, INT64_MIN,
+ INT64_MAX});
+}
+
+TEST(PatchUtilsTest, DecodeVarUInt32Malformed) {
+ constexpr uint32_t kUninit = static_cast<uint32_t>(-1LL);
+
+ // Output variable to ensure that on failure, the output variable is not
+ // written to.
+ uint32_t value = uint32_t(-1);
+
+ auto TestDecodeVarInt = [&value](const std::vector<uint8_t>& buffer) {
+ value = kUninit;
+ return DecodeVarUInt(buffer.begin(), buffer.end(), &value);
+ };
+
+ // Exhausted.
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>{}));
+ EXPECT_EQ(kUninit, value);
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(4, 128)));
+ EXPECT_EQ(kUninit, value);
+
+ // Overflow.
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(6, 128)));
+ EXPECT_EQ(kUninit, value);
+ EXPECT_EQ(0, TestDecodeVarInt({128, 128, 128, 128, 128, 42}));
+ EXPECT_EQ(kUninit, value);
+
+ // Following are pathological cases that are not handled for simplicity,
+ // hence decoding is expected to be successful.
+ EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 16}));
+ EXPECT_EQ(uint32_t(0), value);
+ EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 32}));
+ EXPECT_EQ(uint32_t(0), value);
+ EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 64}));
+ EXPECT_EQ(uint32_t(0), value);
+}
+
+TEST(PatchUtilsTest, DecodeVarUInt64Malformed) {
+ constexpr uint64_t kUninit = static_cast<uint64_t>(-1);
+
+ uint64_t value = kUninit;
+ auto TestDecodeVarInt = [&value](const std::vector<uint8_t>& buffer) {
+ value = kUninit;
+ return DecodeVarUInt(buffer.begin(), buffer.end(), &value);
+ };
+
+ // Exhausted.
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>{}));
+ EXPECT_EQ(kUninit, value);
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(9, 128)));
+ EXPECT_EQ(kUninit, value);
+
+ // Overflow.
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(10, 128)));
+ EXPECT_EQ(kUninit, value);
+ EXPECT_EQ(0, TestDecodeVarInt(
+ {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 42}));
+ EXPECT_EQ(kUninit, value);
+}
+
+} // namespace zucchini
diff --git a/patch_writer.cc b/patch_writer.cc
new file mode 100644
index 0000000..1206208
--- /dev/null
+++ b/patch_writer.cc
@@ -0,0 +1,291 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/patch_writer.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "base/numerics/checked_math.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/crc32.h"
+
+namespace zucchini {
+
+namespace patch {
+
+bool SerializeElementMatch(const ElementMatch& element_match,
+ BufferSink* sink) {
+ if (!element_match.IsValid())
+ return false;
+
+ PatchElementHeader element_header;
+ element_header.old_offset =
+ base::checked_cast<uint32_t>(element_match.old_element.offset);
+ element_header.old_length =
+ base::checked_cast<uint32_t>(element_match.old_element.size);
+ element_header.new_offset =
+ base::checked_cast<uint32_t>(element_match.new_element.offset);
+ element_header.new_length =
+ base::checked_cast<uint32_t>(element_match.new_element.size);
+ element_header.exe_type = element_match.exe_type();
+
+ return sink->PutValue<PatchElementHeader>(element_header);
+}
+
+size_t SerializedElementMatchSize(const ElementMatch& element_match) {
+ return sizeof(PatchElementHeader);
+}
+
+bool SerializeBuffer(const std::vector<uint8_t>& buffer, BufferSink* sink) {
+ // buffer.size() is not encoded as varint to simplify SerializedBufferSize().
+ base::CheckedNumeric<uint32_t> size = buffer.size();
+ if (!size.IsValid())
+ return false;
+ return sink->PutValue<uint32_t>(size.ValueOrDie()) &&
+ sink->PutRange(buffer.begin(), buffer.end());
+}
+
+size_t SerializedBufferSize(const std::vector<uint8_t>& buffer) {
+ return sizeof(uint32_t) + buffer.size();
+}
+
+} // namespace patch
+
+/******** EquivalenceSink ********/
+
+EquivalenceSink::EquivalenceSink() = default;
+EquivalenceSink::EquivalenceSink(const std::vector<uint8_t>& src_skip,
+ const std::vector<uint8_t>& dst_skip,
+ const std::vector<uint8_t>& copy_count)
+ : src_skip_(src_skip), dst_skip_(dst_skip), copy_count_(copy_count) {}
+
+EquivalenceSink::EquivalenceSink(EquivalenceSink&&) = default;
+EquivalenceSink::~EquivalenceSink() = default;
+
+void EquivalenceSink::PutNext(const Equivalence& equivalence) {
+ // Equivalences are expected to be given ordered by |dst_offset|.
+ DCHECK_GE(equivalence.dst_offset, dst_offset_);
+ // Unsigned values are ensured by above check.
+
+// Result of subtracting 2 unsigned integers is unsigned. Overflow is allowed
+ // for negative values, as long as uint32_t can hold the result.
+ uint32_t src_offset_diff =
+ base::strict_cast<uint32_t>(equivalence.src_offset - src_offset_);
+ EncodeVarInt<int32_t>(static_cast<int32_t>(src_offset_diff),
+ std::back_inserter(src_skip_));
+
+ EncodeVarUInt<uint32_t>(
+ base::strict_cast<uint32_t>(equivalence.dst_offset - dst_offset_),
+ std::back_inserter(dst_skip_));
+
+ EncodeVarUInt<uint32_t>(base::strict_cast<uint32_t>(equivalence.length),
+ std::back_inserter(copy_count_));
+
+ src_offset_ = equivalence.src_offset + equivalence.length;
+ dst_offset_ = equivalence.dst_offset + equivalence.length;
+}
+
+size_t EquivalenceSink::SerializedSize() const {
+ return patch::SerializedBufferSize(src_skip_) +
+ patch::SerializedBufferSize(dst_skip_) +
+ patch::SerializedBufferSize(copy_count_);
+}
+
+bool EquivalenceSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(src_skip_, sink) &&
+ patch::SerializeBuffer(dst_skip_, sink) &&
+ patch::SerializeBuffer(copy_count_, sink);
+}
+
+/******** ExtraDataSink ********/
+
+ExtraDataSink::ExtraDataSink() = default;
+ExtraDataSink::ExtraDataSink(const std::vector<uint8_t>& extra_data)
+ : extra_data_(extra_data) {}
+
+ExtraDataSink::ExtraDataSink(ExtraDataSink&&) = default;
+ExtraDataSink::~ExtraDataSink() = default;
+
+void ExtraDataSink::PutNext(ConstBufferView region) {
+ extra_data_.insert(extra_data_.end(), region.begin(), region.end());
+}
+
+size_t ExtraDataSink::SerializedSize() const {
+ return patch::SerializedBufferSize(extra_data_);
+}
+
+bool ExtraDataSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(extra_data_, sink);
+}
+
+/******** RawDeltaSink ********/
+
+RawDeltaSink::RawDeltaSink() = default;
+RawDeltaSink::RawDeltaSink(const std::vector<uint8_t>& raw_delta_skip,
+ const std::vector<uint8_t>& raw_delta_diff)
+ : raw_delta_skip_(raw_delta_skip), raw_delta_diff_(raw_delta_diff) {}
+
+RawDeltaSink::RawDeltaSink(RawDeltaSink&&) = default;
+RawDeltaSink::~RawDeltaSink() = default;
+
+void RawDeltaSink::PutNext(const RawDeltaUnit& delta) {
+ DCHECK_GE(delta.copy_offset, copy_offset_compensation_);
+ EncodeVarUInt<uint32_t>(base::strict_cast<uint32_t>(
+ delta.copy_offset - copy_offset_compensation_),
+ std::back_inserter(raw_delta_skip_));
+
+ copy_offset_compensation_ = delta.copy_offset + 1;
+
+ raw_delta_diff_.push_back(delta.diff);
+}
+
+size_t RawDeltaSink::SerializedSize() const {
+ return patch::SerializedBufferSize(raw_delta_skip_) +
+ patch::SerializedBufferSize(raw_delta_diff_);
+}
+
+bool RawDeltaSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(raw_delta_skip_, sink) &&
+ patch::SerializeBuffer(raw_delta_diff_, sink);
+}
+
+/******** ReferenceDeltaSink ********/
+
+ReferenceDeltaSink::ReferenceDeltaSink() = default;
+ReferenceDeltaSink::ReferenceDeltaSink(
+ const std::vector<uint8_t>& reference_delta)
+ : reference_delta_(reference_delta) {}
+
+ReferenceDeltaSink::ReferenceDeltaSink(ReferenceDeltaSink&&) = default;
+ReferenceDeltaSink::~ReferenceDeltaSink() = default;
+
+void ReferenceDeltaSink::PutNext(int32_t diff) {
+ EncodeVarInt<int32_t>(diff, std::back_inserter(reference_delta_));
+}
+
+size_t ReferenceDeltaSink::SerializedSize() const {
+ return patch::SerializedBufferSize(reference_delta_);
+}
+
+bool ReferenceDeltaSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(reference_delta_, sink);
+}
+
+/******** TargetSink ********/
+
+TargetSink::TargetSink() = default;
+TargetSink::TargetSink(const std::vector<uint8_t>& extra_targets)
+ : extra_targets_(extra_targets) {}
+
+TargetSink::TargetSink(TargetSink&&) = default;
+TargetSink::~TargetSink() = default;
+
+void TargetSink::PutNext(uint32_t target) {
+ DCHECK_GE(target, target_compensation_);
+
+ EncodeVarUInt<uint32_t>(
+ base::strict_cast<uint32_t>(target - target_compensation_),
+ std::back_inserter(extra_targets_));
+
+ target_compensation_ = target + 1;
+}
+
+size_t TargetSink::SerializedSize() const {
+ return patch::SerializedBufferSize(extra_targets_);
+}
+
+bool TargetSink::SerializeInto(BufferSink* sink) const {
+ return patch::SerializeBuffer(extra_targets_, sink);
+}
+
+/******** PatchElementWriter ********/
+
+PatchElementWriter::PatchElementWriter() = default;
+PatchElementWriter::PatchElementWriter(ElementMatch element_match)
+ : element_match_(element_match) {}
+
+PatchElementWriter::PatchElementWriter(PatchElementWriter&&) = default;
+PatchElementWriter::~PatchElementWriter() = default;
+
+size_t PatchElementWriter::SerializedSize() const {
+ size_t serialized_size =
+ patch::SerializedElementMatchSize(element_match_) +
+ equivalences_->SerializedSize() + extra_data_->SerializedSize() +
+ raw_delta_->SerializedSize() + reference_delta_->SerializedSize();
+
+ serialized_size += sizeof(uint32_t);
+ for (const auto& extra_symbols : extra_targets_)
+ serialized_size += extra_symbols.second.SerializedSize() + 1;
+ return serialized_size;
+}
+
+bool PatchElementWriter::SerializeInto(BufferSink* sink) const {
+ bool ok =
+ patch::SerializeElementMatch(element_match_, sink) &&
+ equivalences_->SerializeInto(sink) && extra_data_->SerializeInto(sink) &&
+ raw_delta_->SerializeInto(sink) && reference_delta_->SerializeInto(sink);
+ if (!ok)
+ return false;
+
+ if (!sink->PutValue<uint32_t>(
+ base::checked_cast<uint32_t>(extra_targets_.size())))
+ return false;
+ for (const auto& extra_target_sink : extra_targets_) {
+ if (!sink->PutValue<uint8_t>(extra_target_sink.first.value()))
+ return false;
+ if (!extra_target_sink.second.SerializeInto(sink))
+ return false;
+ }
+ return true;
+}
+
+/******** EnsemblePatchWriter ********/
+
+EnsemblePatchWriter::~EnsemblePatchWriter() = default;
+
+EnsemblePatchWriter::EnsemblePatchWriter(const PatchHeader& header)
+ : header_(header) {
+ DCHECK_EQ(header_.magic, PatchHeader::kMagic);
+}
+
+EnsemblePatchWriter::EnsemblePatchWriter(ConstBufferView old_image,
+ ConstBufferView new_image) {
+ header_.magic = PatchHeader::kMagic;
+ header_.old_size = base::checked_cast<uint32_t>(old_image.size());
+ header_.old_crc = CalculateCrc32(old_image.begin(), old_image.end());
+ header_.new_size = base::checked_cast<uint32_t>(new_image.size());
+ header_.new_crc = CalculateCrc32(new_image.begin(), new_image.end());
+}
+
+void EnsemblePatchWriter::AddElement(PatchElementWriter&& patch_element) {
+ DCHECK(patch_element.new_element().offset == current_dst_offset_);
+ current_dst_offset_ = patch_element.new_element().EndOffset();
+ elements_.push_back(std::move(patch_element));
+}
+
+size_t EnsemblePatchWriter::SerializedSize() const {
+ size_t serialized_size = sizeof(PatchHeader) + sizeof(uint32_t);
+ for (const auto& patch_element : elements_) {
+ serialized_size += patch_element.SerializedSize();
+ }
+ return serialized_size;
+}
+
+bool EnsemblePatchWriter::SerializeInto(BufferSink* sink) const {
+ DCHECK_EQ(current_dst_offset_, header_.new_size);
+ bool ok =
+ sink->PutValue<PatchHeader>(header_) &&
+ sink->PutValue<uint32_t>(base::checked_cast<uint32_t>(elements_.size()));
+ if (!ok)
+ return false;
+
+ for (const auto& element : elements_) {
+ if (!element.SerializeInto(sink))
+ return false;
+ }
+ return true;
+}
+
+} // namespace zucchini
diff --git a/patch_writer.h b/patch_writer.h
new file mode 100644
index 0000000..26b7baf
--- /dev/null
+++ b/patch_writer.h
@@ -0,0 +1,272 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_PATCH_WRITER_H_
+#define COMPONENTS_ZUCCHINI_PATCH_WRITER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <utility>
+#include <vector>
+
+#include "base/check.h"
+#include "components/zucchini/buffer_sink.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/patch_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+namespace patch {
+
+// If sufficient space is available, serializes |element_match| into |sink| and
+// returns true. Otherwise returns false, and |sink| will be in an undefined
+// state.
+bool SerializeElementMatch(const ElementMatch& element_match, BufferSink* sink);
+
+// Returns the size in bytes required to serialize |element_match|.
+size_t SerializedElementMatchSize(const ElementMatch& element_match);
+
+// If sufficient space is available, serializes |buffer| into |sink| and returns
+// true. Otherwise returns false, and |sink| will be in an undefined state.
+bool SerializeBuffer(const std::vector<uint8_t>& buffer, BufferSink* sink);
+
+// Returns the size in bytes required to serialize |buffer|.
+size_t SerializedBufferSize(const std::vector<uint8_t>& buffer);
+
+} // namespace patch
+
+// Each of *Sink classes below has an associated "main type", and performs the
+// following:
+// - Receives multiple "main type" elements (hence "Sink" in the name).
+// - Encodes list of received data, and writes them to internal storage (e.g.,
+// applying delta encoding).
+// - Writes encoded data to BufferSink.
+//
+// Common "core functions" implemented for *Sink classes are:
+// - void PutNext(const MAIN_TYPE& inst): Encodes and writes an instance of
+// MAIN_TYPE to internal storage. Assumptions may be applied to successive
+// |inst| provided.
+// - size_t SerializedSize() const: Returns the serialized size in bytes of
+// internal storage.
+// - bool SerializeInto(BufferSink* sink) const: If |sink| has enough space,
+// serializes internal storage into |sink|, and returns true. Otherwise
+// returns false.
+//
+// Usage of *Sink instances don't mix, and PutNext() has dissimilar
+// interfaces. Therefore we do not use inheritance to relate *Sink classes,
+// simply implement "core functions" with matching names.
+
+// Sink for equivalences.
+class EquivalenceSink {
+ public:
+ EquivalenceSink();
+ EquivalenceSink(const std::vector<uint8_t>& src_skip,
+ const std::vector<uint8_t>& dst_skip,
+ const std::vector<uint8_t>& copy_count);
+
+ EquivalenceSink(EquivalenceSink&&);
+ ~EquivalenceSink();
+
+ // Core functions.
+ // Equivalences must be given by increasing |Equivalence::dst_offset|.
+ void PutNext(const Equivalence& equivalence);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ // Offset in source, delta-encoded starting from end of last equivalence, and
+ // stored as signed varint.
+ std::vector<uint8_t> src_skip_;
+ // Offset in destination, delta-encoded starting from end of last equivalence,
+ // and stored as unsigned varint.
+ std::vector<uint8_t> dst_skip_;
+ // Length of equivalence stored as unsigned varint.
+ // TODO(etiennep): Investigate on bias.
+ std::vector<uint8_t> copy_count_;
+
+ offset_t src_offset_ = 0; // Last offset in source.
+ offset_t dst_offset_ = 0; // Last offset in destination.
+};
+
+// Sink for extra data.
+class ExtraDataSink {
+ public:
+ ExtraDataSink();
+ explicit ExtraDataSink(const std::vector<uint8_t>& extra_data);
+ ExtraDataSink(ExtraDataSink&&);
+ ~ExtraDataSink();
+
+ // Core functions.
+ void PutNext(ConstBufferView region);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ std::vector<uint8_t> extra_data_;
+};
+
+// Sink for raw delta.
+class RawDeltaSink {
+ public:
+ RawDeltaSink();
+ RawDeltaSink(const std::vector<uint8_t>& raw_delta_skip,
+ const std::vector<uint8_t>& raw_delta_diff);
+ RawDeltaSink(RawDeltaSink&&);
+ ~RawDeltaSink();
+
+ // Core functions.
+ // Deltas must be given by increasing |RawDeltaUnit::copy_offset|.
+ void PutNext(const RawDeltaUnit& delta);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ std::vector<uint8_t> raw_delta_skip_; // Copy offset starting from last delta.
+ std::vector<uint8_t> raw_delta_diff_; // Bytewise difference.
+
+ // We keep track of the compensation needed for next copy offset, taking into
+ // account delta encoding and bias of -1. Stored deltas are biased by -1, so a
+ // sequence of single byte deltas is represented as a string of 0's.
+ offset_t copy_offset_compensation_ = 0;
+};
+
+// Sink for reference delta.
+class ReferenceDeltaSink {
+ public:
+ ReferenceDeltaSink();
+ explicit ReferenceDeltaSink(const std::vector<uint8_t>& reference_delta);
+ ReferenceDeltaSink(ReferenceDeltaSink&&);
+ ~ReferenceDeltaSink();
+
+ // Core functions.
+ void PutNext(int32_t diff);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ std::vector<uint8_t> reference_delta_;
+};
+
+// Sink for additional targets.
+class TargetSink {
+ public:
+ TargetSink();
+ explicit TargetSink(const std::vector<uint8_t>& extra_targets);
+ TargetSink(TargetSink&&);
+ ~TargetSink();
+
+ // Core functions.
+ // Targets must be given by increasing order.
+ void PutNext(uint32_t target);
+ size_t SerializedSize() const;
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ // Targets are delta-encoded and biased by 1, stored as unsigned varint.
+ std::vector<uint8_t> extra_targets_;
+
+ // We keep track of the compensation needed for next target, taking into
+ // account delta encoding and bias of -1.
+ offset_t target_compensation_ = 0;
+};
+
+// Following are utility classes to write structured data forming a patch.
+
+// Utility to write a patch element. A patch element contains all the
+// information necessary to patch a single element. This class
+// provides an interface to individually set different building blocks of data
+// in the patch element.
+class PatchElementWriter {
+ public:
+ PatchElementWriter();
+ explicit PatchElementWriter(ElementMatch element_match);
+ PatchElementWriter(PatchElementWriter&&);
+ ~PatchElementWriter();
+
+ const ElementMatch& element_match() const { return element_match_; }
+ const Element& old_element() const { return element_match_.old_element; }
+ const Element& new_element() const { return element_match_.new_element; }
+
+ // Following methods set individual blocks for this element. Previous
+ // corresponding block is replaced. All streams must be set before calling
+ // SerializedSize() or SerializeInto().
+
+ void SetEquivalenceSink(EquivalenceSink&& equivalences) {
+ equivalences_.emplace(std::move(equivalences));
+ }
+ void SetExtraDataSink(ExtraDataSink&& extra_data) {
+ extra_data_.emplace(std::move(extra_data));
+ }
+ void SetRawDeltaSink(RawDeltaSink&& raw_delta) {
+ raw_delta_.emplace(std::move(raw_delta));
+ }
+ void SetReferenceDeltaSink(ReferenceDeltaSink reference_delta) {
+ reference_delta_.emplace(std::move(reference_delta));
+ }
+ // Set additional targets for pool identified with |pool_tag|.
+ void SetTargetSink(PoolTag pool_tag, TargetSink&& extra_targets) {
+ DCHECK(pool_tag != kNoPoolTag);
+ extra_targets_.emplace(pool_tag, std::move(extra_targets));
+ }
+
+ // Returns the serialized size in bytes of the data this object is holding.
+ size_t SerializedSize() const;
+
+ // If sufficient space is available, serializes data into |sink|, which is at
+ // least SerializedSize() bytes, and returns true. Otherwise returns false.
+ bool SerializeInto(BufferSink* sink) const;
+
+ private:
+ ElementMatch element_match_;
+ absl::optional<EquivalenceSink> equivalences_;
+ absl::optional<ExtraDataSink> extra_data_;
+ absl::optional<RawDeltaSink> raw_delta_;
+ absl::optional<ReferenceDeltaSink> reference_delta_;
+ std::map<PoolTag, TargetSink> extra_targets_;
+};
+
+// Utility to write a Zucchini ensemble patch. An ensemble patch is the
+// concatenation of a patch header with a vector of patch elements.
+class EnsemblePatchWriter {
+ public:
+ explicit EnsemblePatchWriter(const PatchHeader& header);
+ EnsemblePatchWriter(ConstBufferView old_image, ConstBufferView new_image);
+ EnsemblePatchWriter(const EnsemblePatchWriter&) = delete;
+ const EnsemblePatchWriter& operator=(const EnsemblePatchWriter&) = delete;
+ ~EnsemblePatchWriter();
+
+ // Reserves space for |count| patch elements.
+ void ReserveElements(size_t count) { elements_.reserve(count); }
+
+ // Adds a patch element into the patch. Patch elements must be ordered by
+ // their location in the new image file.
+ void AddElement(PatchElementWriter&& patch_element);
+
+ // Returns the serialized size in bytes of the data this object is holding.
+ size_t SerializedSize() const;
+
+ // If sufficient space is available, serializes data into |sink|, which is at
+ // least SerializedSize() bytes, and returns true. Otherwise returns false.
+ bool SerializeInto(BufferSink* sink) const;
+
+ // If sufficient space is available, serializes data into |buffer|, which is
+ // at least SerializedSize() bytes, and returns true. Otherwise returns false.
+ bool SerializeInto(MutableBufferView buffer) const {
+ BufferSink sink(buffer);
+ return SerializeInto(&sink);
+ }
+
+ private:
+ PatchHeader header_;
+ std::vector<PatchElementWriter> elements_;
+ offset_t current_dst_offset_ = 0;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_PATCH_WRITER_H_
diff --git a/reference_bytes_mixer.cc b/reference_bytes_mixer.cc
new file mode 100644
index 0000000..6855853
--- /dev/null
+++ b/reference_bytes_mixer.cc
@@ -0,0 +1,150 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reference_bytes_mixer.h"
+
+#include <algorithm>
+
+#include "base/check_op.h"
+#include "base/logging.h"
+#include "base/notreached.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/disassembler_elf.h"
+
+namespace zucchini {
+
+/******** ReferenceBytesMixer ********/
+
+// Default implementation is a stub, i.e., for architectures whose references
+// have operation bits and payload bits stored in separate bytes. So during
+// patch application, payload bits are copied for matched blocks, ignored by
+// bytewise corrections, and fixed by reference target corrections.
+ReferenceBytesMixer::ReferenceBytesMixer() {}
+
+ReferenceBytesMixer::~ReferenceBytesMixer() = default;
+
+// static.
+std::unique_ptr<ReferenceBytesMixer> ReferenceBytesMixer::Create(
+ const Disassembler& src_dis,
+ const Disassembler& dst_dis) {
+ ExecutableType exe_type = src_dis.GetExeType();
+ DCHECK_EQ(exe_type, dst_dis.GetExeType());
+ if (exe_type == kExeTypeElfAArch32)
+ return std::make_unique<ReferenceBytesMixerElfArm>(exe_type);
+ if (exe_type == kExeTypeElfAArch64)
+ return std::make_unique<ReferenceBytesMixerElfArm>(exe_type);
+ return std::make_unique<ReferenceBytesMixer>();
+}
+
+// Stub implementation.
+int ReferenceBytesMixer::NumBytes(uint8_t type) const {
+ return 0;
+}
+
+// Base class implementation is a stub that should not be called.
+ConstBufferView ReferenceBytesMixer::Mix(uint8_t type,
+ ConstBufferView old_view,
+ offset_t old_offset,
+ ConstBufferView new_view,
+ offset_t new_offset) {
+ NOTREACHED() << "Stub.";
+ return ConstBufferView();
+}
+
+/******** ReferenceBytesMixerElfArm ********/
+
+ReferenceBytesMixerElfArm::ReferenceBytesMixerElfArm(ExecutableType exe_type)
+ : exe_type_(exe_type), out_buffer_(4) {} // 4 is a bound on NumBytes().
+
+ReferenceBytesMixerElfArm::~ReferenceBytesMixerElfArm() = default;
+
+int ReferenceBytesMixerElfArm::NumBytes(uint8_t type) const {
+ if (exe_type_ == kExeTypeElfAArch32) {
+ switch (type) {
+ case AArch32ReferenceType::kRel32_A24: // Falls through.
+ case AArch32ReferenceType::kRel32_T20:
+ case AArch32ReferenceType::kRel32_T24:
+ return 4;
+ case AArch32ReferenceType::kRel32_T8: // Falls through.
+ case AArch32ReferenceType::kRel32_T11:
+ return 2;
+ }
+ } else if (exe_type_ == kExeTypeElfAArch64) {
+ switch (type) {
+ case AArch64ReferenceType::kRel32_Immd14: // Falls through.
+ case AArch64ReferenceType::kRel32_Immd19:
+ case AArch64ReferenceType::kRel32_Immd26:
+ return 4;
+ }
+ }
+ return 0;
+}
+
+ConstBufferView ReferenceBytesMixerElfArm::Mix(uint8_t type,
+ ConstBufferView old_view,
+ offset_t old_offset,
+ ConstBufferView new_view,
+ offset_t new_offset) {
+ int num_bytes = NumBytes(type);
+ ConstBufferView::const_iterator new_it = new_view.begin() + new_offset;
+ DCHECK_LE(static_cast<size_t>(num_bytes), out_buffer_.size());
+ MutableBufferView out_buffer_view(&out_buffer_[0], num_bytes);
+ std::copy(new_it, new_it + num_bytes, out_buffer_view.begin());
+
+ ArmCopyDispFun copier = GetCopier(type);
+ DCHECK_NE(copier, nullptr);
+
+ if (!copier(old_view, old_offset, out_buffer_view, 0U)) {
+ // Failed to mix old payload bits with new operation bits. The main cause
+ // of this rare failure is when BL (encoding T1) with payload bits
+ // representing disp % 4 == 2 transforms into BLX (encoding T2). Error
+ // arises because BLX requires payload bits to have disp == 0 (mod 4).
+ // Mixing failures are not fatal to patching; we simply fall back to direct
+ // copy and forgo benefits from mixing for these cases.
+ // TODO(huangs, etiennep): Ongoing discussion on whether we should just
+ // nullify all payload disp so we won't have to deal with this case, but at
+ // the cost of having Zucchini-apply do more work.
+ static int output_quota = 10;
+ if (output_quota > 0) {
+ LOG(WARNING) << "Reference byte mix failed with type = "
+ << static_cast<uint32_t>(type) << "." << std::endl;
+ --output_quota;
+ if (!output_quota)
+ LOG(WARNING) << "(Additional output suppressed)";
+ }
+ // Fall back to direct copy.
+ std::copy(new_it, new_it + num_bytes, out_buffer_view.begin());
+ }
+ return ConstBufferView(out_buffer_view);
+}
+
+ArmCopyDispFun ReferenceBytesMixerElfArm::GetCopier(uint8_t type) const {
+ if (exe_type_ == kExeTypeElfAArch32) {
+ switch (type) {
+ case AArch32ReferenceType::kRel32_A24:
+ return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_A24>;
+ case AArch32ReferenceType::kRel32_T8:
+ return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T8>;
+ case AArch32ReferenceType::kRel32_T11:
+ return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T11>;
+ case AArch32ReferenceType::kRel32_T20:
+ return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T20>;
+ case AArch32ReferenceType::kRel32_T24:
+ return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T24>;
+ }
+ } else if (exe_type_ == kExeTypeElfAArch64) {
+ switch (type) {
+ case AArch64ReferenceType::kRel32_Immd14:
+ return ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd14>;
+ case AArch64ReferenceType::kRel32_Immd19:
+ return ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd19>;
+ case AArch64ReferenceType::kRel32_Immd26:
+ return ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd26>;
+ }
+ }
+ DLOG(FATAL) << "NOTREACHED";
+ return nullptr;
+}
+
+} // namespace zucchini
diff --git a/reference_bytes_mixer.h b/reference_bytes_mixer.h
new file mode 100644
index 0000000..f20b0ef
--- /dev/null
+++ b/reference_bytes_mixer.h
@@ -0,0 +1,118 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_
+#define COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_
+
+#include <stdint.h>
+
+#include <memory>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/rel32_utils.h"
+
+namespace zucchini {
+
+class Disassembler;
+
+// References encoding may be quite complex in some architectures (e.g., ARM),
+// requiring bit-level manipulation. In general, bits in a reference body fall
+// under 2 categories:
+// - Operation bits: Instruction op code, conditionals, or structural data.
+// - Payload bits: Actual target data of the reference. These may be absolute,
+// or be displacements relative to instruction pointer / program counter.
+// During patch application,
+// Old reference bytes = {old operation, old payload},
+// is transformed to
+// New reference bytes = {new operation, new payload}.
+// New image bytes are written by three sources:
+// (1) Direct copy from old image to new image for matched blocks.
+// (2) Bytewise diff correction.
+// (3) Dedicated reference target correction.
+//
+// For references whose operation and payload bits are stored in easily
+// separable bytes (e.g., rel32 reference in X86), (2) can exclude payload bits.
+// So during patch application, (1) naively copies everything, (2) fixes
+// operation bytes only, and (3) fixes payload bytes only.
+//
+// For architectures with references whose operation and payload bits may mix
+// within shared bytes (e.g., ARM rel32), a dilemma arises:
+// - (2) cannot ignore shared bytes, since otherwise new operation bits do not
+// properly transfer.
+// - Having (2) always overwrite these bytes would reduce the benefits of
+// reference correction, since references are likely to change.
+//
+// Our solution applies a hybrid approach: For each matching old / new reference
+// pair, define:
+// Mixed reference bytes = {new operation, old payload},
+//
+// During patch generation, we compute bytewise correction from old reference
+// bytes to the mixed reference bytes. So during patch application, (2) only
+// corrects operation bit changes (and skips if they don't change), and (3)
+// overwrites old payload bits to new payload bits.
+
+// A base class for (stateful) mixed reference byte generation. This base class
+// serves as a stub. Architectures whose references' operation bits and
+// payload bits can share common bytes (e.g., ARM rel32) should override this.
+class ReferenceBytesMixer {
+ public:
+ ReferenceBytesMixer();
+ ReferenceBytesMixer(const ReferenceBytesMixer&) = delete;
+ const ReferenceBytesMixer& operator=(const ReferenceBytesMixer&) = delete;
+ virtual ~ReferenceBytesMixer();
+
+ // Returns a new ReferenceBytesMixer instance that's owned by the caller.
+ static std::unique_ptr<ReferenceBytesMixer> Create(
+ const Disassembler& src_dis,
+ const Disassembler& dst_dis);
+
+ // Returns the number of bytes that need to be mixed for references with given
+ // |type|. Returns 0 if no mixing is required.
+ virtual int NumBytes(uint8_t type) const;
+
+ // Computes mixed reference bytes by combining (a) "payload bits" from an
+ // "old" reference of |type| at |old_view[old_offset]| with (b) "operation
+ // bits" from a "new" reference of |type| at |new_view[new_offset]|. Returns
+ // the result as ConstBufferView, which is valid only until the next call to
+ // Mix().
+ virtual ConstBufferView Mix(uint8_t type,
+ ConstBufferView old_view,
+ offset_t old_offset,
+ ConstBufferView new_view,
+ offset_t new_offset);
+};
+
+// In AArch32 and AArch64, instructions mix operation bits and payload bits in
+// complex ways. This is the main use case of ReferenceBytesMixer.
+class ReferenceBytesMixerElfArm : public ReferenceBytesMixer {
+ public:
+ // |exe_type| must be kExeTypeElfAArch32 or kExeTypeElfAArch64.
+ explicit ReferenceBytesMixerElfArm(ExecutableType exe_type);
+ ReferenceBytesMixerElfArm(const ReferenceBytesMixerElfArm&) = delete;
+ const ReferenceBytesMixerElfArm& operator=(const ReferenceBytesMixerElfArm&) =
+ delete;
+ ~ReferenceBytesMixerElfArm() override;
+
+ // ReferenceBytesMixer:
+ int NumBytes(uint8_t type) const override;
+ ConstBufferView Mix(uint8_t type,
+ ConstBufferView old_view,
+ offset_t old_offset,
+ ConstBufferView new_view,
+ offset_t new_offset) override;
+
+ private:
+ ArmCopyDispFun GetCopier(uint8_t type) const;
+
+ // For simplicity, 32-bit vs. 64-bit distinction is represented by state
+ // |exe_type_|, instead of creating derived classes.
+ const ExecutableType exe_type_;
+
+ std::vector<uint8_t> out_buffer_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_
diff --git a/reference_set.cc b/reference_set.cc
new file mode 100644
index 0000000..82a9951
--- /dev/null
+++ b/reference_set.cc
@@ -0,0 +1,60 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reference_set.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "base/check_op.h"
+#include "components/zucchini/target_pool.h"
+
+namespace zucchini {
+
+namespace {
+
+// Returns true if |refs| is sorted by location.
+bool IsReferenceListSorted(const std::vector<Reference>& refs) {
+ return std::is_sorted(refs.begin(), refs.end(),
+ [](const Reference& a, const Reference& b) {
+ return a.location < b.location;
+ });
+}
+
+} // namespace
+
+ReferenceSet::ReferenceSet(const ReferenceTypeTraits& traits,
+ const TargetPool& target_pool)
+ : traits_(traits), target_pool_(target_pool) {}
+ReferenceSet::ReferenceSet(ReferenceSet&&) = default;
+ReferenceSet::~ReferenceSet() = default;
+
+void ReferenceSet::InitReferences(ReferenceReader&& ref_reader) {
+ DCHECK(references_.empty());
+ for (auto ref = ref_reader.GetNext(); ref.has_value();
+ ref = ref_reader.GetNext()) {
+ references_.push_back(*ref);
+ }
+ DCHECK(IsReferenceListSorted(references_));
+}
+
+void ReferenceSet::InitReferences(const std::vector<Reference>& refs) {
+ DCHECK(references_.empty());
+ DCHECK(IsReferenceListSorted(references_));
+ references_.assign(refs.begin(), refs.end());
+}
+
+Reference ReferenceSet::at(offset_t offset) const {
+ auto pos = std::upper_bound(references_.begin(), references_.end(), offset,
+ [](offset_t offset, const Reference& ref) {
+ return offset < ref.location;
+ });
+
+ DCHECK(pos != references_.begin()); // Iterators.
+ --pos;
+ DCHECK_LT(offset, pos->location + width());
+ return *pos;
+}
+
+} // namespace zucchini
diff --git a/reference_set.h b/reference_set.h
new file mode 100644
index 0000000..07940f0
--- /dev/null
+++ b/reference_set.h
@@ -0,0 +1,64 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_REFERENCE_SET_H_
+#define COMPONENTS_ZUCCHINI_REFERENCE_SET_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+class TargetPool;
+
+// Container of distinct references of one type, along with traits, only used
+// during patch generation.
+class ReferenceSet {
+ public:
+ using const_iterator = std::vector<Reference>::const_iterator;
+
+ // |traits| specifies the reference represented. |target_pool| specifies
+ // common targets shared by all reference represented, and mediates target
+ // translation between offsets and indexes.
+ ReferenceSet(const ReferenceTypeTraits& traits,
+ const TargetPool& target_pool);
+ ReferenceSet(const ReferenceSet&) = delete;
+ ReferenceSet(ReferenceSet&&);
+ ~ReferenceSet();
+
+ // Either one of the initializers below should be called exactly once. These
+ // insert all references from |ref_reader/refs| into this class. The targets
+ // of these references must be in |target_pool_|.
+ void InitReferences(ReferenceReader&& ref_reader);
+ void InitReferences(const std::vector<Reference>& refs);
+
+ const std::vector<Reference>& references() const { return references_; }
+ const ReferenceTypeTraits& traits() const { return traits_; }
+ const TargetPool& target_pool() const { return target_pool_; }
+ TypeTag type_tag() const { return traits_.type_tag; }
+ PoolTag pool_tag() const { return traits_.pool_tag; }
+ offset_t width() const { return traits_.width; }
+
+ // Looks up the Reference by an |offset| that it spans. |offset| is assumed to
+ // be valid, i.e., |offset| must be spanned by some Reference in
+ // |references_|.
+ Reference at(offset_t offset) const;
+
+ size_t size() const { return references_.size(); }
+ const_iterator begin() const { return references_.begin(); }
+ const_iterator end() const { return references_.end(); }
+
+ private:
+ ReferenceTypeTraits traits_;
+ const TargetPool& target_pool_;
+ // List of distinct Reference instances sorted by location.
+ std::vector<Reference> references_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_REFERENCE_SET_H_
diff --git a/reference_set_unittest.cc b/reference_set_unittest.cc
new file mode 100644
index 0000000..0bf869e
--- /dev/null
+++ b/reference_set_unittest.cc
@@ -0,0 +1,49 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reference_set.h"
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/target_pool.h"
+#include "components/zucchini/test_reference_reader.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+constexpr offset_t kWidth = 2U;
+
+} // namespace
+
+class ReferenceSetTest : public testing::Test {
+ protected:
+ // For simplicity, |target_pool_| has no type info (not needed here).
+ TargetPool target_pool_ = TargetPool{{0, 2, 3, 5}};
+ ReferenceSet reference_set_ =
+ ReferenceSet{{kWidth, TypeTag(0), PoolTag(0)}, target_pool_};
+};
+
+TEST_F(ReferenceSetTest, InitReferencesFromReader) {
+ EXPECT_EQ(std::vector<Reference>(), reference_set_.references());
+ EXPECT_EQ(0U, reference_set_.size());
+ std::vector<Reference> references = {{10, 0}, {12, 2}, {14, 5}};
+ reference_set_.InitReferences(TestReferenceReader(references));
+ EXPECT_EQ(references, reference_set_.references());
+}
+
+TEST_F(ReferenceSetTest, At) {
+ reference_set_.InitReferences({{10, 0}, {12, 2}, {15, 5}});
+ // Each reference has kWidth = 2, so check all bytes covered.
+ EXPECT_EQ(Reference({10, 0}), reference_set_.at(10));
+ EXPECT_EQ(Reference({10, 0}), reference_set_.at(11));
+ EXPECT_EQ(Reference({12, 2}), reference_set_.at(12));
+ EXPECT_EQ(Reference({12, 2}), reference_set_.at(13));
+ EXPECT_EQ(Reference({15, 5}), reference_set_.at(15));
+ EXPECT_EQ(Reference({15, 5}), reference_set_.at(16));
+}
+
+} // namespace zucchini
diff --git a/rel32_finder.cc b/rel32_finder.cc
new file mode 100644
index 0000000..1ad8910
--- /dev/null
+++ b/rel32_finder.cc
@@ -0,0 +1,294 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/rel32_finder.h"
+
+#include <algorithm>
+
+#include "base/numerics/safe_conversions.h"
+
+namespace zucchini {
+
+/******** Abs32GapFinder ********/
+
+Abs32GapFinder::Abs32GapFinder(ConstBufferView image,
+ ConstBufferView region,
+ const std::vector<offset_t>& abs32_locations,
+ size_t abs32_width)
+ : base_(image.begin()),
+ region_end_(region.end()),
+ abs32_end_(abs32_locations.end()),
+ abs32_width_(abs32_width) {
+ DCHECK_GT(abs32_width, size_t(0));
+ DCHECK_GE(region.begin(), image.begin());
+ DCHECK_LE(region.end(), image.end());
+
+ const offset_t begin_offset =
+ base::checked_cast<offset_t>(region.begin() - image.begin());
+ // Find the first |abs32_cur_| with |*abs32_cur_ >= begin_offset|.
+ abs32_cur_ = std::lower_bound(abs32_locations.begin(), abs32_locations.end(),
+ begin_offset);
+
+ // Find lower boundary, accounting for the possibility that |abs32_cur_[-1]|
+ // may straddle across |region.begin()|.
+ cur_lo_ = region.begin();
+ if (abs32_cur_ > abs32_locations.begin())
+ cur_lo_ = std::max(cur_lo_, image.begin() + abs32_cur_[-1] + abs32_width_);
+}
+
+Abs32GapFinder::~Abs32GapFinder() = default;
+
+bool Abs32GapFinder::FindNext() {
+ // Iterate over |[abs32_cur_, abs32_end_)| and emit segments.
+ while (abs32_cur_ != abs32_end_ && base_ + *abs32_cur_ < region_end_) {
+ ConstBufferView::const_iterator hi = base_ + *abs32_cur_;
+ gap_ = ConstBufferView::FromRange(cur_lo_, hi);
+ cur_lo_ = hi + abs32_width_;
+ ++abs32_cur_;
+ if (!gap_.empty())
+ return true;
+ }
+ // Emit final segment.
+ if (cur_lo_ < region_end_) {
+ gap_ = ConstBufferView::FromRange(cur_lo_, region_end_);
+ cur_lo_ = region_end_;
+ return true;
+ }
+ return false;
+}
+
+/******** Rel32Finder ********/
+
+Rel32Finder::Rel32Finder(ConstBufferView image,
+ const AddressTranslator& translator)
+ : image_(image), offset_to_rva_(translator) {}
+
+Rel32Finder::~Rel32Finder() = default;
+
+void Rel32Finder::SetRegion(ConstBufferView region) {
+ region_ = region;
+ accept_it_ = region.begin();
+}
+
+bool Rel32Finder::FindNext() {
+ NextIterators next_iters = Scan(region_);
+ if (next_iters.reject == nullptr) {
+ region_.seek(region_.end());
+ return false;
+ }
+ region_.seek(next_iters.reject);
+ accept_it_ = next_iters.accept;
+ DCHECK_GE(accept_it_, region_.begin());
+ DCHECK_LE(accept_it_, region_.end());
+ return true;
+}
+
+void Rel32Finder::Accept() {
+ region_.seek(accept_it_);
+}
+
+/******** Rel32FinderIntel ********/
+
+Rel32Finder::NextIterators Rel32FinderIntel::SetResult(
+ ConstBufferView::const_iterator cursor,
+ uint32_t opcode_size,
+ bool can_point_outside_section) {
+ offset_t location =
+ base::checked_cast<offset_t>((cursor + opcode_size) - image_.begin());
+ rva_t location_rva = offset_to_rva_.Convert(location);
+ DCHECK_NE(location_rva, kInvalidRva);
+ rva_t target_rva = location_rva + 4 + image_.read<uint32_t>(location);
+ rel32_ = {location, target_rva, can_point_outside_section};
+ return {cursor + 1, cursor + (opcode_size + 4)};
+}
+
+/******** Rel32FinderX86 ********/
+
+Rel32Finder::NextIterators Rel32FinderX86::Scan(ConstBufferView region) {
+ ConstBufferView::const_iterator cursor = region.begin();
+ while (cursor < region.end()) {
+ // Heuristic rel32 detection by looking for opcodes that use them.
+ if (cursor + 5 <= region.end()) {
+ if (cursor[0] == 0xE8 || cursor[0] == 0xE9) // JMP rel32; CALL rel32
+ return SetResult(cursor, 1, false);
+ }
+ if (cursor + 6 <= region.end()) {
+ if (cursor[0] == 0x0F && (cursor[1] & 0xF0) == 0x80) // Jcc long form
+ return SetResult(cursor, 2, false);
+ }
+ ++cursor;
+ }
+ return {nullptr, nullptr};
+}
+
+/******** Rel32FinderX64 ********/
+
+Rel32Finder::NextIterators Rel32FinderX64::Scan(ConstBufferView region) {
+ ConstBufferView::const_iterator cursor = region.begin();
+ while (cursor < region.end()) {
+ // Heuristic rel32 detection by looking for opcodes that use them.
+ if (cursor + 5 <= region.end()) {
+ if (cursor[0] == 0xE8 || cursor[0] == 0xE9) // JMP rel32; CALL rel32
+ return SetResult(cursor, 1, false);
+ }
+ if (cursor + 6 <= region.end()) {
+ if (cursor[0] == 0x0F && (cursor[1] & 0xF0) == 0x80) { // Jcc long form
+ return SetResult(cursor, 2, false);
+ } else if ((cursor[0] == 0xFF &&
+ (cursor[1] == 0x15 || cursor[1] == 0x25)) ||
+ ((cursor[0] == 0x89 || cursor[0] == 0x8B ||
+ cursor[0] == 0x8D) &&
+ (cursor[1] & 0xC7) == 0x05)) {
+ // 6-byte instructions:
+ // [2-byte opcode] [disp32]:
+ // Opcode
+ // FF 15: CALL QWORD PTR [rip+disp32]
+ // FF 25: JMP QWORD PTR [rip+disp32]
+ //
+ // [1-byte opcode] [ModR/M] [disp32]:
+ // Opcode
+ // 89: MOV DWORD PTR [rip+disp32],reg
+ // 8B: MOV reg,DWORD PTR [rip+disp32]
+ // 8D: LEA reg,[rip+disp32]
+ // ModR/M : MMRRRMMM
+ // MM = 00 & MMM = 101 => rip+disp32
+ // RRR: selects reg operand from [eax|ecx|...|edi]
+ return SetResult(cursor, 2, true);
+ }
+ }
+ ++cursor;
+ }
+ return {nullptr, nullptr};
+}
+
+/******** Rel32FinderArm ********/
+
+template <typename ADDR_TYPE>
+Rel32FinderArm<ADDR_TYPE>::Rel32FinderArm(ConstBufferView image,
+ const AddressTranslator& translator)
+ : Rel32Finder(image, translator) {}
+
+template <typename ADDR_TYPE>
+Rel32FinderArm<ADDR_TYPE>::~Rel32FinderArm() = default;
+
+template <typename ADDR_TYPE>
+Rel32Finder::NextIterators Rel32FinderArm<ADDR_TYPE>::SetResult(
+ Result&& result,
+ ConstBufferView::const_iterator cursor,
+ int instr_size) {
+ rel32_ = result;
+ return {cursor + instr_size, cursor + instr_size};
+}
+
+// SetResult() for end of scan.
+template <typename ADDR_TYPE>
+Rel32Finder::NextIterators Rel32FinderArm<ADDR_TYPE>::SetEmptyResult() {
+ rel32_ = {kInvalidOffset, kInvalidOffset, ADDR_TYPE::ADDR_NONE};
+ return {nullptr, nullptr};
+}
+
+/******** Rel32FinderAArch32 ********/
+
+Rel32FinderAArch32::Rel32FinderAArch32(ConstBufferView image,
+ const AddressTranslator& translator,
+ bool is_thumb2)
+ : Rel32FinderArm(image, translator), is_thumb2_(is_thumb2) {}
+
+Rel32FinderAArch32::~Rel32FinderAArch32() = default;
+
+Rel32Finder::NextIterators Rel32FinderAArch32::ScanA32(ConstBufferView region) {
+ // Guard against alignment potentially causing |cursor > region.end()|.
+ if (region.size() < 4)
+ return SetEmptyResult();
+ ConstBufferView::const_iterator cursor = region.begin();
+ cursor += IncrementForAlignCeil4(cursor - image_.begin());
+ for (; region.end() - cursor >= 4; cursor += 4) {
+ offset_t offset = base::checked_cast<offset_t>(cursor - image_.begin());
+ AArch32Rel32Translator translator;
+ rva_t instr_rva = offset_to_rva_.Convert(offset);
+ uint32_t code32 = translator.FetchArmCode32(image_, offset);
+ rva_t target_rva = kInvalidRva;
+ if (translator.ReadA24(instr_rva, code32, &target_rva)) {
+ return SetResult({offset, target_rva, AArch32Rel32Translator::ADDR_A24},
+ cursor, 4);
+ }
+ }
+ return SetEmptyResult();
+}
+
+Rel32Finder::NextIterators Rel32FinderAArch32::ScanT32(ConstBufferView region) {
+ // Guard against alignment potentially causing |cursor > region.end()|.
+ if (region.size() < 2)
+ return SetEmptyResult();
+ ConstBufferView::const_iterator cursor = region.begin();
+ cursor += IncrementForAlignCeil2(cursor - image_.begin());
+ while (region.end() - cursor >= 2) {
+ offset_t offset = base::checked_cast<offset_t>(cursor - image_.begin());
+ AArch32Rel32Translator translator;
+ AArch32Rel32Translator::AddrType type = AArch32Rel32Translator::ADDR_NONE;
+ rva_t instr_rva = offset_to_rva_.Convert(offset);
+ uint16_t code16 = translator.FetchThumb2Code16(image_, offset);
+ int instr_size = GetThumb2InstructionSize(code16);
+ rva_t target_rva = kInvalidRva;
+ if (instr_size == 2) { // 16-bit THUMB2 instruction.
+ if (translator.ReadT8(instr_rva, code16, &target_rva))
+ type = AArch32Rel32Translator::ADDR_T8;
+ else if (translator.ReadT11(instr_rva, code16, &target_rva))
+ type = AArch32Rel32Translator::ADDR_T11;
+ } else { // |instr_size == 4|: 32-bit THUMB2 instruction.
+ if (region.end() - cursor >= 4) {
+ uint32_t code32 = translator.FetchThumb2Code32(image_, offset);
+ if (translator.ReadT20(instr_rva, code32, &target_rva))
+ type = AArch32Rel32Translator::ADDR_T20;
+ else if (translator.ReadT24(instr_rva, code32, &target_rva))
+ type = AArch32Rel32Translator::ADDR_T24;
+ }
+ }
+ if (type != AArch32Rel32Translator::ADDR_NONE)
+ return SetResult({offset, target_rva, type}, cursor, instr_size);
+ cursor += instr_size;
+ }
+ return SetEmptyResult();
+}
+
+Rel32Finder::NextIterators Rel32FinderAArch32::Scan(ConstBufferView region) {
+ return is_thumb2_ ? ScanT32(region) : ScanA32(region);
+}
+
+/******** Rel32FinderAArch64 ********/
+
+Rel32FinderAArch64::Rel32FinderAArch64(ConstBufferView image,
+ const AddressTranslator& translator)
+ : Rel32FinderArm(image, translator) {}
+
+Rel32FinderAArch64::~Rel32FinderAArch64() = default;
+
+Rel32Finder::NextIterators Rel32FinderAArch64::Scan(ConstBufferView region) {
+ // Guard against alignment potentially causing |cursor > region.end()|.
+ if (region.size() < 4)
+ return SetEmptyResult();
+ ConstBufferView::const_iterator cursor = region.begin();
+ cursor += IncrementForAlignCeil4(cursor - image_.begin());
+ for (; region.end() - cursor >= 4; cursor += 4) {
+ offset_t offset = base::checked_cast<offset_t>(cursor - image_.begin());
+ // For simplicity we assume RVA fits within 32-bits.
+ AArch64Rel32Translator translator;
+ AArch64Rel32Translator::AddrType type = AArch64Rel32Translator::ADDR_NONE;
+ rva_t instr_rva = offset_to_rva_.Convert(offset);
+ uint32_t code32 = translator.FetchCode32(image_, offset);
+ rva_t target_rva = kInvalidRva;
+ if (translator.ReadImmd14(instr_rva, code32, &target_rva)) {
+ type = AArch64Rel32Translator::ADDR_IMMD14;
+ } else if (translator.ReadImmd19(instr_rva, code32, &target_rva)) {
+ type = AArch64Rel32Translator::ADDR_IMMD19;
+ } else if (translator.ReadImmd26(instr_rva, code32, &target_rva)) {
+ type = AArch64Rel32Translator::ADDR_IMMD26;
+ }
+ if (type != AArch64Rel32Translator::ADDR_NONE)
+ return SetResult({offset, target_rva, type}, cursor, 4);
+ }
+ return SetEmptyResult();
+}
+
+} // namespace zucchini
diff --git a/rel32_finder.h b/rel32_finder.h
new file mode 100644
index 0000000..3ebeb95
--- /dev/null
+++ b/rel32_finder.h
@@ -0,0 +1,284 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_REL32_FINDER_H_
+#define COMPONENTS_ZUCCHINI_REL32_FINDER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/arm_utils.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// See README.md for definitions on abs32 and rel32 references. The following
+// are assumed:
+// * Abs32 reference bodies have fixed widths.
+// * Rel32 locations can be identified by heuristically disassembling machine
+// code, and errors are tolerated.
+// * The collection of all abs32 and rel32 reference bodies do not overlap.
+
+// A class to visit non-empty contiguous gaps in |region| that lie outside of
+// |abs32_locations| elements, each with a body that spans |abs32_width_| bytes.
+// For example, given:
+// region = [base_ + 4, base_ + 26),
+// abs32_locations = {2, 6, 15, 20, 27},
+// abs32_width_ = 4,
+// the following is obtained:
+// 111111111122222222223 -> offsets
+// 0123456789012345678901234567890
+// ....**********************..... -> region = *
+// ^ ^ ^ ^ ^ -> abs32 locations
+// aaaaaaaa aaaa aaaa aaaa -> abs32 bodies
+// ....------*****----*----**..... -> regions excluding abs32 -> 3 gaps
+// The resulting gaps (non-empty, so [6, 6) is excluded) are:
+// [10, 15), [19, 20), [24, 26).
+// These gaps can then be passed to Rel32Finder (below) to find rel32 references
+// with bodies that are guaranteed to not overlap with any abs32 bodies.
+class Abs32GapFinder {
+ public:
+ // |abs32_locations| is a sorted list of non-overlapping abs32 locations in
+ // |image|, each spanning |abs32_width| bytes. Gaps are searched in |region|,
+ // which must be part of |image|.
+ Abs32GapFinder(ConstBufferView image,
+ ConstBufferView region,
+ const std::vector<offset_t>& abs32_locations,
+ size_t abs32_width);
+ Abs32GapFinder(const Abs32GapFinder&) = delete;
+ const Abs32GapFinder& operator=(const Abs32GapFinder&) = delete;
+ ~Abs32GapFinder();
+
+ // Searches for the next available gap, and returns successfulness.
+ bool FindNext();
+
+ // Returns the cached result from the last successful FindNext().
+ ConstBufferView GetGap() const { return gap_; }
+
+ private:
+ const ConstBufferView::const_iterator base_;
+ const ConstBufferView::const_iterator region_end_;
+ ConstBufferView::const_iterator cur_lo_;
+ const std::vector<offset_t>::const_iterator abs32_end_;
+ std::vector<offset_t>::const_iterator abs32_cur_;
+ const size_t abs32_width_;
+ ConstBufferView gap_;
+};
+
+// A class to scan regions within an image to find successive rel32 references.
+// Architecture-specific parsing and result extraction are delegated to
+// inherited classes (say, Rel32Finder_Impl). Sample extraction loop, combined
+// with Abs32GapFinder usage:
+//
+// Abs32GapFinder gap_finder(...);
+// Rel32Finder_Impl rel_finder(...);
+// while (gap_finder.FindNext()) {
+// rel_finder.SetRegion(gap_finder.GetGap());
+// while (rel_finder.FindNext()) {
+// auto rel32 = rel_finder.GetRel32(); // In Rel32Finder_Impl.
+// if (architecture_specific_validation(rel32)) {
+// rel_finder.Accept();
+// // Store rel32.
+// }
+// }
+// }
+class Rel32Finder {
+ public:
+ Rel32Finder(ConstBufferView image, const AddressTranslator& translator);
+ Rel32Finder(const Rel32Finder&) = delete;
+ const Rel32Finder& operator=(const Rel32Finder&) = delete;
+ virtual ~Rel32Finder();
+
+ // Assigns the scan |region| for rel32 references to enable FindNext() use.
+ void SetRegion(ConstBufferView region);
+
+ // Scans for the next rel32 reference, and returns whether any is found, so a
+ // "while" loop can be used for iterative rel32 extraction. The results are
+ // cached in Rel32Finder_Impl and obtained by Rel32Finder_Impl::GetRel32().
+ bool FindNext();
+
+ // When a rel32 reference is found, the caller needs to decide whether to keep
+ // the result (perhaps following more validation). If it decides to keep the
+ // result, then it must call Accept(), so the next call to FindNext() can skip
+ // the accepted rel32 reference.
+ void Accept();
+
+ // Accessors for unittest.
+ ConstBufferView::const_iterator accept_it() const { return accept_it_; }
+ ConstBufferView region() const { return region_; }
+
+ protected:
+ // Alternatives for where to continue the next scan when a rel32 reference is
+ // found. nulls indicate that no rel32 references remain.
+ struct NextIterators {
+ // The next iterator if the caller does not call Accept().
+ ConstBufferView::const_iterator reject;
+
+ // The next iterator if the caller calls Accept().
+ ConstBufferView::const_iterator accept;
+ };
+
+ // Detects and extracts architecture-specific rel32 reference. For each one
+ // found, the implementation should cache the necessary data to be retrieved
+ // via accessors. Returns a NextIterators that stores alternatives for where
+ // to continue the scan. If no rel32 reference is found then the returned
+ // NextIterators are nulls.
+ virtual NextIterators Scan(ConstBufferView region) = 0;
+
+ const ConstBufferView image_;
+ AddressTranslator::OffsetToRvaCache offset_to_rva_;
+
+ private:
+ ConstBufferView region_;
+ ConstBufferView::const_iterator accept_it_ = nullptr;
+};
+
+// Parsing for X86 or X64: we perform naive scan for opcodes that have rel32 as
+// an argument, and disregard instruction alignment.
+class Rel32FinderIntel : public Rel32Finder {
+ public:
+ Rel32FinderIntel(const Rel32FinderIntel&) = delete;
+ const Rel32FinderIntel& operator=(const Rel32FinderIntel&) = delete;
+
+ // Struct to store GetRel32() results.
+ struct Result {
+ offset_t location;
+ rva_t target_rva;
+
+ // Some references must have their target in the same section as location,
+ // which we use to heuristically reject rel32 reference candidates.
+ // When true, this constraint is relaxed.
+ bool can_point_outside_section;
+ };
+
+ using Rel32Finder::Rel32Finder;
+
+ // Returns the cached result from the last successful FindNext().
+ const Result& GetRel32() { return rel32_; }
+
+ protected:
+ // Helper for Scan() that also assigns |rel32_|.
+ Rel32Finder::NextIterators SetResult(ConstBufferView::const_iterator cursor,
+ uint32_t code_size,
+ bool can_point_outside_section);
+
+ // Cached results.
+ Result rel32_;
+
+ // Rel32Finder:
+ NextIterators Scan(ConstBufferView region) override = 0;
+};
+
+// X86 instructions.
+class Rel32FinderX86 : public Rel32FinderIntel {
+ public:
+ using Rel32FinderIntel::Rel32FinderIntel;
+
+ Rel32FinderX86(const Rel32FinderX86&) = delete;
+ const Rel32FinderX86& operator=(const Rel32FinderX86&) = delete;
+
+ private:
+ // Rel32Finder:
+ NextIterators Scan(ConstBufferView region) override;
+};
+
+// X64 instructions.
+class Rel32FinderX64 : public Rel32FinderIntel {
+ public:
+ using Rel32FinderIntel::Rel32FinderIntel;
+
+ Rel32FinderX64(const Rel32FinderX64&) = delete;
+ const Rel32FinderX64& operator=(const Rel32FinderX64&) = delete;
+
+ private:
+ // Rel32Finder:
+ NextIterators Scan(ConstBufferView region) override;
+};
+
+// Base class for ARM (AArch32 and AArch64) instructions.
+template <typename ADDR_TYPE>
+class Rel32FinderArm : public Rel32Finder {
+ public:
+ struct Result {
+ offset_t location;
+ rva_t target_rva;
+ ADDR_TYPE type;
+
+ // For testing.
+ bool operator==(const Result& other) const {
+ return location == other.location && target_rva == other.target_rva &&
+ type == other.type;
+ }
+ };
+
+ Rel32FinderArm(ConstBufferView image, const AddressTranslator& translator);
+ Rel32FinderArm(const Rel32FinderArm&) = delete;
+ const Rel32FinderArm& operator=(const Rel32FinderArm&) = delete;
+ ~Rel32FinderArm() override;
+
+ // Helper for Scan*() that also assigns |rel32_|.
+ NextIterators SetResult(Result&& result,
+ ConstBufferView::const_iterator cursor,
+ int instr_size);
+
+ // SetResult() for end of scan.
+ NextIterators SetEmptyResult();
+
+ protected:
+ // Cached results.
+ Result rel32_;
+};
+
+// AArch32 instructions.
+class Rel32FinderAArch32
+ : public Rel32FinderArm<AArch32Rel32Translator::AddrType> {
+ public:
+ Rel32FinderAArch32(ConstBufferView image,
+ const AddressTranslator& translator,
+ bool is_thumb2);
+ Rel32FinderAArch32(const Rel32FinderAArch32&) = delete;
+ const Rel32FinderAArch32& operator=(const Rel32FinderAArch32&) = delete;
+ ~Rel32FinderAArch32() override;
+
+ const Result& GetRel32() const { return rel32_; }
+
+ private:
+ // Rel32 extraction, assuming segment is in ARM mode.
+ NextIterators ScanA32(ConstBufferView region);
+
+ // Rel32 extraction, assuming segment is in THUMB2 mode.
+ NextIterators ScanT32(ConstBufferView region);
+
+ // Rel32Finder:
+ NextIterators Scan(ConstBufferView region) override;
+
+ // Indicates whether segment is in THUMB2 or ARM mode. In general this can
+ // change throughout a section. However, currently we assume that this is
+ // constant for an entire section.
+ const bool is_thumb2_;
+};
+
+// AArch64 instructions.
+class Rel32FinderAArch64
+ : public Rel32FinderArm<AArch64Rel32Translator::AddrType> {
+ public:
+ Rel32FinderAArch64(ConstBufferView image,
+ const AddressTranslator& translator);
+ Rel32FinderAArch64(const Rel32FinderAArch64&) = delete;
+ const Rel32FinderAArch64& operator=(const Rel32FinderAArch64&) = delete;
+ ~Rel32FinderAArch64() override;
+
+ const Result& GetRel32() const { return rel32_; }
+
+ private:
+ // Rel32Finder:
+ NextIterators Scan(ConstBufferView region) override;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_REL32_FINDER_H_
diff --git a/rel32_finder_unittest.cc b/rel32_finder_unittest.cc
new file mode 100644
index 0000000..7e4a21e
--- /dev/null
+++ b/rel32_finder_unittest.cc
@@ -0,0 +1,743 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/rel32_finder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/check_op.h"
+#include "base/format_macros.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/strings/stringprintf.h"
+#include "components/zucchini/arm_utils.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler_elf.h"
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+TEST(Abs32GapFinderTest, All) {
+ const size_t kRegionTotal = 99;
+ std::vector<uint8_t> buffer(kRegionTotal);
+ ConstBufferView image(buffer.data(), buffer.size());
+
+ // Common test code that returns the resulting segments as a string.
+ auto run_test = [&](size_t rlo, size_t rhi,
+ std::vector<offset_t> abs32_locations,
+ std::ptrdiff_t abs32_width) -> std::string {
+ CHECK_LE(rlo, kRegionTotal);
+ CHECK_LE(rhi, kRegionTotal);
+ CHECK(std::is_sorted(abs32_locations.begin(), abs32_locations.end()));
+ CHECK_GT(abs32_width, 0);
+ ConstBufferView region =
+ ConstBufferView::FromRange(image.begin() + rlo, image.begin() + rhi);
+ Abs32GapFinder gap_finder(image, region, abs32_locations, abs32_width);
+
+ std::string out_str;
+ while (gap_finder.FindNext()) {
+ ConstBufferView gap = gap_finder.GetGap();
+ size_t lo = base::checked_cast<size_t>(gap.begin() - image.begin());
+ size_t hi = base::checked_cast<size_t>(gap.end() - image.begin());
+ out_str.append(base::StringPrintf("[%" PRIuS ",%" PRIuS ")", lo, hi));
+ }
+ return out_str;
+ };
+
+ // Empty regions yield empty segments.
+ EXPECT_EQ("", run_test(0, 0, {}, 4));
+ EXPECT_EQ("", run_test(9, 9, {}, 4));
+ EXPECT_EQ("", run_test(8, 8, {8}, 4));
+ EXPECT_EQ("", run_test(8, 8, {0, 12}, 4));
+
+ // If no abs32 locations exist then the segment is the main range.
+ EXPECT_EQ("[0,99)", run_test(0, 99, {}, 4));
+ EXPECT_EQ("[20,21)", run_test(20, 21, {}, 4));
+ EXPECT_EQ("[51,55)", run_test(51, 55, {}, 4));
+
+ // abs32 locations found near start of main range.
+ EXPECT_EQ("[10,20)", run_test(10, 20, {5}, 4));
+ EXPECT_EQ("[10,20)", run_test(10, 20, {6}, 4));
+ EXPECT_EQ("[11,20)", run_test(10, 20, {7}, 4));
+ EXPECT_EQ("[12,20)", run_test(10, 20, {8}, 4));
+ EXPECT_EQ("[13,20)", run_test(10, 20, {9}, 4));
+ EXPECT_EQ("[14,20)", run_test(10, 20, {10}, 4));
+ EXPECT_EQ("[10,11)[15,20)", run_test(10, 20, {11}, 4));
+
+ // abs32 locations found near end of main range.
+ EXPECT_EQ("[10,15)[19,20)", run_test(10, 20, {15}, 4));
+ EXPECT_EQ("[10,16)", run_test(10, 20, {16}, 4));
+ EXPECT_EQ("[10,17)", run_test(10, 20, {17}, 4));
+ EXPECT_EQ("[10,18)", run_test(10, 20, {18}, 4));
+ EXPECT_EQ("[10,19)", run_test(10, 20, {19}, 4));
+ EXPECT_EQ("[10,20)", run_test(10, 20, {20}, 4));
+ EXPECT_EQ("[10,20)", run_test(10, 20, {21}, 4));
+
+ // Main range completely eclipsed by abs32 location.
+ EXPECT_EQ("", run_test(10, 11, {7}, 4));
+ EXPECT_EQ("", run_test(10, 11, {8}, 4));
+ EXPECT_EQ("", run_test(10, 11, {9}, 4));
+ EXPECT_EQ("", run_test(10, 11, {10}, 4));
+ EXPECT_EQ("", run_test(10, 12, {8}, 4));
+ EXPECT_EQ("", run_test(10, 12, {9}, 4));
+ EXPECT_EQ("", run_test(10, 12, {10}, 4));
+ EXPECT_EQ("", run_test(10, 13, {9}, 4));
+ EXPECT_EQ("", run_test(10, 13, {10}, 4));
+ EXPECT_EQ("", run_test(10, 14, {10}, 4));
+ EXPECT_EQ("", run_test(10, 14, {8, 12}, 4));
+
+ // Partial eclipses.
+ EXPECT_EQ("[24,25)", run_test(20, 25, {20}, 4));
+ EXPECT_EQ("[20,21)", run_test(20, 25, {21}, 4));
+ EXPECT_EQ("[20,21)[25,26)", run_test(20, 26, {21}, 4));
+
+ // abs32 location outside main range.
+ EXPECT_EQ("[40,60)", run_test(40, 60, {36, 60}, 4));
+ EXPECT_EQ("[41,61)", run_test(41, 61, {0, 10, 20, 30, 34, 62, 68, 80}, 4));
+
+ // Change abs32 width.
+ EXPECT_EQ("[10,11)[12,14)[16,19)", run_test(10, 20, {9, 11, 14, 15, 19}, 1));
+ EXPECT_EQ("", run_test(10, 11, {10}, 1));
+ EXPECT_EQ("[18,23)[29,31)", run_test(17, 31, {15, 23, 26, 31}, 3));
+ EXPECT_EQ("[17,22)[25,26)[29,30)", run_test(17, 31, {14, 22, 26, 30}, 3));
+ EXPECT_EQ("[10,11)[19,20)", run_test(10, 20, {11}, 8));
+
+ // Mixed cases with abs32 width = 4.
+ EXPECT_EQ("[10,15)[19,20)[24,25)", run_test(8, 25, {2, 6, 15, 20, 27}, 4));
+ EXPECT_EQ("[0,25)[29,45)[49,50)", run_test(0, 50, {25, 45}, 4));
+ EXPECT_EQ("[10,20)[28,50)", run_test(10, 50, {20, 24}, 4));
+ EXPECT_EQ("[49,50)[54,60)[64,70)[74,80)[84,87)",
+ run_test(49, 87, {10, 20, 30, 40, 50, 60, 70, 80, 90}, 4));
+ EXPECT_EQ("[0,10)[14,20)[24,25)[29,50)", run_test(0, 50, {10, 20, 25}, 4));
+}
+
+namespace {
+
+// A mock Rel32Finder to inject next search result on Scan().
+class TestRel32Finder : public Rel32Finder {
+ public:
+ using Rel32Finder::Rel32Finder;
+
+ // Rel32Finder:
+ NextIterators Scan(ConstBufferView region) override { return next_result; }
+
+ NextIterators next_result;
+};
+
+AddressTranslator GetTrivialTranslator(size_t size) {
+ AddressTranslator translator;
+ EXPECT_EQ(AddressTranslator::kSuccess,
+ translator.Initialize({{0, base::checked_cast<offset_t>(size), 0U,
+ base::checked_cast<rva_t>(size)}}));
+ return translator;
+}
+
+} // namespace
+
+TEST(Rel32FinderTest, Scan) {
+ const size_t kRegionTotal = 99;
+ std::vector<uint8_t> buffer(kRegionTotal);
+ ConstBufferView image(buffer.data(), buffer.size());
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ TestRel32Finder finder(image, translator);
+ finder.SetRegion(image);
+
+ auto check_finder_state = [&](const TestRel32Finder& finder,
+ size_t expected_cursor,
+ size_t expected_accept_it) {
+ CHECK_LE(expected_cursor, kRegionTotal);
+ CHECK_LE(expected_accept_it, kRegionTotal);
+
+ EXPECT_EQ(image.begin() + expected_cursor, finder.region().begin());
+ EXPECT_EQ(image.begin() + expected_accept_it, finder.accept_it());
+ };
+
+ check_finder_state(finder, 0, 0);
+
+ finder.next_result = {image.begin() + 1, image.begin() + 1};
+ EXPECT_TRUE(finder.FindNext());
+ check_finder_state(finder, 1, 1);
+
+ finder.next_result = {image.begin() + 2, image.begin() + 2};
+ EXPECT_TRUE(finder.FindNext());
+ check_finder_state(finder, 2, 2);
+
+ finder.next_result = {image.begin() + 5, image.begin() + 6};
+ EXPECT_TRUE(finder.FindNext());
+ check_finder_state(finder, 5, 6);
+ finder.Accept();
+ check_finder_state(finder, 6, 6);
+
+ finder.next_result = {image.begin() + 7, image.begin() + 7};
+ EXPECT_TRUE(finder.FindNext());
+ check_finder_state(finder, 7, 7);
+
+ finder.next_result = {image.begin() + 8, image.begin() + 8};
+ EXPECT_TRUE(finder.FindNext());
+ check_finder_state(finder, 8, 8);
+
+ finder.next_result = {image.begin() + 99, image.begin() + 99};
+ EXPECT_TRUE(finder.FindNext());
+ check_finder_state(finder, 99, 99);
+
+ finder.next_result = {nullptr, nullptr};
+ EXPECT_FALSE(finder.FindNext());
+ check_finder_state(finder, 99, 99);
+}
+
+namespace {
+
+// X86 test data. (x) and +x entries are covered by abs32 references, which have
+// width = 4.
+constexpr uint8_t kDataX86[] = {
+ 0x55, // 00: push ebp
+ 0x8B, 0xEC, // 01: mov ebp,esp
+ 0xE8, 0, 0, 0, 0, // 03: call 08
+ (0xE9), +0, +0, +0, 0, // 08: jmp 0D
+ 0x0F, 0x80, 0, 0, 0, 0, // 0D: jo 13
+ 0x0F, 0x81, 0, 0, (0), +0, // 13: jno 19
+ +0x0F, +0x82, 0, 0, 0, 0, // 19: jb 1F
+ 0x0F, 0x83, 0, 0, 0, 0, // 1F: jae 25
+ 0x0F, (0x84), +0, +0, +0, (0), // 25: je 2B
+ +0x0F, +0x85, +0, 0, 0, 0, // 2B: jne 31
+ 0x0F, 0x86, 0, 0, 0, 0, // 31: jbe 37
+ 0x0F, 0x87, 0, 0, 0, 0, // 37: ja 3D
+ 0x0F, 0x88, 0, (0), +0, +0, // 3D: js 43
+ +0x0F, 0x89, 0, 0, 0, 0, // 43: jns 49
+ 0x0F, 0x8A, 0, 0, 0, 0, // 49: jp 4F
+ 0x0F, 0x8B, (0), +0, +0, +0, // 4F: jnp 55
+ 0x0F, 0x8C, 0, 0, 0, 0, // 55: jl 5B
+ 0x0F, 0x8D, 0, 0, (0), +0, // 5B: jge 61
+ +0x0F, +0x8E, (0), +0, +0, +0, // 61: jle 67
+ 0x0F, 0x8F, 0, 0, 0, 0, // 67: jg 6D
+ 0x5D, // 6D: pop ebp
+ 0xC3, // 6E: ret
+};
+
+// Abs32 locations corresponding to |kDataX86|, with width = 4.
+constexpr offset_t kAbs32X86[] = {0x08, 0x17, 0x26, 0x2A,
+ 0x40, 0x51, 0x5F, 0x63};
+
+} // namespace
+
+TEST(Rel32FinderX86Test, FindNext) {
+ ConstBufferView image =
+ ConstBufferView::FromRange(std::begin(kDataX86), std::end(kDataX86));
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderX86 rel_finder(image, translator);
+ rel_finder.SetRegion(image);
+
+ // List of expected locations as pairs of {cursor offset, rel32 offset},
+ // ignoring |kAbs32X86|.
+ std::vector<std::pair<size_t, size_t>> expected_locations = {
+ {0x04, 0x04}, {0x09, 0x09}, {0x0E, 0x0F}, {0x14, 0x15}, {0x1A, 0x1B},
+ {0x20, 0x21}, {0x26, 0x27}, {0x2C, 0x2D}, {0x32, 0x33}, {0x38, 0x39},
+ {0x3E, 0x3F}, {0x44, 0x45}, {0x4A, 0x4B}, {0x50, 0x51}, {0x56, 0x57},
+ {0x5C, 0x5D}, {0x62, 0x63}, {0x68, 0x69},
+ };
+ for (auto location : expected_locations) {
+ EXPECT_TRUE(rel_finder.FindNext());
+ auto rel32 = rel_finder.GetRel32();
+
+ EXPECT_EQ(location.first,
+ size_t(rel_finder.region().begin() - image.begin()));
+ EXPECT_EQ(location.second, rel32.location);
+ EXPECT_EQ(image.begin() + (rel32.location + 4), rel_finder.accept_it());
+ EXPECT_FALSE(rel32.can_point_outside_section);
+ rel_finder.Accept();
+ }
+ EXPECT_FALSE(rel_finder.FindNext());
+}
+
+TEST(Rel32FinderX86Test, Integrated) {
+ // Truncated form of Rel32FinderIntel::Result.
+ using TruncatedResults = std::pair<offset_t, rva_t>;
+
+ ConstBufferView image =
+ ConstBufferView::FromRange(std::begin(kDataX86), std::end(kDataX86));
+ std::vector<offset_t> abs32_locations(std::begin(kAbs32X86),
+ std::end(kAbs32X86));
+ std::vector<TruncatedResults> results;
+
+ Abs32GapFinder gap_finder(image, image, abs32_locations,
+ DisassemblerElfX86::Traits::kVAWidth);
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderX86 rel_finder(image, translator);
+ while (gap_finder.FindNext()) {
+ rel_finder.SetRegion(gap_finder.GetGap());
+ while (rel_finder.FindNext()) {
+ auto rel32 = rel_finder.GetRel32();
+ rel_finder.Accept();
+ results.emplace_back(TruncatedResults{rel32.location, rel32.target_rva});
+ }
+ }
+
+ std::vector<TruncatedResults> expected_results = {
+ {0x04, 0x08},
+ /* {0x09, 0x0D}, */ {0x0F, 0x13},
+ /* {0x15, 0x19}, */ /*{0x1B, 0x1F}, */
+ {0x21, 0x25},
+ /* {0x27, 0x2B}, */ /* {0x2D, 0x31}, */ {0x33, 0x37},
+ {0x39, 0x3D},
+ /* {0x3F, 0x43}, */ /* {0x45, 0x49}, */ {0x4B, 0x4F},
+ /* {0x51, 0x55}, */ {0x57, 0x5B},
+ /* {0x5D, 0x61}, */ /* {0x63, 0x67}, */ {0x69, 0x6D},
+ };
+ EXPECT_EQ(expected_results, results);
+}
+
+TEST(Rel32FinderX86Test, Accept) {
+ constexpr uint8_t data[] = {
+ 0xB9, 0x00, 0x00, 0x00, 0xE9, // 00: mov E9000000
+ 0xE8, 0x00, 0x00, 0x00, 0xE9, // 05: call E900000A
+ 0xE8, 0x00, 0x00, 0x00, 0xE9, // 0A: call E900000F
+ };
+
+ ConstBufferView image =
+ ConstBufferView::FromRange(std::begin(data), std::end(data));
+
+ auto next_location = [](Rel32FinderX86& rel_finder) -> offset_t {
+ EXPECT_TRUE(rel_finder.FindNext());
+ auto rel32 = rel_finder.GetRel32();
+ return rel32.location;
+ };
+
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderX86 rel_finder(image, translator);
+ rel_finder.SetRegion(image);
+
+ EXPECT_EQ(0x05U, next_location(rel_finder)); // False positive.
+ rel_finder.Accept();
+ // False negative: shadowed by 0x05
+ // EXPECT_EQ(0x06, next_location(rel_finder));
+ EXPECT_EQ(0x0AU, next_location(rel_finder)); // False positive.
+ EXPECT_EQ(0x0BU, next_location(rel_finder)); // Found if 0x0A is discarded.
+}
+
+namespace {
+
+// X64 test data. (x) and +x entries are covered by abs32 references, which have
+// width = 8.
+constexpr uint8_t kDataX64[] = {
+ 0x55, // 00: push ebp
+ 0x8B, 0xEC, // 01: mov ebp,esp
+ 0xE8, 0, 0, 0, 0, // 03: call 08
+ 0xE9, 0, 0, 0, (0), // 08: jmp 0D
+ +0x0F, +0x80, +0, +0, +0, +0, // 0D: jo 13
+ +0x0F, 0x81, 0, 0, 0, 0, // 13: jno 19
+ 0x0F, 0x82, 0, 0, 0, 0, // 19: jb 1F
+ (0x0F), +0x83, +0, +0, +0, +0, // 1F: jae 25
+ +0x0F, +0x84, 0, 0, 0, 0, // 25: je 2B
+ 0x0F, 0x85, 0, 0, 0, 0, // 2B: jne 31
+ 0x0F, 0x86, (0), +0, +0, +0, // 31: jbe 37
+ +0x0F, +0x87, +0, +0, (0), +0, // 37: ja 3D
+ +0x0F, +0x88, +0, +0, +0, +0, // 3D: js 43
+ 0x0F, 0x89, 0, 0, 0, 0, // 43: jns 49
+ (0x0F), +0x8A, +0, +0, +0, +0, // 49: jp 4F
+ +0x0F, +0x8B, 0, 0, 0, 0, // 4F: jnp 55
+ 0x0F, 0x8C, 0, 0, 0, 0, // 55: jl 5B
+ 0x0F, 0x8D, 0, 0, 0, 0, // 5B: jge 61
+ 0x0F, 0x8E, 0, 0, 0, 0, // 61: jle 67
+ 0x0F, 0x8F, 0, (0), +0, +0, // 67: jg 6F
+ +0xFF, +0x15, +0, +0, +0, 0, // 6D: call [rip+00] # 73
+ 0xFF, 0x25, 0, 0, 0, 0, // 73: jmp [rip+00] # 79
+ 0x8B, 0x05, 0, 0, 0, 0, // 79: mov eax,[rip+00] # 7F
+ 0x8B, 0x3D, 0, 0, 0, 0, // 7F: mov edi,[rip+00] # 85
+ 0x8D, 0x05, 0, 0, 0, 0, // 85: lea eax,[rip+00] # 8B
+ 0x8D, 0x3D, 0, 0, 0, 0, // 8B: lea edi,[rip+00] # 91
+ 0x48, 0x8B, 0x05, 0, 0, 0, 0, // 91: mov rax,[rip+00] # 98
+ 0x48, (0x8B), +0x3D, +0, +0, +0, +0, // 98: mov rdi,[rip+00] # 9F
+ +0x48, +0x8D, 0x05, 0, 0, 0, 0, // 9F: lea rax,[rip+00] # A6
+ 0x48, 0x8D, 0x3D, 0, 0, 0, 0, // A6: lea rdi,[rip+00] # AD
+ 0x4C, 0x8B, 0x05, 0, 0, 0, (0), // AD: mov r8,[rip+00] # B4
+ +0x4C, +0x8B, +0x3D, +0, +0, +0, +0, // B4: mov r15,[rip+00] # BB
+ 0x4C, 0x8D, 0x05, 0, 0, 0, 0, // BB: lea r8,[rip+00] # C2
+ 0x4C, 0x8D, 0x3D, 0, 0, 0, 0, // C2: lea r15,[rip+00] # C9
+ 0x66, 0x8B, 0x05, (0), +0, +0, +0, // C9: mov ax,[rip+00] # D0
+ +0x66, +0x8B, +0x3D, +0, 0, 0, 0, // D0: mov di,[rip+00] # D7
+ 0x66, 0x8D, 0x05, 0, 0, 0, 0, // D7: lea ax,[rip+00] # DE
+ 0x66, 0x8D, 0x3D, 0, 0, 0, 0, // DE: lea di,[rip+00] # E5
+ 0x5D, // E5: pop ebp
+ 0xC3, // E6: ret
+};
+
+// Abs32 locations corresponding to |kDataX64|, with width = 8.
+constexpr offset_t kAbs32X64[] = {0x0C, 0x1F, 0x33, 0x3B, 0x49,
+ 0x6A, 0x99, 0xB3, 0xCC};
+
+} // namespace
+
+TEST(Rel32FinderX64Test, FindNext) {
+ ConstBufferView image =
+ ConstBufferView::FromRange(std::begin(kDataX64), std::end(kDataX64));
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderX64 rel_finder(image, translator);
+ rel_finder.SetRegion(image);
+
+ // Lists of expected locations as pairs of {cursor offset, rel32 offset},
+ // ignoring |kAbs32X64|.
+ std::vector<std::pair<size_t, size_t>> expected_locations = {
+ {0x04, 0x04}, {0x09, 0x09}, {0x0E, 0x0F}, {0x14, 0x15}, {0x1A, 0x1B},
+ {0x20, 0x21}, {0x26, 0x27}, {0x2C, 0x2D}, {0x32, 0x33}, {0x38, 0x39},
+ {0x3E, 0x3F}, {0x44, 0x45}, {0x4A, 0x4B}, {0x50, 0x51}, {0x56, 0x57},
+ {0x5C, 0x5D}, {0x62, 0x63}, {0x68, 0x69},
+ };
+ std::vector<std::pair<size_t, size_t>> expected_locations_rip = {
+ {0x6E, 0x6F}, {0x74, 0x75}, {0x7A, 0x7B}, {0x80, 0x81}, {0x86, 0x87},
+ {0x8C, 0x8D}, {0x93, 0x94}, {0x9A, 0x9B}, {0xA1, 0xA2}, {0xA8, 0xA9},
+ {0xAF, 0xB0}, {0xB6, 0xB7}, {0xBD, 0xBE}, {0xC4, 0xC5}, {0xCB, 0xCC},
+ {0xD2, 0xD3}, {0xD9, 0xDA}, {0xE0, 0xE1},
+ };
+ // Jump instructions, which cannot point outside section.
+ for (auto location : expected_locations) {
+ EXPECT_TRUE(rel_finder.FindNext());
+ auto rel32 = rel_finder.GetRel32();
+ EXPECT_EQ(location.first,
+ size_t(rel_finder.region().begin() - image.begin()));
+ EXPECT_EQ(location.second, rel32.location);
+ EXPECT_EQ(image.begin() + (rel32.location + 4), rel_finder.accept_it());
+ EXPECT_FALSE(rel32.can_point_outside_section);
+ rel_finder.Accept();
+ }
+ // PC-relative data access instructions, which can point outside section.
+ for (auto location : expected_locations_rip) {
+ EXPECT_TRUE(rel_finder.FindNext());
+ auto rel32 = rel_finder.GetRel32();
+ EXPECT_EQ(location.first,
+ size_t(rel_finder.region().begin() - image.begin()));
+ EXPECT_EQ(location.second, rel32.location);
+ EXPECT_EQ(image.begin() + (rel32.location + 4), rel_finder.accept_it());
+ EXPECT_TRUE(rel32.can_point_outside_section); // Different from before.
+ rel_finder.Accept();
+ }
+ EXPECT_FALSE(rel_finder.FindNext());
+}
+
+TEST(Rel32FinderX64Test, Integrated) {
+ // Truncated form of Rel32FinderIntel::Result.
+ using TruncatedResults = std::pair<offset_t, rva_t>;
+
+ ConstBufferView image =
+ ConstBufferView::FromRange(std::begin(kDataX64), std::end(kDataX64));
+ std::vector<offset_t> abs32_locations(std::begin(kAbs32X64),
+ std::end(kAbs32X64));
+ std::vector<TruncatedResults> results;
+
+ Abs32GapFinder gap_finder(image, image, abs32_locations,
+ DisassemblerElfX64::Traits::kVAWidth);
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderX64 rel_finder(image, translator);
+ while (gap_finder.FindNext()) {
+ rel_finder.SetRegion(gap_finder.GetGap());
+ while (rel_finder.FindNext()) {
+ auto rel32 = rel_finder.GetRel32();
+ rel_finder.Accept();
+ results.emplace_back(TruncatedResults{rel32.location, rel32.target_rva});
+ }
+ }
+
+ std::vector<TruncatedResults> expected_results = {
+ {0x04, 0x08},
+ /* {0x09, 0x0D}, */
+ /* {0x0F, 0x13}, */ /* {0x15, 0x19}, */ {0x1B, 0x1F},
+ /* {0x21, 0x25}, */ /* {0x27, 0x2B}, */ {0x2D, 0x31},
+ /* {0x33, 0x37}, */ /* {0x39, 0x3D}, */
+ /* {0x3F, 0x43}, */ {0x45, 0x49},
+ /* {0x4B, 0x4F}, */ /* {0x51, 0x55}, */
+ {0x57, 0x5B},
+ {0x5D, 0x61},
+ {0x63, 0x67}, /* {0x69, 0x6F}, */
+ /* {0x6F, 0x73}, */ {0x75, 0x79},
+ {0x7B, 0x7F},
+ {0x81, 0x85},
+ {0x87, 0x8B},
+ {0x8D, 0x91},
+ {0x94, 0x98},
+ /* {0x9B, 0x9F}, */ /* {0xA2, 0xA6}, */ {0xA9, 0xAD},
+ /* {0xB0, 0xB4}, */ /* {0xB7, 0xBB}, */ {0xBE, 0xC2},
+ {0xC5, 0xC9},
+ /* {0xCC, 0xD0}, */ /* {0xD3, 0xD7}, */ {0xDA, 0xDE},
+ {0xE1, 0xE5},
+ };
+ EXPECT_EQ(expected_results, results);
+}
+
+namespace {
+
+// Runs the ARM rel32 extraction (nested) loop on |image| using |rel32_finder|,
+// given |abs32_locations| for abs32 references each having |abs32_width|.
+// Returns the list of extracted references.
+template <class REL32_FINDER>
+std::vector<typename REL32_FINDER::Result> ArmExtractRel32(
+ ConstBufferView image,
+ const std::vector<offset_t>& abs32_locations,
+ int abs32_width,
+ REL32_FINDER&& rel32_finder) {
+ std::vector<typename REL32_FINDER::Result> results;
+ Abs32GapFinder gap_finder(image, image, abs32_locations, abs32_width);
+ while (gap_finder.FindNext()) {
+ rel32_finder.SetRegion(gap_finder.GetGap());
+ while (rel32_finder.FindNext()) {
+ typename REL32_FINDER::Result rel32 = rel32_finder.GetRel32();
+ rel32_finder.Accept();
+ results.emplace_back(rel32);
+ }
+ }
+ return results;
+}
+
+} // namespace
+
+namespace {
+
+// AArch32 ARM mode test data. (x) and +x entries are covered by abs32
+// references (if used), which have width = 4.
+constexpr uint8_t kDataAarch32ArmMode[] = {
+ 0x00, 0x01, 0x02, 0xEA, // 00: B 00080408 ; B encoding A1
+ 0x00, 0x01, (0x02), +0xEA, // 04: B 0008040C ; B encoding A1
+ +0x00, +0x01, 0x02, 0xEA, // 08: B 00080410 ; B encoding A1
+ 0x00, 0x01, 0x02, 0xEA, // 0C: B 00080414 ; B encoding A1
+ 0x00, 0x01, 0x02, (0xEA), // 10: B 00080418 ; B encoding A1
+ +0x00, +0x01, +0x02, 0xEA, // 14: B 0008041C ; B encoding A1
+ 0x00, 0x01, 0x02, 0xEA, // 18: B 00080420 ; B encoding A1
+};
+
+// Abs32 locations corresponding to |kDataAarch32ArmMode|, with width = 4.
+constexpr offset_t kAbs32Aarch32ArmMode[] = {0x6, 0x13};
+
+} // namespace
+
+TEST(Rel32FinderAArch32Test, IntegratedArmModeWithoutAbs32) {
+ using AddrType = AArch32Rel32Translator::AddrType;
+ using Result = Rel32FinderAArch32::Result;
+ std::vector<Result> expected_results = {
+ {0x00, 0x80408, AddrType::ADDR_A24}, {0x04, 0x8040C, AddrType::ADDR_A24},
+ {0x08, 0x80410, AddrType::ADDR_A24}, {0x0C, 0x80414, AddrType::ADDR_A24},
+ {0x10, 0x80418, AddrType::ADDR_A24}, {0x14, 0x8041C, AddrType::ADDR_A24},
+ {0x18, 0x80420, AddrType::ADDR_A24},
+ };
+
+ ConstBufferView image = ConstBufferView::FromRange(
+ std::begin(kDataAarch32ArmMode), std::end(kDataAarch32ArmMode));
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderAArch32 rel32_finder(image, translator, /* is_thumb2 */ false);
+
+ std::vector<Result> results = ArmExtractRel32(
+ image, /* abs32_locations */ {}, DisassemblerElfAArch32::Traits::kVAWidth,
+ std::move(rel32_finder));
+
+ EXPECT_EQ(expected_results, results);
+}
+
+TEST(Rel32FinderAArch32Test, IntegratedArmModeWithAbs32) {
+ using AddrType = AArch32Rel32Translator::AddrType;
+ using Result = Rel32FinderAArch32::Result;
+ std::vector<Result> expected_results = {
+ {0x00, 0x80408, AddrType::ADDR_A24},
+ /* {0x04, 0x8040C, AddrType::ADDR_A24}, */
+ /* {0x08, 0x80410, AddrType::ADDR_A24}, */
+ {0x0C, 0x80414, AddrType::ADDR_A24},
+ /* {0x10, 0x80418, AddrType::ADDR_A24}, */
+ /* {0x14, 0x8041C, AddrType::ADDR_A24}, */
+ {0x18, 0x80420, AddrType::ADDR_A24},
+ };
+
+ ConstBufferView image = ConstBufferView::FromRange(
+ std::begin(kDataAarch32ArmMode), std::end(kDataAarch32ArmMode));
+ std::vector<offset_t> abs32_locations(std::begin(kAbs32Aarch32ArmMode),
+ std::end(kAbs32Aarch32ArmMode));
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderAArch32 rel32_finder(image, translator, /* is_thumb2 */ false);
+
+ std::vector<Result> results = ArmExtractRel32(
+ image, abs32_locations, DisassemblerElfAArch32::Traits::kVAWidth,
+ std::move(rel32_finder));
+
+ EXPECT_EQ(expected_results, results);
+}
+
+namespace {
+
+// AArch32 THUMB2 mode test data. (x) and +x entries are covered by abs32
+// references (if used), which have width = 4.
+constexpr uint8_t kDataAarch32Thumb2Mode[] = {
+ 0x00, 0xDE, // 00: B.AL 00000004 ; B encoding T1
+ 0x00, 0xDE, // 02: B.AL 00000006 ; B encoding T1
+ 0x00, (0xDE), // 04: B.AL 00000008 ; B encoding T1
+ +0x00, +0xDE, // 06: B.AL 0000000A ; B encoding T1
+ +0x00, 0xE0, // 08: B 0000000C ; B encoding T2
+ 0x00, 0xE0, // 0A: B 0000000E ; B encoding T2
+ 0x00, 0xE0, // 0C: B 00000010 ; B encoding T2
+ (0x00), +0xE0, // 0E: B 00000012 ; B encoding T2
+ +0x00, +0xF0, 0x00, 0x80, // 10: B 00000014 ; B encoding T3
+ 0x00, 0xF0, 0x00, 0x80, // 14: B 00000018 ; B encoding T3
+ (0x00), +0xF0, +0x00, +0x80, // 18: B 0000001C ; B encoding T3
+ 0x00, 0xF0, 0x00, 0x80, // 1C: B 00000020 ; B encoding T3
+ 0x00, 0xF0, 0x00, 0xB8, // 20: B 00000024 ; B encoding T4
+ 0x00, 0xF0, 0x00, (0xB8), // 24: B 00000028 ; B encoding T4
+ +0xFE, +0xDE, // 28: B.AL 00000028 ; B encoding T1
+ +0x00, 0xF0, 0x00, 0xF8, // 2A: BL 0000002E ; BL encoding T1
+ 0x00, 0xF0, 0x00, 0xE8, // 2E: BLX 00000030 ; BLX encoding T2
+ 0x00, 0x0B, // 32: NOP
+ 0x00, 0xF0, 0x00, 0xE8, // 34: BLX 00000038 ; BLX encoding T2
+ 0x00, 0xF0, 0x00, 0xB8, // 38: B 0000003C ; B encoding T4
+};
+
+// Abs32 locations corresponding to |kDataAarch32Thumb2Mode|, with width = 4.
+constexpr offset_t kAbs32Aarch32Thumb2Mode[] = {0x05, 0x0E, 0x18, 0x27};
+
+} // namespace
+
+TEST(Rel32FinderAArch32Test, IntegratedThumb2ModeWithoutAbs32) {
+ using AddrType = AArch32Rel32Translator::AddrType;
+ using Result = Rel32FinderAArch32::Result;
+ std::vector<Result> expected_results = {
+ {0x00, 0x04, AddrType::ADDR_T8}, {0x02, 0x06, AddrType::ADDR_T8},
+ {0x04, 0x08, AddrType::ADDR_T8}, {0x06, 0x0A, AddrType::ADDR_T8},
+ {0x08, 0x0C, AddrType::ADDR_T11}, {0x0A, 0x0E, AddrType::ADDR_T11},
+ {0x0C, 0x10, AddrType::ADDR_T11}, {0x0E, 0x12, AddrType::ADDR_T11},
+ {0x10, 0x14, AddrType::ADDR_T20}, {0x14, 0x18, AddrType::ADDR_T20},
+ {0x18, 0x1C, AddrType::ADDR_T20}, {0x1C, 0x20, AddrType::ADDR_T20},
+ {0x20, 0x24, AddrType::ADDR_T24}, {0x24, 0x28, AddrType::ADDR_T24},
+ {0x28, 0x28, AddrType::ADDR_T8}, {0x2A, 0x2E, AddrType::ADDR_T24},
+ {0x2E, 0x30, AddrType::ADDR_T24}, {0x34, 0x38, AddrType::ADDR_T24},
+ {0x38, 0x3C, AddrType::ADDR_T24},
+ };
+
+ ConstBufferView image = ConstBufferView::FromRange(
+ std::begin(kDataAarch32Thumb2Mode), std::end(kDataAarch32Thumb2Mode));
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderAArch32 rel32_finder(image, translator, /* is_thumb2 */ true);
+
+ std::vector<Result> results = ArmExtractRel32(
+ image, /* abs32_locations */ {}, DisassemblerElfAArch32::Traits::kVAWidth,
+ std::move(rel32_finder));
+
+ EXPECT_EQ(expected_results, results);
+}
+
+TEST(Rel32FinderAArch32Test, IntegratedThumb2ModeWithAbs32) {
+ using AddrType = AArch32Rel32Translator::AddrType;
+ using Result = Rel32FinderAArch32::Result;
+ std::vector<Result> expected_results = {
+ {0x00, 0x04, AddrType::ADDR_T8},
+ {0x02, 0x06, AddrType::ADDR_T8},
+ /* {0x04, 0x08, AddrType::ADDR_T8}, */
+ /* {0x06, 0x0A, AddrType::ADDR_T8}, */
+ /* {0x08, 0x0C, AddrType::ADDR_T11}, */
+ {0x0A, 0x0E, AddrType::ADDR_T11},
+ {0x0C, 0x10, AddrType::ADDR_T11},
+ /* {0x0E, 0x12, AddrType::ADDR_T11}, */
+ /* {0x10, 0x14, AddrType::ADDR_T20}, */
+ {0x14, 0x18, AddrType::ADDR_T20},
+ /* {0x18, 0x1C, AddrType::ADDR_T20}, */
+ {0x1C, 0x20, AddrType::ADDR_T20},
+ {0x20, 0x24, AddrType::ADDR_T24},
+ /* {0x24, 0x28, AddrType::ADDR_T24}, */
+ /* {0x28, 0x28, AddrType::ADDR_T8}, */
+ /* {0x2A, 0x2E, AddrType::ADDR_T24}, */
+ // Abs32 reference 0x27 disrupts alignment, and THUMB2 disassembly starts
+ // at 0x2C, causing the following to be excluded!
+ /* {0x2E, 0x30, AddrType::ADDR_T24}, */
+ {0x34, 0x38, AddrType::ADDR_T24},
+ {0x38, 0x3C, AddrType::ADDR_T24},
+ };
+
+ ConstBufferView image = ConstBufferView::FromRange(
+ std::begin(kDataAarch32Thumb2Mode), std::end(kDataAarch32Thumb2Mode));
+ std::vector<offset_t> abs32_locations(std::begin(kAbs32Aarch32Thumb2Mode),
+ std::end(kAbs32Aarch32Thumb2Mode));
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderAArch32 rel32_finder(image, translator, /* is_thumb2 */ true);
+
+ std::vector<Result> results = ArmExtractRel32(
+ image, abs32_locations, DisassemblerElfAArch32::Traits::kVAWidth,
+ std::move(rel32_finder));
+
+ EXPECT_EQ(expected_results, results);
+}
+
+namespace {
+
+// AArch32 THUMB2 mode test data. (x) and +x entries are covered by abs32
+// references (if used), which have width = 8.
+constexpr uint8_t kDataAarch64[] = {
+ 0x0E, 0x00, 0x00, 0x36, // 00: TBZ X0,#0,00000000 ; Immd14
+ 0x0E, 0x00, 0x00, (0x36), // 04: TBZ X0,#0,00000004 ; Immd14
+ +0x0E, +0x00, +0x00, +0x36, // 08: TBZ X0,#0,00000008 ; Immd14
+ +0x0E, +0x00, +0x00, 0x54, // 0C: B.AL 0000000C ; Immd19
+ 0x0E, 0x00, 0x00, 0x54, // 10: B.AL 00000010 ; Immd19
+ (0x0E), +0x00, +0x00, +0x54, // 14: B.AL 00000014 ; Immd19
+ +0x00, +0x00, +0x00, +0x94, // 18: BL 00000018 ; Immd26
+ 0x00, 0x00, 0x00, 0x14, // 1C: B 0000001C ; Immd26
+ 0x00, 0x00, 0x00, 0x94, // 20: BL 00000020 ; Immd26
+ 0x00, 0x00, 0x00, 0x14, // 24: B 00000024 ; Immd26
+};
+
+// Abs32 locations corresponding to |kDataAarch64|, with width = 8.
+constexpr offset_t kAbs32Aarch64[] = {0x07, 0x14};
+
+} // namespace
+
+TEST(Rel32FinderAArch64Test, IntegratedWithoutAbs32) {
+ using AddrType = AArch64Rel32Translator::AddrType;
+ using Result = Rel32FinderAArch64::Result;
+ std::vector<Result> expected_results = {
+ {0x00, 0x00, AddrType::ADDR_IMMD14}, {0x04, 0x04, AddrType::ADDR_IMMD14},
+ {0x08, 0x08, AddrType::ADDR_IMMD14}, {0x0C, 0x0C, AddrType::ADDR_IMMD19},
+ {0x10, 0x10, AddrType::ADDR_IMMD19}, {0x14, 0x14, AddrType::ADDR_IMMD19},
+ {0x18, 0x18, AddrType::ADDR_IMMD26}, {0x1C, 0x1C, AddrType::ADDR_IMMD26},
+ {0x20, 0x20, AddrType::ADDR_IMMD26}, {0x24, 0x24, AddrType::ADDR_IMMD26},
+ };
+
+ ConstBufferView image = ConstBufferView::FromRange(std::begin(kDataAarch64),
+ std::end(kDataAarch64));
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderAArch64 rel32_finder(image, translator);
+
+ std::vector<Result> results = ArmExtractRel32(
+ image, /* abs32_locations */ {}, DisassemblerElfAArch64::Traits::kVAWidth,
+ std::move(rel32_finder));
+
+ EXPECT_EQ(expected_results, results);
+}
+
+TEST(Rel32FinderAArch64Test, IntegratedWithAbs32) {
+ using AddrType = AArch64Rel32Translator::AddrType;
+ using Result = Rel32FinderAArch64::Result;
+ std::vector<Result> expected_results = {
+ {0x00, 0x00, AddrType::ADDR_IMMD14},
+ /* {0x04, 0x04, AddrType::ADDR_IMMD14}, */
+ /* {0x08, 0x08, AddrType::ADDR_IMMD14}, */
+ /* {0x0C, 0x0C, AddrType::ADDR_IMMD19}, */
+ {0x10, 0x10, AddrType::ADDR_IMMD19},
+ /* {0x14, 0x14, AddrType::ADDR_IMMD19}, */
+ /* {0x18, 0x18, AddrType::ADDR_IMMD26}, */
+ {0x1C, 0x1C, AddrType::ADDR_IMMD26},
+ {0x20, 0x20, AddrType::ADDR_IMMD26},
+ {0x24, 0x24, AddrType::ADDR_IMMD26},
+ };
+
+ ConstBufferView image = ConstBufferView::FromRange(std::begin(kDataAarch64),
+ std::end(kDataAarch64));
+ std::vector<offset_t> abs32_locations(std::begin(kAbs32Aarch64),
+ std::end(kAbs32Aarch64));
+ AddressTranslator translator(GetTrivialTranslator(image.size()));
+ Rel32FinderAArch64 rel32_finder(image, translator);
+
+ std::vector<Result> results = ArmExtractRel32(
+ image, abs32_locations, DisassemblerElfAArch64::Traits::kVAWidth,
+ std::move(rel32_finder));
+
+ EXPECT_EQ(expected_results, results);
+}
+
+} // namespace zucchini
diff --git a/rel32_utils.cc b/rel32_utils.cc
new file mode 100644
index 0000000..c22cb23
--- /dev/null
+++ b/rel32_utils.cc
@@ -0,0 +1,67 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/rel32_utils.h"
+
+#include <algorithm>
+
+#include "base/check_op.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+/******** Rel32ReaderX86 ********/
+
+Rel32ReaderX86::Rel32ReaderX86(ConstBufferView image,
+ offset_t lo,
+ offset_t hi,
+ const std::deque<offset_t>* locations,
+ const AddressTranslator& translator)
+ : image_(image),
+ target_rva_to_offset_(translator),
+ location_offset_to_rva_(translator),
+ hi_(hi),
+ last_(locations->end()) {
+ DCHECK_LE(lo, image.size());
+ DCHECK_LE(hi, image.size());
+ current_ = std::lower_bound(locations->begin(), locations->end(), lo);
+}
+
+Rel32ReaderX86::~Rel32ReaderX86() = default;
+
+absl::optional<Reference> Rel32ReaderX86::GetNext() {
+ while (current_ < last_ && *current_ < hi_) {
+ offset_t loc_offset = *(current_++);
+ DCHECK_LE(loc_offset + 4, image_.size()); // Sanity check.
+ rva_t loc_rva = location_offset_to_rva_.Convert(loc_offset);
+ rva_t target_rva = loc_rva + 4 + image_.read<int32_t>(loc_offset);
+ offset_t target_offset = target_rva_to_offset_.Convert(target_rva);
+ // |locations| is valid by assumption (see class description).
+ DCHECK_NE(kInvalidOffset, target_offset);
+ return Reference{loc_offset, target_offset};
+ }
+ return absl::nullopt;
+}
+
+/******** Rel32ReceptorX86 ********/
+
+Rel32WriterX86::Rel32WriterX86(MutableBufferView image,
+ const AddressTranslator& translator)
+ : image_(image),
+ target_offset_to_rva_(translator),
+ location_offset_to_rva_(translator) {}
+
+Rel32WriterX86::~Rel32WriterX86() = default;
+
+void Rel32WriterX86::PutNext(Reference ref) {
+ rva_t target_rva = target_offset_to_rva_.Convert(ref.target);
+ rva_t loc_rva = location_offset_to_rva_.Convert(ref.location);
+
+ // Subtraction underflow is okay
+ uint32_t code =
+ static_cast<uint32_t>(target_rva) - (static_cast<uint32_t>(loc_rva) + 4);
+ image_.write<uint32_t>(ref.location, code);
+}
+
+} // namespace zucchini
diff --git a/rel32_utils.h b/rel32_utils.h
new file mode 100644
index 0000000..f54c5cd
--- /dev/null
+++ b/rel32_utils.h
@@ -0,0 +1,184 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_REL32_UTILS_H_
+#define COMPONENTS_ZUCCHINI_REL32_UTILS_H_
+
+#include <algorithm>
+#include <deque>
+#include <memory>
+
+#include "base/logging.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/arm_utils.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/io_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+// Reader that emits x86 / x64 References (locations and target) from a list of
+// valid locations, constrained by a portion of an image.
+class Rel32ReaderX86 : public ReferenceReader {
+ public:
+ // |image| is an image containing x86 / x64 code in [|lo|, |hi|).
+ // |locations| is a sorted list of offsets of rel32 reference locations.
+ // |translator| (for |image|) is embedded into |target_rva_to_offset_| and
+ // |location_offset_to_rva_| for address translation, and therefore must
+ // outlive |*this|.
+ Rel32ReaderX86(ConstBufferView image,
+ offset_t lo,
+ offset_t hi,
+ const std::deque<offset_t>* locations,
+ const AddressTranslator& translator);
+ Rel32ReaderX86(const Rel32ReaderX86&) = delete;
+ const Rel32ReaderX86& operator=(const Rel32ReaderX86&) = delete;
+ ~Rel32ReaderX86() override;
+
+ // Returns the next reference, or absl::nullopt if exhausted.
+ absl::optional<Reference> GetNext() override;
+
+ private:
+ ConstBufferView image_;
+ AddressTranslator::RvaToOffsetCache target_rva_to_offset_;
+ AddressTranslator::OffsetToRvaCache location_offset_to_rva_;
+ const offset_t hi_;
+ const std::deque<offset_t>::const_iterator last_;
+ std::deque<offset_t>::const_iterator current_;
+};
+
+// Writer for x86 / x64 rel32 References.
+class Rel32WriterX86 : public ReferenceWriter {
+ public:
+ // |image| wraps the raw bytes of a binary in which rel32 references will be
+ // written. |translator| (for |image|) is embedded into
+ // |target_offset_to_rva_| and |location_offset_to_rva_| for address
+ // translation, and therefore must outlive |*this|.
+ Rel32WriterX86(MutableBufferView image, const AddressTranslator& translator);
+ Rel32WriterX86(const Rel32WriterX86&) = delete;
+ const Rel32WriterX86& operator=(const Rel32WriterX86&) = delete;
+ ~Rel32WriterX86() override;
+
+ void PutNext(Reference ref) override;
+
+ private:
+ MutableBufferView image_;
+ AddressTranslator::OffsetToRvaCache target_offset_to_rva_;
+ AddressTranslator::OffsetToRvaCache location_offset_to_rva_;
+};
+
+// Reader that emits x86 / x64 References (locations and target) of a spcific
+// type from a list of valid locations, constrained by a portion of an image.
+template <class ADDR_TRAITS>
+class Rel32ReaderArm : public ReferenceReader {
+ public:
+ using CODE_T = typename ADDR_TRAITS::code_t;
+
+ Rel32ReaderArm(const AddressTranslator& translator,
+ ConstBufferView view,
+ const std::deque<offset_t>& rel32_locations,
+ offset_t lo,
+ offset_t hi)
+ : view_(view),
+ offset_to_rva_(translator),
+ rva_to_offset_(translator),
+ hi_(hi) {
+ cur_it_ =
+ std::lower_bound(rel32_locations.begin(), rel32_locations.end(), lo);
+ rel32_end_ = rel32_locations.end();
+ }
+
+ Rel32ReaderArm(const Rel32ReaderArm&) = delete;
+ const Rel32ReaderArm& operator=(const Rel32ReaderArm&) = delete;
+
+ absl::optional<Reference> GetNext() override {
+ while (cur_it_ < rel32_end_ && *cur_it_ < hi_) {
+ offset_t location = *(cur_it_++);
+ CODE_T code = ADDR_TRAITS::Fetch(view_, location);
+ rva_t instr_rva = offset_to_rva_.Convert(location);
+ rva_t target_rva = kInvalidRva;
+ if (ADDR_TRAITS::Read(instr_rva, code, &target_rva)) {
+ offset_t target = rva_to_offset_.Convert(target_rva);
+ if (target != kInvalidOffset)
+ return Reference{location, target};
+ }
+ }
+ return absl::nullopt;
+ }
+
+ private:
+ ConstBufferView view_;
+ AddressTranslator::OffsetToRvaCache offset_to_rva_;
+ AddressTranslator::RvaToOffsetCache rva_to_offset_;
+ std::deque<offset_t>::const_iterator cur_it_;
+ std::deque<offset_t>::const_iterator rel32_end_;
+ offset_t hi_;
+};
+
+// Writer for ARM rel32 References of a specific type.
+template <class ADDR_TRAITS>
+class Rel32WriterArm : public ReferenceWriter {
+ public:
+ using CODE_T = typename ADDR_TRAITS::code_t;
+
+ Rel32WriterArm(const AddressTranslator& translator,
+ MutableBufferView mutable_view)
+ : mutable_view_(mutable_view), offset_to_rva_(translator) {}
+
+ Rel32WriterArm(const Rel32WriterArm&) = delete;
+ const Rel32WriterArm& operator=(const Rel32WriterArm&) = delete;
+
+ void PutNext(Reference ref) override {
+ CODE_T code = ADDR_TRAITS::Fetch(mutable_view_, ref.location);
+ rva_t instr_rva = offset_to_rva_.Convert(ref.location);
+ rva_t target_rva = offset_to_rva_.Convert(ref.target);
+ if (ADDR_TRAITS::Write(instr_rva, target_rva, &code)) {
+ ADDR_TRAITS::Store(mutable_view_, ref.location, code);
+ } else {
+ LOG(ERROR) << "Write error: " << AsHex<8>(ref.location) << ": "
+ << AsHex<static_cast<int>(sizeof(CODE_T)) * 2>(code)
+ << " <= " << AsHex<8>(target_rva) << ".";
+ }
+ }
+
+ private:
+ MutableBufferView mutable_view_;
+ AddressTranslator::OffsetToRvaCache offset_to_rva_;
+};
+
+// Type for specialized versions of ArmCopyDisp().
+// TODO(etiennep/huangs): Fold ReferenceByteMixer into Disassembler and remove
+// direct function pointer usage.
+using ArmCopyDispFun = bool (*)(ConstBufferView src_view,
+ offset_t src_idx,
+ MutableBufferView dst_view,
+ offset_t dst_idx);
+
+// Copier that makes |*dst_it| similar to |*src_it| (both assumed to point to
+// rel32 instructions of type ADDR_TRAITS) by copying the displacement (i.e.,
+// payload bits) from |src_it| to |dst_it|. If successful, updates |*dst_it|,
+// and returns true. Otherwise returns false. Note that alignment is not an
+// issue since the displacement is not translated to target RVA!
+template <class ADDR_TRAITS>
+bool ArmCopyDisp(ConstBufferView src_view,
+ offset_t src_idx,
+ MutableBufferView dst_view,
+ offset_t dst_idx) {
+ using CODE_T = typename ADDR_TRAITS::code_t;
+ CODE_T src_code = ADDR_TRAITS::Fetch(src_view, src_idx);
+ arm_disp_t disp = 0;
+ if (ADDR_TRAITS::Decode(src_code, &disp)) {
+ CODE_T dst_code = ADDR_TRAITS::Fetch(dst_view, dst_idx);
+ if (ADDR_TRAITS::Encode(disp, &dst_code)) {
+ ADDR_TRAITS::Store(dst_view, dst_idx, dst_code);
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_REL32_UTILS_H_
diff --git a/rel32_utils_unittest.cc b/rel32_utils_unittest.cc
new file mode 100644
index 0000000..f4a6bde
--- /dev/null
+++ b/rel32_utils_unittest.cc
@@ -0,0 +1,541 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/rel32_utils.h"
+
+#include <stdint.h>
+
+#include <deque>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "base/test/gtest_util.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/arm_utils.h"
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+namespace {
+
+// A trivial AddressTranslator that applies constant shift.
+class TestAddressTranslator : public AddressTranslator {
+ public:
+ TestAddressTranslator(offset_t image_size, rva_t rva_begin) {
+ DCHECK_GE(rva_begin, 0U);
+ CHECK_EQ(AddressTranslator::kSuccess,
+ Initialize({{0, image_size, rva_begin, image_size}}));
+ }
+};
+
+// Checks that |reader| emits and only emits |expected_refs|, in order.
+void CheckReader(const std::vector<Reference>& expected_refs,
+ std::unique_ptr<ReferenceReader> reader) {
+ for (Reference expected_ref : expected_refs) {
+ auto ref = reader->GetNext();
+ EXPECT_TRUE(ref.has_value());
+ EXPECT_EQ(expected_ref, ref.value());
+ }
+ EXPECT_EQ(absl::nullopt, reader->GetNext()); // Nothing should be left.
+}
+
+// Copies displacements from |bytes1| to |bytes2| and checks results against
+// |bytes_exp_1_to_2|. Then repeats for |*bytes2| , |*byte1|, and
+// |bytes_exp_2_to_1|. Empty expected bytes mean failure is expected. The copy
+// function is specified by |copier|.
+void CheckCopy(const std::vector<uint8_t>& bytes_exp_1_to_2,
+ const std::vector<uint8_t>& bytes_exp_2_to_1,
+ const std::vector<uint8_t>& bytes1,
+ const std::vector<uint8_t>& bytes2,
+ ArmCopyDispFun copier) {
+ auto run_test = [&copier](const std::vector<uint8_t>& bytes_exp,
+ const std::vector<uint8_t>& bytes_in,
+ std::vector<uint8_t> bytes_out) {
+ ConstBufferView buffer_in(&bytes_in[0], bytes_in.size());
+ MutableBufferView buffer_out(&bytes_out[0], bytes_out.size());
+ if (bytes_exp.empty()) {
+ EXPECT_FALSE(copier(buffer_in, 0U, buffer_out, 0U));
+ } else {
+ EXPECT_TRUE(copier(buffer_in, 0U, buffer_out, 0U));
+ EXPECT_EQ(bytes_exp, bytes_out);
+ }
+ };
+ run_test(bytes_exp_1_to_2, bytes1, bytes2);
+ run_test(bytes_exp_2_to_1, bytes2, bytes1);
+}
+
+} // namespace
+
+TEST(Rel32UtilsTest, Rel32ReaderX86) {
+ constexpr offset_t kTestImageSize = 0x00100000U;
+ constexpr rva_t kRvaBegin = 0x00030000U;
+ TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+ // For simplicity, test data is not real X86 machine code. We are only
+ // including rel32 targets, without the full instructions.
+ std::vector<uint8_t> bytes = {
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler)
+ 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler)
+ 0x04, 0x00, 0x00, 0x00, // 00030008: 00030010
+ 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler)
+ 0x00, 0x00, 0x00, 0x00, // 00030010: 00030014
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler)
+ 0xF4, 0xFF, 0xFF, 0xFF, // 00030018: 00030010
+ 0xE4, 0xFF, 0xFF, 0xFF, // 0003001C: 00030004
+ };
+ ConstBufferView buffer(bytes.data(), bytes.size());
+ // Specify rel32 locations directly, instead of parsing.
+ std::deque<offset_t> rel32_locations = {0x0008U, 0x0010U, 0x0018U, 0x001CU};
+
+ // Generate everything.
+ auto reader1 = std::make_unique<Rel32ReaderX86>(buffer, 0x0000U, 0x0020U,
+ &rel32_locations, translator);
+ CheckReader({{0x0008U, 0x0010U},
+ {0x0010U, 0x0014U},
+ {0x0018U, 0x0010U},
+ {0x001CU, 0x0004U}},
+ std::move(reader1));
+
+ // Exclude last.
+ auto reader2 = std::make_unique<Rel32ReaderX86>(buffer, 0x0000U, 0x001CU,
+ &rel32_locations, translator);
+ CheckReader({{0x0008U, 0x0010U}, {0x0010U, 0x0014U}, {0x0018U, 0x0010U}},
+ std::move(reader2));
+
+ // Only find one.
+ auto reader3 = std::make_unique<Rel32ReaderX86>(buffer, 0x000CU, 0x0018U,
+ &rel32_locations, translator);
+ CheckReader({{0x0010U, 0x0014U}}, std::move(reader3));
+}
+
+TEST(Rel32UtilsTest, Rel32WriterX86) {
+ constexpr offset_t kTestImageSize = 0x00100000U;
+ constexpr rva_t kRvaBegin = 0x00030000U;
+ TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+ std::vector<uint8_t> bytes(32, 0xFF);
+ MutableBufferView buffer(bytes.data(), bytes.size());
+
+ Rel32WriterX86 writer(buffer, translator);
+ writer.PutNext({0x0008U, 0x0010U});
+ EXPECT_EQ(0x00000004U, buffer.read<uint32_t>(0x08)); // 00030008: 00030010
+
+ writer.PutNext({0x0010U, 0x0014U});
+ EXPECT_EQ(0x00000000U, buffer.read<uint32_t>(0x10)); // 00030010: 00030014
+
+ writer.PutNext({0x0018U, 0x0010U});
+ EXPECT_EQ(0xFFFFFFF4U, buffer.read<uint32_t>(0x18)); // 00030018: 00030010
+
+ writer.PutNext({0x001CU, 0x0004U});
+ EXPECT_EQ(0xFFFFFFE4U, buffer.read<uint32_t>(0x1C)); // 0003001C: 00030004
+
+ EXPECT_EQ(std::vector<uint8_t>({
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler)
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler)
+ 0x04, 0x00, 0x00, 0x00, // 00030008: 00030010
+ 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler)
+ 0x00, 0x00, 0x00, 0x00, // 00030010: 00030014
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler)
+ 0xF4, 0xFF, 0xFF, 0xFF, // 00030018: 00030010
+ 0xE4, 0xFF, 0xFF, 0xFF, // 0003001C: 00030004
+ }),
+ bytes);
+}
+
+TEST(Rel32UtilsTest, Rel32ReaderArm_AArch32) {
+ constexpr offset_t kTestImageSize = 0x00100000U;
+ constexpr rva_t kRvaBegin = 0x00030000U;
+ TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+ // A24.
+ std::vector<uint8_t> bytes = {
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler)
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler)
+ 0x00, 0x00, 0x00, 0xEA, // 00030008: B 00030010 ; A24
+ 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler)
+ 0xFF, 0xFF, 0xFF, 0xEB, // 00030010: BL 00030014 ; A24
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler)
+ 0xFC, 0xFF, 0xFF, 0xEB, // 00030018: BL 00030010 ; A24
+ 0xF8, 0xFF, 0xFF, 0xEA, // 0003001C: B 00030004 ; A24
+ };
+ ConstBufferView region(&bytes[0], bytes.size());
+ // Specify rel32 locations directly, instead of parsing.
+ std::deque<offset_t> rel32_locations_A24 = {0x0008U, 0x0010U, 0x0018U,
+ 0x001CU};
+
+ // Generate everything.
+ auto reader1 =
+ std::make_unique<Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_A24>>(
+ translator, region, rel32_locations_A24, 0x0000U, 0x0020U);
+ CheckReader({{0x0008U, 0x0010U},
+ {0x0010U, 0x0014U},
+ {0x0018U, 0x0010U},
+ {0x001CU, 0x0004U}},
+ std::move(reader1));
+
+ // Exclude last.
+ auto reader2 =
+ std::make_unique<Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_A24>>(
+ translator, region, rel32_locations_A24, 0x0000U, 0x001CU);
+ CheckReader({{0x0008U, 0x0010U}, {0x0010U, 0x0014U}, {0x0018U, 0x0010U}},
+ std::move(reader2));
+
+ // Only find one.
+ auto reader3 =
+ std::make_unique<Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_A24>>(
+ translator, region, rel32_locations_A24, 0x000CU, 0x0018U);
+ CheckReader({{0x0010U, 0x0014U}}, std::move(reader3));
+}
+
+TEST(Rel32UtilsTest, Rel32WriterArm_AArch32_Easy) {
+ constexpr offset_t kTestImageSize = 0x00100000U;
+ constexpr rva_t kRvaBegin = 0x00030000U;
+ TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+ std::vector<uint8_t> bytes = {
+ 0xFF, 0xFF, // 00030000: (Filler)
+ 0x01, 0xDE, // 00030002: B 00030008 ; T8
+ 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler)
+ 0x01, 0xE0, // 00030008: B 0003000E ; T11
+ 0xFF, 0xFF, // 0003000A: (Filler)
+ 0x80, 0xF3, 0x00, 0x80, // 0003000C: B 00030010 ; T20
+ };
+ MutableBufferView region(&bytes[0], bytes.size());
+
+ auto writer1 =
+ std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T8>>(
+ translator, region);
+ writer1->PutNext({0x0002U, 0x0004U});
+ EXPECT_EQ(0xFF, bytes[0x02]); // 00030002: B 00030004 ; T8
+ EXPECT_EQ(0xDE, bytes[0x03]);
+
+ writer1->PutNext({0x0002U, 0x000AU});
+ EXPECT_EQ(0x02, bytes[0x02]); // 00030002: B 0003000A ; T8
+ EXPECT_EQ(0xDE, bytes[0x03]);
+
+ auto writer2 =
+ std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T11>>(
+ translator, region);
+ writer2->PutNext({0x0008U, 0x0008U});
+ EXPECT_EQ(0xFE, bytes[0x08]); // 00030008: B 00030008 ; T11
+ EXPECT_EQ(0xE7, bytes[0x09]);
+ writer2->PutNext({0x0008U, 0x0010U});
+ EXPECT_EQ(0x02, bytes[0x08]); // 00030008: B 00030010 ; T11
+ EXPECT_EQ(0xE0, bytes[0x09]);
+
+ auto writer3 =
+ std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T20>>(
+ translator, region);
+ writer3->PutNext({0x000CU, 0x000AU});
+ EXPECT_EQ(0xBF, bytes[0x0C]); // 0003000C: B 0003000A ; T20
+ EXPECT_EQ(0xF7, bytes[0x0D]);
+ EXPECT_EQ(0xFD, bytes[0x0E]);
+ EXPECT_EQ(0xAF, bytes[0x0F]);
+ writer3->PutNext({0x000CU, 0x0010U});
+ EXPECT_EQ(0x80, bytes[0x0C]); // 0003000C: B 00030010 ; T20
+ EXPECT_EQ(0xF3, bytes[0x0D]);
+ EXPECT_EQ(0x00, bytes[0x0E]);
+ EXPECT_EQ(0x80, bytes[0x0F]);
+}
+
+TEST(Rel32UtilsTest, Rel32WriterArm_AArch32_Hard) {
+ constexpr offset_t kTestImageSize = 0x10000000U;
+ constexpr rva_t kRvaBegin = 0x0C030000U;
+ TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+ std::vector<uint8_t> bytes = {
+ 0xFF, 0xFF, // 0C030000: (Filler)
+ 0x00, 0xF0, 0x00, 0xB8, // 0C030002: B 0C030006 ; T24
+ 0xFF, 0xFF, 0xFF, 0xFF, // 0C030006: (Filler)
+ 0x00, 0xF0, 0x7A, 0xE8, // 0C03000A: BLX 0C030100 ; T24
+ 0xFF, 0xFF, // 0C03000E: (Filler)
+ 0x00, 0xF0, 0x7A, 0xE8, // 0C030010: BLX 0C030108 ; T24
+ };
+ MutableBufferView region(&bytes[0], bytes.size());
+
+ auto writer =
+ std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T24>>(
+ translator, region);
+ writer->PutNext({0x0002U, 0x0000U});
+ EXPECT_EQ(0xFF, bytes[0x02]); // 0C030002: B 0C030000 ; T24
+ EXPECT_EQ(0xF7, bytes[0x03]);
+ EXPECT_EQ(0xFD, bytes[0x04]);
+ EXPECT_EQ(0xBF, bytes[0x05]);
+ writer->PutNext({0x0002U, 0x0008U});
+ EXPECT_EQ(0x00, bytes[0x02]); // 0C030002: B 0C030008 ; T24
+ EXPECT_EQ(0xF0, bytes[0x03]);
+ EXPECT_EQ(0x01, bytes[0x04]);
+ EXPECT_EQ(0xB8, bytes[0x05]);
+
+ // BLX complication, with location that's not 4-byte aligned.
+ writer->PutNext({0x000AU, 0x0010U});
+ EXPECT_EQ(0x00, bytes[0x0A]); // 0C03000A: BLX 0C030010 ; T24
+ EXPECT_EQ(0xF0, bytes[0x0B]);
+ EXPECT_EQ(0x02, bytes[0x0C]);
+ EXPECT_EQ(0xE8, bytes[0x0D]);
+ writer->PutNext({0x000AU, 0x0100U});
+ EXPECT_EQ(0x00, bytes[0x0A]); // 0C03000A: BLX 0C030100 ; T24
+ EXPECT_EQ(0xF0, bytes[0x0B]);
+ EXPECT_EQ(0x7A, bytes[0x0C]);
+ EXPECT_EQ(0xE8, bytes[0x0D]);
+ writer->PutNext({0x000AU, 0x0000U});
+ EXPECT_EQ(0xFF, bytes[0x0A]); // 0C03000A: BLX 0C030000 ; T24
+ EXPECT_EQ(0xF7, bytes[0x0B]);
+ EXPECT_EQ(0xFA, bytes[0x0C]);
+ EXPECT_EQ(0xEF, bytes[0x0D]);
+
+ // BLX complication, with location that's 4-byte aligned.
+ writer->PutNext({0x0010U, 0x0010U});
+ EXPECT_EQ(0xFF, bytes[0x10]); // 0C030010: BLX 0C030010 ; T24
+ EXPECT_EQ(0xF7, bytes[0x11]);
+ EXPECT_EQ(0xFE, bytes[0x12]);
+ EXPECT_EQ(0xEF, bytes[0x13]);
+ writer->PutNext({0x0010U, 0x0108U});
+ EXPECT_EQ(0x00, bytes[0x10]); // 0C030010: BLX 0C030108 ; T24
+ EXPECT_EQ(0xF0, bytes[0x11]);
+ EXPECT_EQ(0x7A, bytes[0x12]);
+ EXPECT_EQ(0xE8, bytes[0x13]);
+}
+
+// Test BLX encoding A2, which is an ARM instruction that switches to THUMB2,
+// and therefore should have 2-byte alignment. BLX (A2) carries the extra
+// halfword bit (H) of the target, so the target only needs to be halfword
+// aligned, unlike other A24 branches which require 4-byte alignment.
+TEST(Rel32UtilsTest, AArch32SwitchToThumb2) {
+  constexpr offset_t kTestImageSize = 0x10000000U;
+  constexpr rva_t kRvaBegin = 0x08030000U;
+  TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+  std::vector<uint8_t> bytes = {
+      0xFF, 0xFF, 0x00, 0x00,  // 08030000: (Filler)
+      0x00, 0x00, 0x00, 0xFA,  // 08030004: BLX 0803000C ; A24
+  };
+  MutableBufferView region(&bytes[0], bytes.size());
+
+  auto writer =
+      std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_A24>>(
+          translator, region);
+
+  // To location that's 4-byte aligned.
+  writer->PutNext({0x0004U, 0x0100U});
+  EXPECT_EQ(0x3D, bytes[0x04]);  // 08030004: BLX 08030100 ; A24
+  EXPECT_EQ(0x00, bytes[0x05]);
+  EXPECT_EQ(0x00, bytes[0x06]);
+  EXPECT_EQ(0xFA, bytes[0x07]);
+
+  // To location that's 2-byte aligned but not 4-byte aligned. Note the H bit
+  // lands in bit 24 of the instruction (0xFA -> 0xFB in the last byte).
+  writer->PutNext({0x0004U, 0x0052U});
+  EXPECT_EQ(0x11, bytes[0x04]);  // 08030004: BLX 08030052 ; A24
+  EXPECT_EQ(0x00, bytes[0x05]);
+  EXPECT_EQ(0x00, bytes[0x06]);
+  EXPECT_EQ(0xFB, bytes[0x07]);
+
+  // Clean slate code: restore the original encoding.
+  writer->PutNext({0x0004U, 0x000CU});
+  EXPECT_EQ(0x00, bytes[0x04]);  // 08030004: BLX 0803000C ; A24
+  EXPECT_EQ(0x00, bytes[0x05]);
+  EXPECT_EQ(0x00, bytes[0x06]);
+  EXPECT_EQ(0xFA, bytes[0x07]);
+}
+
+// Tests copying branch displacements between AArch32 instructions while
+// preserving the destination's opcode/condition bits, for each address traits
+// family (A24, T8, T11, T20, T24). Each CheckCopy() call verifies the copy in
+// both directions, or expects failure when the displacement does not fit the
+// destination encoding.
+TEST(Rel32UtilsTest, ArmCopyDisp_AArch32) {
+  std::vector<uint8_t> expect_fail;
+
+  // Successful A24.
+  ArmCopyDispFun copier_A24 =
+      ArmCopyDisp<AArch32Rel32Translator::AddrTraits_A24>;
+  CheckCopy({0x12, 0x34, 0x56, 0xEB},  // 00000100: BL 0158D150
+            {0xA0, 0xC0, 0x0E, 0x2A},  // 00000100: BCS 003B0388
+            {0x12, 0x34, 0x56, 0x2A},  // 00000100: BCS 0158D150
+            {0xA0, 0xC0, 0x0E, 0xEB},  // 00000100: BL 003B0388
+            copier_A24);
+
+  // Successful T8.
+  ArmCopyDispFun copier_T8 = ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T8>;
+  CheckCopy({0x12, 0xD5},  // 00000100: BPL 00000128
+            {0xAB, 0xD8},  // 00000100: BHI 0000005A
+            {0x12, 0xD8},  // 00000100: BHI 00000128
+            {0xAB, 0xD5},  // 00000100: BPL 0000005A
+            copier_T8);
+
+  // Successful T11.
+  ArmCopyDispFun copier_T11 =
+      ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T11>;
+  CheckCopy({0xF5, 0xE0},  // 00000100: B 000002EE
+            {0x12, 0xE7},  // 00000100: B FFFFFF28
+            {0xF5, 0xE0},  // 00000100: B 000002EE
+            {0x12, 0xE7},  // 00000100: B FFFFFF28
+            copier_T11);
+
+  // Failure if wrong copier is used.
+  CheckCopy(expect_fail, expect_fail, {0xF5, 0xE0}, {0x12, 0xE7}, copier_T8);
+
+  // Successful T20.
+  ArmCopyDispFun copier_T20 =
+      ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T20>;
+  CheckCopy({0x41, 0xF2, 0xA5, 0x88},  // 00000100: BLS.W 0008124E
+            {0x04, 0xF3, 0x3C, 0xA2},  // 00000100: BGT.W 0004457C
+            {0x01, 0xF3, 0xA5, 0x88},  // 00000100: BGT.W 0008124E
+            {0x44, 0xF2, 0x3C, 0xA2},  // 00000100: BLS.W 0004457C
+            copier_T20);
+  CheckCopy({0x7F, 0xF6, 0xFF, 0xAF},  // 00000100: BLS.W 00000102
+            {0x00, 0xF3, 0x00, 0x80},  // 00000100: BGT.W 00000104
+            {0x3F, 0xF7, 0xFF, 0xAF},  // 00000100: BGT.W 00000102
+            {0x40, 0xF2, 0x00, 0x80},  // 00000100: BLS.W 00000104
+            copier_T20);
+
+  // Failure if wrong copier is used.
+  CheckCopy(expect_fail, expect_fail, {0x41, 0xF2, 0xA5, 0x88},
+            {0x84, 0xF3, 0x3C, 0xA2}, copier_A24);
+
+  // T24: Mix B encoding T4 and BL encoding T1.
+  ArmCopyDispFun copier_T24 =
+      ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T24>;
+  CheckCopy({0xFF, 0xF7, 0xFF, 0xFF},  // 00000100: BL 00000102
+            {0x00, 0xF0, 0x00, 0x90},  // 00000100: B.W 00C00104
+            {0xFF, 0xF7, 0xFF, 0xBF},  // 00000100: B.W 00000102
+            {0x00, 0xF0, 0x00, 0xD0},  // 00000100: BL 00C00104
+            copier_T24);
+
+  // Mix B encoding T4 and BLX encoding T2. Note that the forward direction
+  // fails because B's target is invalid for BLX! It's possible to do "best
+  // effort" copying to reduce diff -- but right now we're not doing this.
+  CheckCopy(expect_fail, {0x00, 0xF0, 0x00, 0x90},  // 00000100: B.W 00C00104
+            {0xFF, 0xF7, 0xFF, 0xBF},               // 00000100: B.W 00000102
+            {0x00, 0xF0, 0x00, 0xC0},               // 00000100: BLX 00C00104
+            copier_T24);
+  // Success now, since B's target is valid for BLX.
+  CheckCopy({0xFF, 0xF7, 0xFE, 0xEF},  // 00000100: BLX 00000100
+            {0x00, 0xF0, 0x00, 0x90},  // 00000100: B.W 00C00104
+            {0xFF, 0xF7, 0xFE, 0xBF},  // 00000100: B.W 00000100
+            {0x00, 0xF0, 0x00, 0xC0},  // 00000100: BLX 00C00104
+            copier_T24);
+}
+
+// Tests that Rel32ReaderArm extracts (location, target) References from
+// AArch64 branch instructions for each immediate width (Immd26 for B/BL,
+// Immd19 for CBZ/CBNZ/B.cond, Immd14 for TBZ/TBNZ).
+TEST(Rel32UtilsTest, Rel32ReaderArm_AArch64) {
+  constexpr offset_t kTestImageSize = 0x00100000U;
+  constexpr rva_t kRvaBegin = 0x00030000U;
+  TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+  std::vector<uint8_t> bytes = {
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030000: (Filler)
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030004: (Filler)
+      0x02, 0x00, 0x00, 0x14,  // 00030008: B 00030010 ; Immd26
+      0xFF, 0xFF, 0xFF, 0xFF,  // 0003000C: (Filler)
+      0x25, 0x00, 0x00, 0x35,  // 00030010: CBNZ R5,00030014 ; Immd19
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030014: (Filler)
+      0xCA, 0xFF, 0xFF, 0x54,  // 00030018: BGE 00030010 ; Immd19
+      0x4C, 0xFF, 0x8F, 0x36,  // 0003001C: TBZ X12,#17,00030004 ; Immd14
+  };
+  MutableBufferView region(&bytes[0], bytes.size());
+
+  // Generate Immd26. We specify rel32 locations directly.
+  std::deque<offset_t> rel32_locations_Immd26 = {0x0008U};
+  auto reader1 = std::make_unique<
+      Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd26>>(
+      translator, region, rel32_locations_Immd26, 0x0000U, 0x0020U);
+  CheckReader({{0x0008U, 0x0010U}}, std::move(reader1));
+
+  // Generate Immd19.
+  std::deque<offset_t> rel32_locations_Immd19 = {0x0010U, 0x0018U};
+  auto reader2 = std::make_unique<
+      Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd19>>(
+      translator, region, rel32_locations_Immd19, 0x0000U, 0x0020U);
+  CheckReader({{0x0010U, 0x0014U}, {0x0018U, 0x0010U}}, std::move(reader2));
+
+  // Generate Immd14.
+  std::deque<offset_t> rel32_locations_Immd14 = {0x001CU};
+  auto reader3 = std::make_unique<
+      Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd14>>(
+      translator, region, rel32_locations_Immd14, 0x0000U, 0x0020U);
+  CheckReader({{0x001CU, 0x0004U}}, std::move(reader3));
+}
+
+// Tests that Rel32WriterArm patches AArch64 branch displacements in place for
+// each immediate width, leaving the surrounding opcode bits intact.
+TEST(Rel32UtilsTest, Rel32WriterArm_AArch64) {
+  constexpr offset_t kTestImageSize = 0x00100000U;
+  constexpr rva_t kRvaBegin = 0x00030000U;
+  TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+  std::vector<uint8_t> bytes = {
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030000: (Filler)
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030004: (Filler)
+      0x02, 0x00, 0x00, 0x14,  // 00030008: B 00030010 ; Immd26
+      0xFF, 0xFF, 0xFF, 0xFF,  // 0003000C: (Filler)
+      0x25, 0x00, 0x00, 0x35,  // 00030010: CBNZ R5,00030014 ; Immd19
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030014: (Filler)
+      0xCA, 0xFF, 0xFF, 0x54,  // 00030018: BGE 00030010 ; Immd19
+      0x4C, 0xFF, 0x8F, 0x36,  // 0003001C: TBZ X12,#17,00030004 ; Immd14
+  };
+  MutableBufferView region(&bytes[0], bytes.size());
+
+  // Immd26: retarget a backward B; displacement becomes negative.
+  auto writer1 = std::make_unique<
+      Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd26>>(translator,
+                                                                 region);
+  writer1->PutNext({0x0008U, 0x0000U});
+  EXPECT_EQ(0xFE, bytes[0x08]);  // 00030008: B 00030000 ; Immd26
+  EXPECT_EQ(0xFF, bytes[0x09]);
+  EXPECT_EQ(0xFF, bytes[0x0A]);
+  EXPECT_EQ(0x17, bytes[0x0B]);
+
+  // Immd19: retarget CBNZ (backward) and BGE (forward).
+  auto writer2 = std::make_unique<
+      Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd19>>(translator,
+                                                                 region);
+  writer2->PutNext({0x0010U, 0x0000U});
+  EXPECT_EQ(0x85, bytes[0x10]);  // 00030010: CBNZ R5,00030000 ; Immd19
+  EXPECT_EQ(0xFF, bytes[0x11]);
+  EXPECT_EQ(0xFF, bytes[0x12]);
+  EXPECT_EQ(0x35, bytes[0x13]);
+  writer2->PutNext({0x0018U, 0x001CU});
+  EXPECT_EQ(0x2A, bytes[0x18]);  // 00030018: BGE 0003001C ; Immd19
+  EXPECT_EQ(0x00, bytes[0x19]);
+  EXPECT_EQ(0x00, bytes[0x1A]);
+  EXPECT_EQ(0x54, bytes[0x1B]);
+
+  // Immd14: retarget TBZ; bit-number field (#17) must be preserved.
+  auto writer3 = std::make_unique<
+      Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd14>>(translator,
+                                                                 region);
+  writer3->PutNext({0x001CU, 0x0010U});
+  EXPECT_EQ(0xAC, bytes[0x1C]);  // 0003001C: TBZ X12,#17,00030010 ; Immd14
+  EXPECT_EQ(0xFF, bytes[0x1D]);
+  EXPECT_EQ(0x8F, bytes[0x1E]);
+  EXPECT_EQ(0x36, bytes[0x1F]);
+}
+
+// Tests copying branch displacements between AArch64 instructions while
+// preserving opcode/register/bit-number fields, for each immediate width.
+TEST(Rel32UtilsTest, ArmCopyDisp_AArch64) {
+  std::vector<uint8_t> expect_fail;
+
+  // Successful Imm26.
+  ArmCopyDispFun copier_Immd26 =
+      ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd26>;
+  CheckCopy({0x12, 0x34, 0x56, 0x94},  // 00000100: BL 0158D148
+            {0xA1, 0xC0, 0x0E, 0x17},  // 00000100: B FC3B0384
+            {0x12, 0x34, 0x56, 0x14},  // 00000100: B 0158D148
+            {0xA1, 0xC0, 0x0E, 0x97},  // 00000100: BL FC3B0384
+            copier_Immd26);
+
+  // Successful Imm19.
+  ArmCopyDispFun copier_Immd19 =
+      ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd19>;
+  CheckCopy({0x24, 0x12, 0x34, 0x54},  // 00000100: BMI 00068344
+            {0xD7, 0xA5, 0xFC, 0xB4},  // 00000100: CBZ X23,FFFF95B8
+            {0x37, 0x12, 0x34, 0xB4},  // 00000100: CBZ X23,00068344
+            {0xC4, 0xA5, 0xFC, 0x54},  // 00000100: BMI FFFF95B8
+            copier_Immd19);
+
+  // Successful Imm14.
+  ArmCopyDispFun copier_Immd14 =
+      ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd14>;
+  CheckCopy({0x00, 0x00, 0x00, 0x36},  // 00000100: TBZ X0,#0,00000100
+            {0xFF, 0xFF, 0xFF, 0xB7},  // 00000100: TBNZ ZR,#63,000000FC
+            {0x1F, 0x00, 0xF8, 0xB7},  // 00000100: TBNZ ZR,#63,00000100
+            {0xE0, 0xFF, 0x07, 0x36},  // 00000100: TBZ X0,#0,000000FC
+            copier_Immd14);
+
+  // Failure if wrong copier is used.
+  CheckCopy(expect_fail, expect_fail, {0x1F, 0x00, 0xF8, 0xB7},
+            {0xE0, 0xFF, 0x07, 0x36}, copier_Immd26);
+}
+
+} // namespace zucchini
diff --git a/reloc_elf.cc b/reloc_elf.cc
new file mode 100644
index 0000000..a7d1b38
--- /dev/null
+++ b/reloc_elf.cc
@@ -0,0 +1,163 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reloc_elf.h"
+
+#include <algorithm>
+
+#include "base/logging.h"
+#include "components/zucchini/algorithm.h"
+
+namespace zucchini {
+
+/******** RelocReaderElf ********/
+
+// Constructs a reader that emits reloc references in |[lo, hi)| across the
+// given (sorted) reloc sections. The body adjusts |cursor_| and |hi_| so that
+// partially-covered reloc structs at the range boundaries are handled
+// consistently.
+RelocReaderElf::RelocReaderElf(
+    ConstBufferView image,
+    Bitness bitness,
+    const std::vector<SectionDimensionsElf>& reloc_section_dims,
+    uint32_t rel_type,
+    offset_t lo,
+    offset_t hi,
+    const AddressTranslator& translator)
+    : image_(image),
+      bitness_(bitness),
+      rel_type_(rel_type),
+      reloc_section_dimensions_(reloc_section_dims),
+      hi_(hi),
+      target_rva_to_offset_(translator) {
+  DCHECK(bitness_ == kBit32 || bitness_ == kBit64);
+
+  // Find the relocation section at or right before |lo|. The heterogeneous
+  // operator<(offset_t, SectionDimensionsElf) lets upper_bound() compare |lo|
+  // directly against each section's |region.offset|.
+  cur_section_dimensions_ = std::upper_bound(
+      reloc_section_dimensions_.begin(), reloc_section_dimensions_.end(), lo);
+  if (cur_section_dimensions_ != reloc_section_dimensions_.begin())
+    --cur_section_dimensions_;
+
+  // |lo| and |hi_| do not cut across a reloc reference (e.g.,
+  // Elf_Rel::r_offset), but may cut across a reloc struct (e.g. Elf_Rel)!
+  // GetNext() emits all reloc references in |[lo, hi_)|, but needs to examine
+  // the entire reloc struct for context. Knowing that |r_offset| is the first
+  // entry in a reloc struct, |cursor_| and |hi_| are adjusted by the following:
+  // - If |lo| is in a reloc section, then |cursor_| is chosen, as |lo| aligned
+  //   up to the next reloc struct, to exclude reloc struct that |lo| may cut
+  //   across.
+  // - If |hi_| is in a reloc section, then align it up, to include reloc struct
+  //   that |hi_| may cut across.
+  cursor_ =
+      base::checked_cast<offset_t>(cur_section_dimensions_->region.offset);
+  if (cursor_ < lo)
+    cursor_ +=
+        AlignCeil<offset_t>(lo - cursor_, cur_section_dimensions_->entry_size);
+
+  auto end_section = std::upper_bound(reloc_section_dimensions_.begin(),
+                                      reloc_section_dimensions_.end(), hi_);
+  if (end_section != reloc_section_dimensions_.begin()) {
+    --end_section;
+    // Only adjust |hi_| if it falls strictly inside |end_section|'s region.
+    if (hi_ - end_section->region.offset < end_section->region.size) {
+      offset_t end_region_offset =
+          base::checked_cast<offset_t>(end_section->region.offset);
+      hi_ = end_region_offset + AlignCeil<offset_t>(hi_ - end_region_offset,
+                                                    end_section->entry_size);
+    }
+  }
+}
+
+RelocReaderElf::~RelocReaderElf() = default;
+
+// Extracts the target RVA from a 32-bit reloc entry, or kInvalidRva if the
+// entry's type does not match |rel_type_|.
+rva_t RelocReaderElf::GetRelocationTarget(elf::Elf32_Rel rel) const {
+  // The least significant byte of |rel.r_info| is the type. The other 3 bytes
+  // store the symbol, which we ignore. This matches the ELF32_R_TYPE() macro.
+  uint32_t type = static_cast<uint32_t>(rel.r_info & 0xFF);
+  if (type == rel_type_)
+    return rel.r_offset;
+  return kInvalidRva;
+}
+
+// 64-bit counterpart. Skips (with a warning) entries whose |r_offset| does
+// not fit in 32 bits, since rva_t is 32-bit.
+rva_t RelocReaderElf::GetRelocationTarget(elf::Elf64_Rel rel) const {
+  // The least significant 4 bytes of |rel.r_info| is the type. The other 4
+  // bytes store the symbol, which we ignore. This matches ELF64_R_TYPE().
+  uint32_t type = static_cast<uint32_t>(rel.r_info & 0xFFFFFFFF);
+  if (type == rel_type_) {
+    // Assume |rel.r_offset| fits within 32-bit integer.
+    if ((rel.r_offset & 0xFFFFFFFF) == rel.r_offset)
+      return static_cast<rva_t>(rel.r_offset);
+    // Otherwise output warning.
+    LOG(WARNING) << "Warning: Skipping r_offset whose value exceeds 32-bits.";
+  }
+  return kInvalidRva;
+}
+
+// Emits the next valid reloc Reference in |[cursor_, hi_)|, or absl::nullopt
+// when exhausted. Implements a generator over a nested loop: the inner |while|
+// advances to the next reloc section when the cursor runs off the current one.
+absl::optional<Reference> RelocReaderElf::GetNext() {
+  offset_t cur_entry_size = cur_section_dimensions_->entry_size;
+  offset_t cur_section_dimensions_end =
+      base::checked_cast<offset_t>(cur_section_dimensions_->region.hi());
+
+  for (; cursor_ + cur_entry_size <= hi_; cursor_ += cur_entry_size) {
+    // "Outer loop": advance to the next section once the current one is done.
+    while (cursor_ >= cur_section_dimensions_end) {
+      ++cur_section_dimensions_;
+      if (cur_section_dimensions_ == reloc_section_dimensions_.end())
+        return absl::nullopt;
+      cur_entry_size = cur_section_dimensions_->entry_size;
+      cursor_ =
+          base::checked_cast<offset_t>(cur_section_dimensions_->region.offset);
+      if (cursor_ + cur_entry_size > hi_)
+        return absl::nullopt;
+      cur_section_dimensions_end =
+          base::checked_cast<offset_t>(cur_section_dimensions_->region.hi());
+    }
+    rva_t target_rva = kInvalidRva;
+    // TODO(huangs): Fix RELA sections: Need to process |r_addend|.
+    switch (bitness_) {
+      case kBit32:
+        target_rva = GetRelocationTarget(image_.read<elf::Elf32_Rel>(cursor_));
+        break;
+      case kBit64:
+        target_rva = GetRelocationTarget(image_.read<elf::Elf64_Rel>(cursor_));
+        break;
+    }
+    if (target_rva == kInvalidRva)
+      continue;
+    // TODO(huangs): Make the check more strict: The reference body should not
+    // straddle section boundary.
+    offset_t target = target_rva_to_offset_.Convert(target_rva);
+    if (target == kInvalidOffset)
+      continue;
+    // |target| will be used to obtain abs32 references, so we must ensure that
+    // it lies inside |image_|.
+    if (!image_.covers({target, WidthOf(bitness_)}))
+      continue;
+    offset_t location = cursor_;
+    // Advance past the emitted entry so the next call resumes after it.
+    cursor_ += cur_entry_size;
+    return Reference{location, target};
+  }
+  return absl::nullopt;
+}
+
+/******** RelocWriterElf ********/
+
+RelocWriterElf::RelocWriterElf(MutableBufferView image,
+                               Bitness bitness,
+                               const AddressTranslator& translator)
+    : image_(image), bitness_(bitness), target_offset_to_rva_(translator) {
+  DCHECK(bitness_ == kBit32 || bitness_ == kBit64);
+}
+
+RelocWriterElf::~RelocWriterElf() = default;
+
+// Writes |ref.target| (converted from offset back to RVA) into the
+// |r_offset| field of the Elf_Rel struct at |ref.location|.
+void RelocWriterElf::PutNext(Reference ref) {
+  switch (bitness_) {
+    case kBit32:
+      image_.modify<elf::Elf32_Rel>(ref.location).r_offset =
+          target_offset_to_rva_.Convert(ref.target);
+      break;
+    case kBit64:
+      image_.modify<elf::Elf64_Rel>(ref.location).r_offset =
+          target_offset_to_rva_.Convert(ref.target);
+      break;
+  }
+  // Leave |reloc.r_info| alone.
+}
+
+} // namespace zucchini
diff --git a/reloc_elf.h b/reloc_elf.h
new file mode 100644
index 0000000..ebf2577
--- /dev/null
+++ b/reloc_elf.h
@@ -0,0 +1,102 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_RELOC_ELF_H_
+#define COMPONENTS_ZUCCHINI_RELOC_ELF_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/type_elf.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+// Section dimensions for ELF files, to store relevant dimensions data from
+// Elf32_Shdr and Elf64_Shdr, while reducing code duplication from templates.
+struct SectionDimensionsElf {
+  SectionDimensionsElf() = default;
+
+  template <class Elf_Shdr>
+  explicit SectionDimensionsElf(const Elf_Shdr& section)
+      : region(BufferRegion{base::checked_cast<size_t>(section.sh_offset),
+                            base::checked_cast<size_t>(section.sh_size)}),
+        entry_size(base::checked_cast<offset_t>(section.sh_entsize)) {}
+
+  // Orders sections by file offset; used to keep section lists sorted.
+  friend bool operator<(const SectionDimensionsElf& a,
+                        const SectionDimensionsElf& b) {
+    return a.region.offset < b.region.offset;
+  }
+
+  // Heterogeneous comparator so std::upper_bound() can search a sorted
+  // vector of sections with a bare offset as the key.
+  friend bool operator<(offset_t offset, const SectionDimensionsElf& section) {
+    return offset < section.region.offset;
+  }
+
+  BufferRegion region;
+  offset_t entry_size;  // Varies across REL / RELA sections.
+};
+
+// A Generator to visit all reloc structs located in [|lo|, |hi|) (excluding
+// truncated struct at |lo| but including truncated struct at |hi|), and emit
+// valid References with |rel_type|. This implements a nested loop unrolled into
+// a generator: the outer loop has |cur_section_dimensions_| visiting
+// |reloc_section_dims| (sorted by |region.offset|), and the inner loop has
+// |cursor_| visiting successive reloc structs within |cur_section_dimensions_|.
+class RelocReaderElf : public ReferenceReader {
+ public:
+  RelocReaderElf(
+      ConstBufferView image,
+      Bitness bitness,
+      const std::vector<SectionDimensionsElf>& reloc_section_dimensions,
+      uint32_t rel_type,
+      offset_t lo,
+      offset_t hi,
+      const AddressTranslator& translator);
+  ~RelocReaderElf() override;
+
+  // If |rel| contains |r_offset| for |rel_type_|, return the RVA. Otherwise
+  // return |kInvalidRva|. These also handle Elf*_Rela, by using the fact that
+  // Elf*_Rel is a prefix of Elf*_Rela.
+  rva_t GetRelocationTarget(elf::Elf32_Rel rel) const;
+  rva_t GetRelocationTarget(elf::Elf64_Rel rel) const;
+
+  // ReferenceReader:
+  absl::optional<Reference> GetNext() override;
+
+ private:
+  const ConstBufferView image_;
+  const Bitness bitness_;
+  const uint32_t rel_type_;
+  // NOTE(review): reference member — the caller-owned vector must outlive
+  // this reader.
+  const std::vector<SectionDimensionsElf>& reloc_section_dimensions_;
+  std::vector<SectionDimensionsElf>::const_iterator cur_section_dimensions_;
+  offset_t hi_;
+  offset_t cursor_;
+  AddressTranslator::RvaToOffsetCache target_rva_to_offset_;
+};
+
+// Writes reloc References back into an ELF image: for each Reference, the
+// target offset is converted to an RVA and stored in the entry's |r_offset|.
+class RelocWriterElf : public ReferenceWriter {
+ public:
+  RelocWriterElf(MutableBufferView image,
+                 Bitness bitness,
+                 const AddressTranslator& translator);
+  ~RelocWriterElf() override;
+
+  // ReferenceWriter:
+  void PutNext(Reference ref) override;
+
+ private:
+  MutableBufferView image_;
+  const Bitness bitness_;
+  AddressTranslator::OffsetToRvaCache target_offset_to_rva_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_RELOC_ELF_H_
diff --git a/reloc_elf_unittest.cc b/reloc_elf_unittest.cc
new file mode 100644
index 0000000..8a1b932
--- /dev/null
+++ b/reloc_elf_unittest.cc
@@ -0,0 +1,242 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reloc_elf.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/disassembler_elf.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/test_utils.h"
+#include "components/zucchini/type_elf.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Builds a SectionDimensionsElf from a buffer region and entry size by
+// synthesizing a minimal Elf_Shdr. Field order in the braced initializer must
+// match the Elf_Shdr member layout (only sh_offset, sh_size, sh_entsize are
+// meaningful here).
+template <class Elf_Shdr>
+SectionDimensionsElf MakeSectionDimensions(const BufferRegion& region,
+                                           offset_t entry_size) {
+  using sh_offset_t = decltype(Elf_Shdr::sh_offset);
+  using sh_size_t = decltype(Elf_Shdr::sh_size);
+  using sh_entsize_t = decltype(Elf_Shdr::sh_entsize);
+  return SectionDimensionsElf{Elf_Shdr{
+      0,  // sh_name
+      0,  // sh_type
+      0,  // sh_flags
+      0,  // sh_addr
+      // sh_offset
+      base::checked_cast<sh_offset_t>(region.offset),
+      // sh_size
+      base::checked_cast<sh_size_t>(region.size),
+      0,  // sh_link
+      0,  // sh_info
+      0,  // sh_addralign
+      // sh_entsize
+      base::checked_cast<sh_entsize_t>(entry_size),
+  }};
+}
+
+// Helper to manipulate an image with one or more relocation tables.
+template <class ELF_INTEL_TRAITS>
+class FakeImageWithReloc {
+ public:
+  using ElfIntelTraits = ELF_INTEL_TRAITS;
+  // One reloc section: its start offset and its raw bytes.
+  struct RelocSpec {
+    offset_t start;
+    std::vector<uint8_t> data;
+  };
+
+  // Fills an |image_size| image with 0xFF, installs each reloc section, and
+  // sets up a 1:1 offset<->RVA translator based at |base_rva|.
+  FakeImageWithReloc(size_t image_size,
+                     rva_t base_rva,
+                     const std::vector<RelocSpec>& reloc_specs)
+      : image_data_(image_size, 0xFF),
+        mutable_image_(&image_data_[0], image_data_.size()) {
+    translator_.Initialize({{0, static_cast<offset_t>(image_size), base_rva,
+                             static_cast<rva_t>(image_size)}});
+    // Set up test image with reloc sections.
+    for (const RelocSpec& reloc_spec : reloc_specs) {
+      BufferRegion reloc_region = {reloc_spec.start, reloc_spec.data.size()};
+      std::copy(reloc_spec.data.begin(), reloc_spec.data.end(),
+                image_data_.begin() + reloc_region.lo());
+      section_dimensions_.emplace_back(
+          MakeSectionDimensions<typename ElfIntelTraits::Elf_Shdr>(
+              reloc_region, ElfIntelTraits::kVAWidth));
+      reloc_regions_.push_back(reloc_region);
+    }
+  }
+
+  // Runs a RelocReaderElf over the whole image and returns every Reference
+  // it emits.
+  std::vector<Reference> ExtractRelocReferences() {
+    const size_t image_size = image_data_.size();
+    ConstBufferView image = {image_data_.data(), image_size};
+
+    // Make RelocReaderElf.
+    auto reader = std::make_unique<RelocReaderElf>(
+        image, ElfIntelTraits::kBitness, section_dimensions_,
+        ElfIntelTraits::kRelType, 0, image_size, translator_);
+
+    // Read all references and check.
+    std::vector<Reference> refs;
+    for (absl::optional<Reference> ref = reader->GetNext(); ref.has_value();
+         ref = reader->GetNext()) {
+      refs.push_back(ref.value());
+    }
+    return refs;
+  }
+
+  std::unique_ptr<RelocWriterElf> MakeRelocWriter() {
+    // NOTE(review): std::move() on a returned prvalue is redundant and blocks
+    // copy elision; returning the make_unique expression directly suffices.
+    return std::move(std::make_unique<RelocWriterElf>(
+        mutable_image_, ElfIntelTraits::kBitness, translator_));
+  }
+
+  // Returns the current bytes of the |reloc_index|-th reloc section.
+  std::vector<uint8_t> GetRawRelocData(int reloc_index) {
+    BufferRegion reloc_region = reloc_regions_[reloc_index];
+    return Sub(image_data_, reloc_region.lo(), reloc_region.hi());
+  }
+
+ private:
+  std::vector<uint8_t> image_data_;
+  MutableBufferView mutable_image_;
+  std::vector<BufferRegion> reloc_regions_;
+  std::vector<SectionDimensionsElf> section_dimensions_;
+  AddressTranslator translator_;
+};
+
+} // namespace
+
+// Round-trip test: extract R_386_RELATIVE references from two 32-bit reloc
+// sections, then write modified targets back and verify the raw bytes.
+TEST(RelocElfTest, ReadWrite32) {
+  // Set up mock image: Size = 0x3000, .reloc at 0x600. RVA is 0x40000 + offset.
+  constexpr size_t kImageSize = 0x3000;
+  constexpr rva_t kBaseRva = 0x40000;
+
+  constexpr offset_t kRelocStart0 = 0x600;
+  // "C0 10 04 00 08 00 00 00" represents
+  // (r_sym, r_type, r_offset) = (0x000000, 0x08, 0x000410C0).
+  // r_type = 0x08 = R_386_RELATIVE, and so |r_offset| is an RVA 0x000410C0.
+  // Zucchini does not care about |r_sym|.
+  std::vector<uint8_t> reloc_data0 = ParseHexString(
+      "C0 10 04 00 08 00 00 00 "   // R_386_RELATIVE.
+      "F8 10 04 00 08 AB CD EF "   // R_386_RELATIVE.
+      "00 10 04 00 00 AB CD EF "   // R_386_NONE.
+      "00 10 04 00 07 AB CD EF");  // R_386_JMP_SLOT.
+
+  constexpr offset_t kRelocStart1 = 0x620;
+  std::vector<uint8_t> reloc_data1 = ParseHexString(
+      "BC 20 04 00 08 00 00 00 "   // R_386_RELATIVE.
+      "A0 20 04 00 08 AB CD EF");  // R_386_RELATIVE.
+
+  FakeImageWithReloc<Elf32IntelTraits> fake_image(
+      kImageSize, kBaseRva,
+      {{kRelocStart0, reloc_data0}, {kRelocStart1, reloc_data1}});
+
+  // Only R_386_RELATIVE references are extracted. Targets are translated from
+  // address (e.g., 0x000420BC) to offset (e.g., 0x20BC).
+  std::vector<Reference> exp_refs{
+      {0x600, 0x10C0}, {0x608, 0x10F8}, {0x620, 0x20BC}, {0x628, 0x20A0}};
+  EXPECT_EQ(exp_refs, fake_image.ExtractRelocReferences());
+
+  // Write reference, extract bytes and check.
+  std::unique_ptr<RelocWriterElf> writer = fake_image.MakeRelocWriter();
+
+  writer->PutNext({0x608, 0x1F83});
+  std::vector<uint8_t> exp_reloc_data0 = ParseHexString(
+      "C0 10 04 00 08 00 00 00 "   // R_386_RELATIVE.
+      "83 1F 04 00 08 AB CD EF "   // R_386_RELATIVE (address modified).
+      "00 10 04 00 00 AB CD EF "   // R_386_NONE.
+      "00 10 04 00 07 AB CD EF");  // R_386_JMP_SLOT.
+  EXPECT_EQ(exp_reloc_data0, fake_image.GetRawRelocData(0));
+
+  writer->PutNext({0x628, 0x2950});
+  std::vector<uint8_t> exp_reloc_data1 = ParseHexString(
+      "BC 20 04 00 08 00 00 00 "   // R_386_RELATIVE.
+      "50 29 04 00 08 AB CD EF");  // R_386_RELATIVE (address modified).
+  EXPECT_EQ(exp_reloc_data1, fake_image.GetRawRelocData(1));
+}
+
+// Verifies that 32-bit reloc entries whose 4-byte target would straddle or
+// fall beyond the end of the image are rejected, while in-bounds entries
+// (before and after the bad ones) are still emitted.
+TEST(RelocElfTest, Limit32) {
+  constexpr size_t kImageSize = 0x3000;
+  constexpr offset_t kBaseRva = 0x40000;
+  constexpr offset_t kRelocStart = 0x600;
+  // All R_386_RELATIVE.
+  std::vector<uint8_t> reloc_data = ParseHexString(
+      // Strictly within file.
+      "00 00 04 00 08 00 00 00 "
+      "00 10 04 00 08 00 00 00 "
+      "F0 2F 04 00 08 00 00 00 "
+      "F8 2F 04 00 08 00 00 00 "
+      "FC 2F 04 00 08 00 00 00 "
+      // Straddles end of file.
+      "FD 2F 04 00 08 00 00 00 "
+      "FE 2F 04 00 08 00 00 00 "
+      "FF 2F 04 00 08 00 00 00 "
+      // Beyond end of file.
+      "00 30 04 00 08 00 00 00 "
+      "01 30 04 00 08 00 00 00 "
+      "FC FF FF 7F 08 00 00 00 "
+      "FE FF FF 7F 08 00 00 00 "
+      "00 00 00 80 08 00 00 00 "
+      "FC FF FF FF 08 00 00 00 "
+      "FF FF FF FF 08 00 00 00 "
+      // Another good reference.
+      "34 12 04 00 08 00 00 00");
+
+  FakeImageWithReloc<Elf32IntelTraits> fake_image(kImageSize, kBaseRva,
+                                                  {{kRelocStart, reloc_data}});
+
+  std::vector<Reference> exp_refs{{0x600, 0x0000}, {0x608, 0x1000},
+                                  {0x610, 0x2FF0}, {0x618, 0x2FF8},
+                                  {0x620, 0x2FFC}, {0x678, 0x1234}};
+  EXPECT_EQ(exp_refs, fake_image.ExtractRelocReferences());
+}
+
+// 64-bit counterpart of Limit32: entries with out-of-bounds, non-matching
+// type, or >32-bit r_offset values are rejected; 8-byte-wide targets must fit
+// entirely inside the image.
+TEST(RelocElfTest, Limit64) {
+  constexpr size_t kImageSize = 0x3000;
+  constexpr offset_t kBaseRva = 0x40000;
+
+  constexpr offset_t kRelocStart = 0x600;
+  // All R_X86_64_RELATIVE.
+  std::vector<uint8_t> reloc_data = ParseHexString(
+      // Strictly within file.
+      "00 00 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "00 10 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "F0 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "F4 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "F8 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      // Straddles end of file.
+      "F9 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "FC 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "FF 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      // Beyond end of file.
+      "00 30 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "01 30 04 00 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "FC FF FF 7F 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "FE FF FF 7F 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "00 00 00 80 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "FC FF FF FF 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "FF FF FF FF 00 00 00 00 08 00 00 00 00 00 00 00 "
+      "00 00 04 00 01 00 00 00 08 00 00 00 00 00 00 00 "
+      "FF FF FF FF FF FF FF FF 08 00 00 00 00 00 00 00 "
+      "F8 FF FF FF FF FF FF FF 08 00 00 00 00 00 00 00 "
+      // Another good reference.
+      "34 12 04 00 00 00 00 00 08 00 00 00 00 00 00 00");
+
+  FakeImageWithReloc<Elf64IntelTraits> fake_image(kImageSize, kBaseRva,
+                                                  {{kRelocStart, reloc_data}});
+
+  std::vector<Reference> exp_refs{{0x600, 0x0000}, {0x610, 0x1000},
+                                  {0x620, 0x2FF0}, {0x630, 0x2FF4},
+                                  {0x640, 0x2FF8}, {0x720, 0x1234}};
+  EXPECT_EQ(exp_refs, fake_image.ExtractRelocReferences());
+}
+
+} // namespace zucchini
diff --git a/reloc_win32.cc b/reloc_win32.cc
new file mode 100644
index 0000000..b70aa8a
--- /dev/null
+++ b/reloc_win32.cc
@@ -0,0 +1,196 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reloc_win32.h"
+
+#include <algorithm>
+#include <tuple>
+#include <utility>
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/io_utils.h"
+#include "components/zucchini/type_win_pe.h"
+
+namespace zucchini {
+
+/******** RelocUnitWin32 ********/
+
+RelocUnitWin32::RelocUnitWin32() = default;
+RelocUnitWin32::RelocUnitWin32(uint8_t type_in,
+                               offset_t location_in,
+                               rva_t target_rva_in)
+    : type(type_in), location(location_in), target_rva(target_rva_in) {}
+
+// Field-wise equality; used by tests to compare extracted reloc units.
+bool operator==(const RelocUnitWin32& a, const RelocUnitWin32& b) {
+  return std::tie(a.type, a.location, a.target_rva) ==
+         std::tie(b.type, b.location, b.target_rva);
+}
+
+/******** RelocRvaReaderWin32 ********/
+
+// static
+// Scans |reloc_region| and records the image offset of every reloc block
+// header into |reloc_block_offsets|. Returns false if any block header is
+// malformed (too small, unaligned size, or overruns the region) or if
+// trailing bytes remain after the last block.
+bool RelocRvaReaderWin32::FindRelocBlocks(
+    ConstBufferView image,
+    BufferRegion reloc_region,
+    std::vector<offset_t>* reloc_block_offsets) {
+  CHECK_LT(reloc_region.size, kOffsetBound);
+  ConstBufferView reloc_data = image[reloc_region];
+  reloc_block_offsets->clear();
+  while (reloc_data.size() >= sizeof(pe::RelocHeader)) {
+    reloc_block_offsets->push_back(
+        base::checked_cast<offset_t>(reloc_data.begin() - image.begin()));
+    auto size = reloc_data.read<pe::RelocHeader>(0).size;
+    // |size| must be aligned to 4-bytes.
+    if (size < sizeof(pe::RelocHeader) || size % 4 || size > reloc_data.size())
+      return false;
+    reloc_data.remove_prefix(size);
+  }
+  return reloc_data.empty();  // Fail if trailing data exist.
+}
+
+// Constructs a reader over reloc units in |[lo, hi)|, clamped to
+// |reloc_region|. |reloc_block_offsets| must come from FindRelocBlocks().
+RelocRvaReaderWin32::RelocRvaReaderWin32(
+    ConstBufferView image,
+    BufferRegion reloc_region,
+    const std::vector<offset_t>& reloc_block_offsets,
+    offset_t lo,
+    offset_t hi)
+    : image_(image) {
+  CHECK_LE(lo, hi);
+  lo = base::checked_cast<offset_t>(reloc_region.InclusiveClamp(lo));
+  hi = base::checked_cast<offset_t>(reloc_region.InclusiveClamp(hi));
+  end_it_ = image_.begin() + hi;
+
+  // By default, get GetNext() to produce empty output.
+  cur_reloc_units_ = BufferSource(end_it_, 0);
+  if (reloc_block_offsets.empty())
+    return;
+
+  // Find the block that contains |lo|.
+  auto block_it = std::upper_bound(reloc_block_offsets.begin(),
+                                   reloc_block_offsets.end(), lo);
+  DCHECK(block_it != reloc_block_offsets.begin());
+  --block_it;
+
+  // Initialize |cur_reloc_units_| and |rva_hi_bits_|.
+  if (!LoadRelocBlock(image_.begin() + *block_it))
+    return;  // Nothing left.
+
+  // Skip |cur_reloc_units_| to |lo|, truncating up.
+  offset_t cur_reloc_units_offset =
+      base::checked_cast<offset_t>(cur_reloc_units_.begin() - image_.begin());
+  if (lo > cur_reloc_units_offset) {
+    offset_t delta =
+        AlignCeil<offset_t>(lo - cur_reloc_units_offset, kRelocUnitSize);
+    cur_reloc_units_.Skip(delta);
+  }
+}
+
+RelocRvaReaderWin32::RelocRvaReaderWin32(RelocRvaReaderWin32&&) = default;
+
+RelocRvaReaderWin32::~RelocRvaReaderWin32() = default;
+
+// Unrolls a nested loop: outer = reloc blocks and inner = reloc entries.
+// Returns the next decoded unit, or absl::nullopt when the range is exhausted.
+absl::optional<RelocUnitWin32> RelocRvaReaderWin32::GetNext() {
+  // "Outer loop" to find non-empty reloc block.
+  while (cur_reloc_units_.Remaining() < kRelocUnitSize) {
+    if (!LoadRelocBlock(cur_reloc_units_.end()))
+      return absl::nullopt;
+  }
+  // Stop if the next unit would extend past |end_it_| (i.e., past |hi|).
+  if (end_it_ - cur_reloc_units_.begin() < kRelocUnitSize)
+    return absl::nullopt;
+  // "Inner loop" to extract single reloc unit.
+  offset_t location =
+      base::checked_cast<offset_t>(cur_reloc_units_.begin() - image_.begin());
+  uint16_t entry = cur_reloc_units_.read<uint16_t>(0);
+  // Entry layout: type in bits 12-15, low 12 bits of the target RVA in 0-11.
+  uint8_t type = static_cast<uint8_t>(entry >> 12);
+  rva_t rva = rva_hi_bits_ + (entry & 0xFFF);
+  cur_reloc_units_.Skip(kRelocUnitSize);
+  return RelocUnitWin32{type, location, rva};
+}
+
+// Reads the reloc block header at |block_begin|, caches its high RVA bits,
+// and positions |cur_reloc_units_| at the block's first unit. Returns false
+// if the header is out of range or the block size is inconsistent.
+bool RelocRvaReaderWin32::LoadRelocBlock(
+    ConstBufferView::const_iterator block_begin) {
+  ConstBufferView header_buf(block_begin, sizeof(pe::RelocHeader));
+  // Require at least one full unit after the header before |end_it_|.
+  if (header_buf.end() >= end_it_ ||
+      end_it_ - header_buf.end() < kRelocUnitSize) {
+    return false;
+  }
+  const auto& header = header_buf.read<pe::RelocHeader>(0);
+  rva_hi_bits_ = header.rva_hi;
+  uint32_t block_size = header.size;
+  if (block_size < sizeof(pe::RelocHeader))
+    return false;
+  // Payload (past the header) must be a whole number of 2-byte units.
+  if ((block_size - sizeof(pe::RelocHeader)) % kRelocUnitSize != 0)
+    return false;
+  cur_reloc_units_ = BufferSource(block_begin, block_size);
+  cur_reloc_units_.Skip(sizeof(pe::RelocHeader));
+  return true;
+}
+
+/******** RelocReaderWin32 ********/
+
+RelocReaderWin32::RelocReaderWin32(RelocRvaReaderWin32&& reloc_rva_reader,
+                                   uint16_t reloc_type,
+                                   offset_t offset_bound,
+                                   const AddressTranslator& translator)
+    : reloc_rva_reader_(std::move(reloc_rva_reader)),
+      reloc_type_(reloc_type),
+      offset_bound_(offset_bound),
+      entry_rva_to_offset_(translator) {}
+
+RelocReaderWin32::~RelocReaderWin32() = default;
+
+// ReferenceReader:
+// Filters raw reloc units down to References: keeps only units of
+// |reloc_type_| whose target RVA maps to a valid in-bounds offset.
+absl::optional<Reference> RelocReaderWin32::GetNext() {
+  for (absl::optional<RelocUnitWin32> unit = reloc_rva_reader_.GetNext();
+       unit.has_value(); unit = reloc_rva_reader_.GetNext()) {
+    if (unit->type != reloc_type_)
+      continue;
+    offset_t target = entry_rva_to_offset_.Convert(unit->target_rva);
+    if (target == kInvalidOffset)
+      continue;
+    // Ensure that |target| (abs32 reference) lies entirely within the image.
+    if (target >= offset_bound_)
+      continue;
+    offset_t location = unit->location;
+    return Reference{location, target};
+  }
+  return absl::nullopt;
+}
+
+/******** RelocWriterWin32 ********/
+
+RelocWriterWin32::RelocWriterWin32(
+    uint16_t reloc_type,
+    MutableBufferView image,
+    BufferRegion reloc_region,
+    const std::vector<offset_t>& reloc_block_offsets,
+    const AddressTranslator& translator)
+    : reloc_type_(reloc_type),
+      image_(image),
+      reloc_region_(reloc_region),
+      reloc_block_offsets_(reloc_block_offsets),
+      target_offset_to_rva_(translator) {}
+
+RelocWriterWin32::~RelocWriterWin32() = default;
+
+// Re-encodes |ref.target| as the low 12 RVA bits of the reloc unit at
+// |ref.location|, using the containing block's |rva_hi| for the high bits.
+// Logs and skips the write if the target RVA does not fit the block.
+void RelocWriterWin32::PutNext(Reference ref) {
+  DCHECK_GE(ref.location, reloc_region_.lo());
+  DCHECK_LT(ref.location, reloc_region_.hi());
+  // Find the reloc block containing |ref.location| to recover |rva_hi|.
+  auto block_it = std::upper_bound(reloc_block_offsets_.begin(),
+                                   reloc_block_offsets_.end(), ref.location);
+  --block_it;
+  rva_t rva_hi_bits = image_.read<pe::RelocHeader>(*block_it).rva_hi;
+  rva_t target_rva = target_offset_to_rva_.Convert(ref.target);
+  rva_t rva_lo_bits = (target_rva - rva_hi_bits) & 0xFFF;
+  if (target_rva != rva_hi_bits + rva_lo_bits) {
+    LOG(ERROR) << "Invalid RVA at " << AsHex<8>(ref.location) << ".";
+    return;
+  }
+  image_.write<uint16_t>(ref.location, rva_lo_bits | (reloc_type_ << 12));
+}
+
+} // namespace zucchini
diff --git a/reloc_win32.h b/reloc_win32.h
new file mode 100644
index 0000000..6393702
--- /dev/null
+++ b/reloc_win32.h
@@ -0,0 +1,140 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_RELOC_WIN32_H_
+#define COMPONENTS_ZUCCHINI_RELOC_WIN32_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/buffer_source.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+// Win32 PE relocation table stores a list of (type, RVA) pairs. The table is
+// organized into "blocks" for RVAs with common high-order bits (12-31). Each
+// block consists of a list (even length) of 2-byte "units". Each unit stores
+// type (in bits 12-15) and low-order bits (0-11) of an RVA (in bits 0-11). In
+// pseudo-struct:
+// struct Block {
+// uint32_t rva_hi;
+// uint32_t block_size_in_bytes; // 8 + multiple of 4.
+// struct {
+// uint16_t rva_lo:12, type:4; // Little-endian.
+// } units[(block_size_in_bytes - 8) / 2]; // Size must be even.
+// } reloc_table[num_blocks]; // May have padding (type = 0).
+
+// Extracted Win32 reloc Unit data.
+struct RelocUnitWin32 {
+ RelocUnitWin32();
+ RelocUnitWin32(uint8_t type_in, offset_t location_in, rva_t target_rva_in);
+ friend bool operator==(const RelocUnitWin32& a, const RelocUnitWin32& b);
+
+ uint8_t type;
+ offset_t location;
+ rva_t target_rva;
+};
+
+// A reader that parses Win32 PE relocation data and emits RelocUnitWin32 for
+// each reloc unit that lies strictly inside |[lo, hi)|.
+class RelocRvaReaderWin32 {
+ public:
+ enum : ptrdiff_t { kRelocUnitSize = sizeof(uint16_t) };
+
+ // Parses |image| at |reloc_region| to find beginning offsets of each reloc
+ // block. On success, writes the result to |reloc_block_offsets| and returns
+ // true. Otherwise leaves |reloc_block_offsets| in an undetermined state, and
+ // returns false.
+ static bool FindRelocBlocks(ConstBufferView image,
+ BufferRegion reloc_region,
+ std::vector<offset_t>* reloc_block_offsets);
+
+  // |reloc_block_offsets| should be precomputed from FindRelocBlocks().
+ RelocRvaReaderWin32(ConstBufferView image,
+ BufferRegion reloc_region,
+ const std::vector<offset_t>& reloc_block_offsets,
+ offset_t lo,
+ offset_t hi);
+ RelocRvaReaderWin32(RelocRvaReaderWin32&&);
+ ~RelocRvaReaderWin32();
+
+ // Successively visits and returns data for each reloc unit, or absl::nullopt
+ // when all reloc units are found. Encapsulates block transition details.
+ absl::optional<RelocUnitWin32> GetNext();
+
+ private:
+ // Assuming that |block_begin| points to the beginning of a reloc block, loads
+ // |rva_hi_bits_| and assigns |cur_reloc_units_| as the region containing the
+ // associated units, potentially truncated by |end_it_|. Returns true if reloc
+ // data are available for read, and false otherwise.
+ bool LoadRelocBlock(ConstBufferView::const_iterator block_begin);
+
+ const ConstBufferView image_;
+
+ // End iterator.
+ ConstBufferView::const_iterator end_it_;
+
+ // Unit data of the current reloc block.
+ BufferSource cur_reloc_units_;
+
+ // High-order bits (12-31) for all relocs of the current reloc block.
+ rva_t rva_hi_bits_;
+};
+
+// A reader for Win32 reloc References, implemented as a filtering and
+// translation adaptor of RelocRvaReaderWin32.
+class RelocReaderWin32 : public ReferenceReader {
+ public:
+ // Takes ownership of |reloc_rva_reader|. |offset_bound| specifies the
+ // exclusive upper bound of reloc target offsets, taking account of widths of
+ // targets (which are abs32 References).
+ RelocReaderWin32(RelocRvaReaderWin32&& reloc_rva_reader,
+ uint16_t reloc_type,
+ offset_t offset_bound,
+ const AddressTranslator& translator);
+ ~RelocReaderWin32() override;
+
+ // ReferenceReader:
+ absl::optional<Reference> GetNext() override;
+
+ private:
+ RelocRvaReaderWin32 reloc_rva_reader_;
+ const uint16_t reloc_type_; // uint16_t to simplify shifting (<< 12).
+ const offset_t offset_bound_;
+ AddressTranslator::RvaToOffsetCache entry_rva_to_offset_;
+};
+
+// A writer for Win32 reloc References. This is simpler than the reader since:
+// - No iteration is required.
+// - High-order bits of reloc target RVAs are assumed to be handled elsewhere,
+// so only low-order bits need to be written.
+class RelocWriterWin32 : public ReferenceWriter {
+ public:
+ RelocWriterWin32(uint16_t reloc_type,
+ MutableBufferView image,
+ BufferRegion reloc_region,
+ const std::vector<offset_t>& reloc_block_offsets,
+ const AddressTranslator& translator);
+ ~RelocWriterWin32() override;
+
+ // ReferenceWriter:
+ void PutNext(Reference ref) override;
+
+ private:
+ const uint16_t reloc_type_;
+ MutableBufferView image_;
+ BufferRegion reloc_region_;
+ const std::vector<offset_t>& reloc_block_offsets_;
+ AddressTranslator::OffsetToRvaCache target_offset_to_rva_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_RELOC_WIN32_H_
diff --git a/reloc_win32_unittest.cc b/reloc_win32_unittest.cc
new file mode 100644
index 0000000..e3d33ca
--- /dev/null
+++ b/reloc_win32_unittest.cc
@@ -0,0 +1,251 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/reloc_win32.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/numerics/safe_conversions.h"
+#include "base/test/gtest_util.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+class RelocUtilsWin32Test : public testing::Test {
+ protected:
+ using Units = std::vector<RelocUnitWin32>;
+
+ RelocUtilsWin32Test() {}
+
+ // Resets all tester data, calls RelocRvaReaderWin32::FindRelocBlocks(), and
+ // returns its results.
+ bool Initialize(const std::vector<uint8_t>& image_raw,
+ BufferRegion reloc_region) {
+ image_ = BufferSource(image_raw.data(), image_raw.size());
+ reloc_region_ = reloc_region;
+ return RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_,
+ &reloc_block_offsets_);
+ }
+
+ // Uses RelocRvaReaderWin32 to get all relocs, returned as Units.
+ Units EmitAll(offset_t lo, offset_t hi) {
+ RelocRvaReaderWin32 reader(image_, reloc_region_, reloc_block_offsets_, lo,
+ hi);
+ Units units;
+ for (auto unit = reader.GetNext(); unit.has_value();
+ unit = reader.GetNext()) {
+ units.push_back(unit.value());
+ }
+ return units;
+ }
+
+ ConstBufferView image_;
+ BufferRegion reloc_region_;
+ std::vector<uint32_t> reloc_block_offsets_;
+};
+
+TEST_F(RelocUtilsWin32Test, RvaReaderEmpty) {
+ {
+ std::vector<uint8_t> image_raw = ParseHexString("");
+ EXPECT_TRUE(Initialize(image_raw, {0U, 0U}));
+ EXPECT_EQ(std::vector<uint32_t>(), reloc_block_offsets_); // Nothing.
+ EXPECT_EQ(Units(), EmitAll(0U, 0U));
+ }
+ {
+ std::vector<uint8_t> image_raw = ParseHexString("AA BB CC DD EE FF");
+ EXPECT_TRUE(Initialize(image_raw, {2U, 0U}));
+ EXPECT_EQ(std::vector<uint32_t>(), reloc_block_offsets_); // Nothing.
+ EXPECT_EQ(Units(), EmitAll(2U, 2U));
+ }
+ {
+ std::vector<uint8_t> image_raw = ParseHexString("00 C0 00 00 08 00 00 00");
+ EXPECT_TRUE(Initialize(image_raw, {0U, image_raw.size()}));
+ EXPECT_EQ(std::vector<uint32_t>({0U}),
+ reloc_block_offsets_); // Empty block.
+ EXPECT_EQ(Units(), EmitAll(0U, 8U));
+ }
+}
+
+TEST_F(RelocUtilsWin32Test, RvaReaderBad) {
+ std::string test_cases[] = {
+ "00 C0 00 00 07 00 00", // Header too small.
+ "00 C0 00 00 08 00 00", // Header too small, lies about size.
+ "00 C0 00 00 0A 00 00 00 66 31", // Odd number of units.
+ "00 C0 00 00 0C 00 00 00 66 31 88 31 FF", // Trailing data.
+ };
+ for (const std::string& test_case : test_cases) {
+ std::vector<uint8_t> image_raw = ParseHexString(test_case);
+ EXPECT_FALSE(Initialize(image_raw, {0U, image_raw.size()}));
+ }
+}
+
+TEST_F(RelocUtilsWin32Test, RvaReaderSingle) {
+ // Block 0: All type 0x3: {0xC166, 0xC288, 0xC342, (padding) 0xCFFF}.
+ std::vector<uint8_t> image_raw = ParseHexString(
+ "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF "
+ "00 C0 00 00 10 00 00 00 66 31 88 32 42 33 FF 0F "
+ "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF");
+ constexpr offset_t kBlock0 = 16U;
+ Units exp0 = {{3, kBlock0 + 8U, 0xC166U},
+ {3, kBlock0 + 10U, 0xC288U},
+ {3, kBlock0 + 12U, 0xC342U},
+ {0, kBlock0 + 14U, 0xCFFFU}};
+
+ EXPECT_TRUE(Initialize(image_raw, {16U, 16U}));
+ EXPECT_EQ(exp0, EmitAll(kBlock0, kBlock0 + 16U));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 8U));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 9U));
+ EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0, kBlock0 + 10U));
+ EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0 + 8U, kBlock0 + 10U));
+ EXPECT_EQ(Units(), EmitAll(kBlock0 + 9U, kBlock0 + 10U));
+ EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 15U));
+ EXPECT_EQ(Sub(exp0, 2, 3), EmitAll(kBlock0 + 11U, kBlock0 + 15U));
+}
+
+TEST_F(RelocUtilsWin32Test, RvaReaderMulti) {
+ // The sample image encodes 3 reloc blocks:
+  // Block 0: All type 0x3: {0xC166, 0xC288, 0xC342, (padding) 0xCFFF}.
+ // Block 1: All type 0x3: {0x12166, 0x12288}.
+  // Block 2: All type 0xA: {0x24000, 0x24010, 0x24020, 0x24028, 0x2403C,
+ // 0x24170}.
+ std::vector<uint8_t> image_raw = ParseHexString(
+ "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF "
+ "00 C0 00 00 10 00 00 00 66 31 88 32 42 33 FF 0F "
+ "00 20 01 00 0C 00 00 00 66 31 88 32 "
+ "00 40 02 00 14 00 00 00 00 A0 10 A0 20 A0 28 A0 3C A0 70 A1 "
+ "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF");
+ offset_t image_size = base::checked_cast<offset_t>(image_raw.size());
+ constexpr offset_t kBlock0 = 16U;
+ constexpr offset_t kBlock1 = kBlock0 + 16U;
+ constexpr offset_t kBlock2 = kBlock1 + 12U;
+ constexpr offset_t kBlockEnd = kBlock2 + 20U;
+ Units exp0 = {{3, kBlock0 + 8U, 0xC166U},
+ {3, kBlock0 + 10U, 0xC288U},
+ {3, kBlock0 + 12U, 0xC342U},
+ {0, kBlock0 + 14U, 0xCFFFU}};
+ Units exp1 = {{3, kBlock0 + 24U, 0x12166U}, {3, kBlock0 + 26U, 0x12288U}};
+ Units exp2 = {{10, kBlock0 + 36U, 0x24000U}, {10, kBlock0 + 38U, 0x24010U},
+ {10, kBlock0 + 40U, 0x24020U}, {10, kBlock0 + 42U, 0x24028U},
+ {10, kBlock0 + 44U, 0x2403CU}, {10, kBlock0 + 46U, 0x24170U}};
+
+ EXPECT_TRUE(Initialize(image_raw, {kBlock0, kBlockEnd - kBlock0}));
+ EXPECT_EQ(std::vector<uint32_t>({kBlock0, kBlock1, kBlock2}),
+ reloc_block_offsets_);
+
+ // Everything.
+ EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0, kBlockEnd));
+ EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(0, image_size));
+ // Entire blocks.
+ EXPECT_EQ(exp0, EmitAll(kBlock0, kBlock1));
+ EXPECT_EQ(exp1, EmitAll(kBlock1, kBlock2));
+ EXPECT_EQ(exp2, EmitAll(kBlock2, kBlockEnd));
+ EXPECT_EQ(Units(), EmitAll(0, kBlock0));
+ EXPECT_EQ(Units(), EmitAll(kBlockEnd, image_size));
+ // Within blocks, clipped at boundaries.
+ EXPECT_EQ(exp0, EmitAll(kBlock0 + 5U, kBlock1));
+ EXPECT_EQ(exp0, EmitAll(kBlock0 + 8U, kBlock1));
+ EXPECT_EQ(Sub(exp0, 1, 4), EmitAll(kBlock0 + 9U, kBlock1));
+ EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 15U));
+ EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 14U));
+ EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0 + 8U, kBlock0 + 10U));
+ EXPECT_EQ(Sub(exp1, 1, 2), EmitAll(kBlock1 + 10U, kBlock1 + 12U));
+ EXPECT_EQ(Sub(exp2, 2, 4), EmitAll(kBlock2 + 12U, kBlock2 + 16U));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0));
+ EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 8U));
+ EXPECT_EQ(Units(), EmitAll(kBlock2 + 10U, kBlock2 + 11U));
+ EXPECT_EQ(Units(), EmitAll(kBlock2 + 11U, kBlock2 + 12U));
+ // Across blocks.
+ EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0 - 5U, kBlockEnd));
+ EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0 + 6U, kBlockEnd));
+ EXPECT_EQ(Cat(Cat(exp0, exp1), Sub(exp2, 0, 5)),
+ EmitAll(kBlock0 + 6U, kBlock2 + 18U));
+ EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)),
+ EmitAll(kBlock0 + 12U, kBlock1 + 10U));
+ EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)),
+ EmitAll(kBlock0 + 11U, kBlock1 + 10U));
+ EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)),
+ EmitAll(kBlock0 + 12U, kBlock1 + 11U));
+ EXPECT_EQ(Sub(exp1, 1, 2), EmitAll(kBlock1 + 10U, kBlock2 + 5U));
+ EXPECT_EQ(Cat(Sub(exp1, 1, 2), exp2), EmitAll(kBlock1 + 10U, kBlockEnd + 5));
+ EXPECT_EQ(Units(), EmitAll(kBlock0 + 15, kBlock1 + 9));
+}
+
+TEST_F(RelocUtilsWin32Test, ReadWrite) {
+ // Set up mock image: Size = 0x3000, .reloc at 0x600. RVA is 0x40000 + offset.
+ constexpr rva_t kBaseRva = 0x40000;
+ std::vector<uint8_t> image_data(0x3000, 0xFF);
+ // 4 x86 relocs (xx 3x), 3 x64 relocs (xx Ax), 1 padding (xx 0X).
+ std::vector<uint8_t> reloc_data = ParseHexString(
+ "00 10 04 00 10 00 00 00 C0 32 18 A3 F8 A7 FF 0F "
+ "00 20 04 00 10 00 00 00 80 A0 65 31 F8 37 BC 3A");
+ reloc_region_ = {0x600, reloc_data.size()};
+ std::copy(reloc_data.begin(), reloc_data.end(),
+ image_data.begin() + reloc_region_.lo());
+ image_ = {image_data.data(), image_data.size()};
+ offset_t image_size = base::checked_cast<offset_t>(image_.size());
+
+ AddressTranslator translator;
+ translator.Initialize({{0, image_size, kBaseRva, image_size}});
+
+ // Precompute |reloc_block_offsets_|.
+ EXPECT_TRUE(RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_,
+ &reloc_block_offsets_));
+ EXPECT_EQ(std::vector<uint32_t>({0x600U, 0x610U}), reloc_block_offsets_);
+
+ // Focus on x86.
+ constexpr uint16_t kRelocTypeX86 = 3;
+ constexpr offset_t kVAWidthX86 = 4;
+
+ // Make RelocRvaReaderWin32.
+ RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_,
+ reloc_block_offsets_, 0, image_size);
+ offset_t offset_bound = image_size - kVAWidthX86 + 1;
+
+ // Make RelocReaderWin32 that wraps |reloc_rva_reader|.
+ auto reader = std::make_unique<RelocReaderWin32>(
+ std::move(reloc_rva_reader), kRelocTypeX86, offset_bound, translator);
+
+ // Read all references and check.
+ std::vector<Reference> refs;
+ for (absl::optional<Reference> ref = reader->GetNext(); ref.has_value();
+ ref = reader->GetNext()) {
+ refs.push_back(ref.value());
+ }
+ std::vector<Reference> exp_refs{
+ {0x608, 0x12C0}, {0x61A, 0x2165}, {0x61C, 0x27F8}, {0x61E, 0x2ABC}};
+ EXPECT_EQ(exp_refs, refs);
+
+ // Write reference, extract bytes and check.
+ MutableBufferView mutable_image(&image_data[0], image_data.size());
+ auto writer = std::make_unique<RelocWriterWin32>(
+ kRelocTypeX86, mutable_image, reloc_region_, reloc_block_offsets_,
+ translator);
+
+ writer->PutNext({0x608, 0x1F83});
+ std::vector<uint8_t> exp_reloc_data1 = ParseHexString(
+ "00 10 04 00 10 00 00 00 83 3F 18 A3 F8 A7 FF 0F "
+ "00 20 04 00 10 00 00 00 80 A0 65 31 F8 37 BC 3A");
+ EXPECT_EQ(exp_reloc_data1,
+ Sub(image_data, reloc_region_.lo(), reloc_region_.hi()));
+
+ writer->PutNext({0x61C, 0x2950});
+ std::vector<uint8_t> exp_reloc_data2 = ParseHexString(
+ "00 10 04 00 10 00 00 00 83 3F 18 A3 F8 A7 FF 0F "
+ "00 20 04 00 10 00 00 00 80 A0 65 31 50 39 BC 3A");
+ EXPECT_EQ(exp_reloc_data2,
+ Sub(image_data, reloc_region_.lo(), reloc_region_.hi()));
+}
+
+} // namespace zucchini
diff --git a/suffix_array.h b/suffix_array.h
new file mode 100644
index 0000000..75b3a38
--- /dev/null
+++ b/suffix_array.h
@@ -0,0 +1,475 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_
+#define COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_
+
+#include <algorithm>
+#include <iterator>
+#include <numeric>
+#include <vector>
+
+#include "base/check.h"
+
+namespace zucchini {
+
+// A functor class that implements the naive suffix sorting algorithm that uses
+// std::sort with lexicographical compare. This is only meant as reference of
+// the interface.
+class NaiveSuffixSort {
+ public:
+ // Type requirements:
+ // |InputRng| is an input random access range.
+ // |KeyType| is an unsigned integer type.
+ // |SAIt| is a random access iterator with mutable references.
+ template <class InputRng, class KeyType, class SAIt>
+ // |str| is the input string on which suffix sort is applied.
+ // Characters found in |str| must be in the range [0, |key_bound|)
+ // |suffix_array| is the beginning of the destination range, which is at least
+ // as large as |str|.
+ void operator()(const InputRng& str,
+ KeyType key_bound,
+ SAIt suffix_array) const {
+ using size_type = typename SAIt::value_type;
+
+ size_type n = static_cast<size_type>(std::end(str) - std::begin(str));
+
+ // |suffix_array| is first filled with ordered indices of |str|.
+ // Those indices are then sorted with lexicographical comparisons in |str|.
+ std::iota(suffix_array, suffix_array + n, 0);
+ std::sort(suffix_array, suffix_array + n, [&str](size_type i, size_type j) {
+ return std::lexicographical_compare(std::begin(str) + i, std::end(str),
+ std::begin(str) + j, std::end(str));
+ });
+ }
+};
+
+// A functor class that implements suffix array induced sorting (SA-IS)
+// algorithm with linear time and memory complexity,
+// see http://ieeexplore.ieee.org/abstract/document/5582081/
+class InducedSuffixSort {
+ public:
+ // Type requirements:
+ // |InputRng| is an input random access range.
+ // |KeyType| is an unsigned integer type.
+ // |SAIt| is a random access iterator with mutable values.
+ template <class InputRng, class KeyType, class SAIt>
+ // |str| is the input string on which suffix sort is applied.
+ // Characters found in |str| must be in the range [0, |key_bound|)
+ // |suffix_array| is the beginning of the destination range, which is at least
+ // as large as |str|.
+ void operator()(const InputRng& str,
+ KeyType key_bound,
+ SAIt suffix_array) const {
+ using value_type = typename InputRng::value_type;
+ using size_type = typename SAIt::value_type;
+
+ static_assert(std::is_unsigned<value_type>::value,
+ "SA-IS only supports input string with unsigned values");
+ static_assert(std::is_unsigned<KeyType>::value, "KeyType must be unsigned");
+
+ size_type n = static_cast<size_type>(std::end(str) - std::begin(str));
+
+ Implementation<size_type, KeyType>::SuffixSort(std::begin(str), n,
+ key_bound, suffix_array);
+ }
+
+ // Given string S of length n. We assume S is terminated by a unique sentinel
+ // $, which is considered as the smallest character. This sentinel does not
+ // exist in memory and is only treated implicitly, hence |n| does not count
+ // the sentinel in this implementation. We denote suf(S,i) the suffix formed
+ // by S[i..n).
+
+ // A suffix suf(S,i) is said to be S-type or L-type, if suf(S,i) < suf(S,i+1)
+ // or suf(S,i) > suf(S,i+1), respectively.
+ enum SLType : bool { SType, LType };
+
+ // A character S[i] is said to be S-type or L-type if the suffix suf(S,i) is
+ // S-type or L-type, respectively.
+
+ // A character S[i] is called LMS (leftmost S-type), if S[i] is S-type and
+ // S[i-1] is L-type. A suffix suf(S,i) is called LMS, if S[i] is an LMS
+ // character.
+
+ // A substring S[i..j) is an LMS-substring if
+ // (1) S[i] is LMS, S[j] is LMS or the sentinel $, and S[i..j) has no other
+ // LMS characters, or
+ // (2) S[i..j) is the sentinel $.
+
+ template <class SizeType, class KeyType>
+ struct Implementation {
+ static_assert(std::is_unsigned<SizeType>::value,
+ "SizeType must be unsigned");
+ static_assert(std::is_unsigned<KeyType>::value, "KeyType must be unsigned");
+ using size_type = SizeType;
+ using key_type = KeyType;
+
+ using iterator = typename std::vector<size_type>::iterator;
+ using const_iterator = typename std::vector<size_type>::const_iterator;
+
+ // Partition every suffix based on SL-type. Returns the number of LMS
+ // suffixes.
+ template <class StrIt>
+ static size_type BuildSLPartition(
+ StrIt str,
+ size_type length,
+ key_type key_bound,
+ std::vector<SLType>::reverse_iterator sl_partition_it) {
+ // We will count LMS suffixes (S to L-type or last S-type).
+ size_type lms_count = 0;
+
+ // |previous_type| is initialized to L-type to avoid counting an extra
+ // LMS suffix at the end
+ SLType previous_type = LType;
+
+ // Initialized to dummy, impossible key.
+ key_type previous_key = key_bound;
+
+ // We're travelling backward to determine the partition,
+ // as if we prepend one character at a time to the string, ex:
+ // b$ is L-type because b > $.
+ // ab$ is S-type because a < b, implying ab$ < b$.
+ // bab$ is L-type because b > a, implying bab$ > ab$.
+ // bbab$ is L-type, because bab$ was also L-type, implying bbab$ > bab$.
+ for (auto str_it = std::reverse_iterator<StrIt>(str + length);
+ str_it != std::reverse_iterator<StrIt>(str);
+ ++str_it, ++sl_partition_it) {
+ key_type current_key = *str_it;
+
+ if (current_key > previous_key || previous_key == key_bound) {
+ // S[i] > S[i + 1] or S[i] is last character.
+ if (previous_type == SType)
+ // suf(S,i) is L-type and suf(S,i + 1) is S-type, therefore,
+ // suf(S,i+1) was a LMS suffix.
+ ++lms_count;
+
+ previous_type = LType; // For next round.
+ } else if (current_key < previous_key) {
+ // S[i] < S[i + 1]
+ previous_type = SType; // For next round.
+ }
+ // Else, S[i] == S[i + 1]:
+ // The next character that differs determines the SL-type,
+ // so we reuse the last seen type.
+
+ *sl_partition_it = previous_type;
+ previous_key = current_key; // For next round.
+ }
+
+ return lms_count;
+ }
+
+ // Find indices of LMS suffixes and write result to |lms_indices|.
+ static void FindLmsSuffixes(const std::vector<SLType>& sl_partition,
+ iterator lms_indices) {
+ // |previous_type| is initialized to S-type to avoid counting an extra
+ // LMS suffix at the beginning
+ SLType previous_type = SType;
+ for (size_type i = 0; i < sl_partition.size(); ++i) {
+ if (sl_partition[i] == SType && previous_type == LType)
+ *lms_indices++ = i;
+ previous_type = sl_partition[i];
+ }
+ }
+
+ template <class StrIt>
+ static std::vector<size_type> MakeBucketCount(StrIt str,
+ size_type length,
+ key_type key_bound) {
+ // Occurrence of every unique character is counted in |buckets|
+ std::vector<size_type> buckets(static_cast<size_type>(key_bound));
+
+ for (auto it = str; it != str + length; ++it)
+ ++buckets[*it];
+ return buckets;
+ }
+
+ // Apply induced sort from |lms_indices| to |suffix_array| associated with
+ // the string |str|.
+ template <class StrIt, class SAIt>
+ static void InducedSort(StrIt str,
+ size_type length,
+ const std::vector<SLType>& sl_partition,
+ const std::vector<size_type>& lms_indices,
+ const std::vector<size_type>& buckets,
+ SAIt suffix_array) {
+ // All indices are first marked as unset with the illegal value |length|.
+ std::fill(suffix_array, suffix_array + length, length);
+
+ // Used to mark bucket boundaries (head or end) as indices in str.
+ DCHECK(!buckets.empty());
+ std::vector<size_type> bucket_bounds(buckets.size());
+
+ // Step 1: Assign indices for LMS suffixes, populating the end of
+ // respective buckets but keeping relative order.
+
+ // Find the end of each bucket and write it to |bucket_bounds|.
+ std::partial_sum(buckets.begin(), buckets.end(), bucket_bounds.begin());
+
+ // Process each |lms_indices| backward, and assign them to the end of
+ // their respective buckets, so relative order is preserved.
+ for (auto it = lms_indices.crbegin(); it != lms_indices.crend(); ++it) {
+ key_type key = str[*it];
+ suffix_array[--bucket_bounds[key]] = *it;
+ }
+
+ // Step 2
+ // Scan forward |suffix_array|; for each modified suf(S,i) for which
+ // suf(S,SA(i) - 1) is L-type, place suf(S,SA(i) - 1) to the current
+ // head of the corresponding bucket and forward the bucket head to the
+ // right.
+
+ // Find the head of each bucket and write it to |bucket_bounds|. Since
+ // only LMS suffixes where inserted in |suffix_array| during Step 1,
+ // |bucket_bounds| does not contains the head of each bucket and needs to
+ // be updated.
+ bucket_bounds[0] = 0;
+ std::partial_sum(buckets.begin(), buckets.end() - 1,
+ bucket_bounds.begin() + 1);
+
+ // From Step 1, the sentinel $, which we treat implicitly, would have
+ // been placed at the beginning of |suffix_array|, since $ is always
+ // considered as the smallest character. We then have to deal with the
+ // previous (last) suffix.
+ if (sl_partition[length - 1] == LType) {
+ key_type key = str[length - 1];
+ suffix_array[bucket_bounds[key]++] = length - 1;
+ }
+ for (auto it = suffix_array; it != suffix_array + length; ++it) {
+ size_type suffix_index = *it;
+
+ // While the original algorithm marks unset suffixes with -1,
+ // we found that marking them with |length| is also possible and more
+ // convenient because we are working with unsigned integers.
+ if (suffix_index != length && suffix_index > 0 &&
+ sl_partition[--suffix_index] == LType) {
+ key_type key = str[suffix_index];
+ suffix_array[bucket_bounds[key]++] = suffix_index;
+ }
+ }
+
+ // Step 3
+ // Scan backward |suffix_array|; for each modified suf(S, i) for which
+ // suf(S,SA(i) - 1) is S-type, place suf(S,SA(i) - 1) to the current
+ // end of the corresponding bucket and forward the bucket head to the
+ // left.
+
+ // Find the end of each bucket and write it to |bucket_bounds|. Since
+      // only L-type suffixes were inserted in |suffix_array| during Step 2,
+ // |bucket_bounds| does not contain the end of each bucket and needs to
+ // be updated.
+ std::partial_sum(buckets.begin(), buckets.end(), bucket_bounds.begin());
+
+ for (auto it = std::reverse_iterator<SAIt>(suffix_array + length);
+ it != std::reverse_iterator<SAIt>(suffix_array); ++it) {
+ size_type suffix_index = *it;
+ if (suffix_index != length && suffix_index > 0 &&
+ sl_partition[--suffix_index] == SType) {
+ key_type key = str[suffix_index];
+ suffix_array[--bucket_bounds[key]] = suffix_index;
+ }
+ }
+ // Deals with the last suffix, because of the sentinel.
+ if (sl_partition[length - 1] == SType) {
+ key_type key = str[length - 1];
+ suffix_array[--bucket_bounds[key]] = length - 1;
+ }
+ }
+
+ // Given a string S starting at |str| with length |length|, an array
+ // starting at |substring_array| containing lexicographically ordered LMS
+ // terminated substring indices of S and an SL-Type partition |sl_partition|
+ // of S, assigns a unique label to every unique LMS substring. The sorted
+ // labels for all LMS substrings are written to |lms_str|, while the indices
+ // of LMS suffixes are written to |lms_indices|. In addition, returns the
+ // total number of unique labels.
+ template <class StrIt, class SAIt>
+ static size_type LabelLmsSubstrings(StrIt str,
+ size_type length,
+ const std::vector<SLType>& sl_partition,
+ SAIt suffix_array,
+ iterator lms_indices,
+ iterator lms_str) {
+ // Labelling starts at 0.
+ size_type label = 0;
+
+ // |previous_lms| is initialized to 0 to indicate it is unset.
+ // Note that suf(S,0) is never a LMS suffix. Substrings will be visited in
+ // lexicographical order.
+ size_type previous_lms = 0;
+ for (auto it = suffix_array; it != suffix_array + length; ++it) {
+ if (*it > 0 && sl_partition[*it] == SType &&
+ sl_partition[*it - 1] == LType) {
+ // suf(S, *it) is a LMS suffix.
+
+ size_type current_lms = *it;
+ if (previous_lms != 0) {
+ // There was a previous LMS suffix. Check if the current LMS
+ // substring is equal to the previous one.
+ SLType current_lms_type = SType;
+ SLType previous_lms_type = SType;
+ for (size_type k = 0;; ++k) {
+ // |current_lms_end| and |previous_lms_end| denote whether we have
+ // reached the end of the current and previous LMS substring,
+ // respectively
+ bool current_lms_end = false;
+ bool previous_lms_end = false;
+
+ // Check for both previous and current substring ends.
+ // Note that it is more convenient to check if
+ // suf(S,current_lms + k) is an LMS suffix than to retrieve it
+ // from lms_indices.
+ if (current_lms + k >= length ||
+ (current_lms_type == LType &&
+ sl_partition[current_lms + k] == SType)) {
+ current_lms_end = true;
+ }
+ if (previous_lms + k >= length ||
+ (previous_lms_type == LType &&
+ sl_partition[previous_lms + k] == SType)) {
+ previous_lms_end = true;
+ }
+
+ if (current_lms_end && previous_lms_end) {
+ break; // Previous and current substrings are identical.
+ } else if (current_lms_end != previous_lms_end ||
+ str[current_lms + k] != str[previous_lms + k]) {
+ // Previous and current substrings differ, a new label is used.
+ ++label;
+ break;
+ }
+
+ current_lms_type = sl_partition[current_lms + k];
+ previous_lms_type = sl_partition[previous_lms + k];
+ }
+ }
+ *lms_indices++ = *it;
+ *lms_str++ = label;
+ previous_lms = current_lms;
+ }
+ }
+
+ return label + 1;
+ }
+
+ // Implementation of the SA-IS algorithm. |str| must be a random access
+ // iterator pointing at the beginning of S with length |length|. The result
+    // is written in |suffix_array|, a random access iterator.
+ template <class StrIt, class SAIt>
+ static void SuffixSort(StrIt str,
+ size_type length,
+ key_type key_bound,
+ SAIt suffix_array) {
+ if (length == 1)
+ *suffix_array = 0;
+ if (length < 2)
+ return;
+
+ std::vector<SLType> sl_partition(length);
+ size_type lms_count =
+ BuildSLPartition(str, length, key_bound, sl_partition.rbegin());
+ std::vector<size_type> lms_indices(lms_count);
+ FindLmsSuffixes(sl_partition, lms_indices.begin());
+ std::vector<size_type> buckets = MakeBucketCount(str, length, key_bound);
+
+ if (lms_indices.size() > 1) {
+ // Given |lms_indices| in the same order they appear in |str|, induce
+ // LMS substrings relative order and write result to |suffix_array|.
+ InducedSort(str, length, sl_partition, lms_indices, buckets,
+ suffix_array);
+ std::vector<size_type> lms_str(lms_indices.size());
+
+ // Given LMS substrings in relative order found in |suffix_array|,
+ // map LMS substrings to unique labels to form a new string, |lms_str|.
+ size_type label_count =
+ LabelLmsSubstrings(str, length, sl_partition, suffix_array,
+ lms_indices.begin(), lms_str.begin());
+
+ if (label_count < lms_str.size()) {
+ // Reorder |lms_str| to have LMS suffixes in the same order they
+ // appear in |str|.
+ for (size_type i = 0; i < lms_indices.size(); ++i)
+ suffix_array[lms_indices[i]] = lms_str[i];
+
+ SLType previous_type = SType;
+ for (size_type i = 0, j = 0; i < sl_partition.size(); ++i) {
+ if (sl_partition[i] == SType && previous_type == LType) {
+ lms_str[j] = suffix_array[i];
+ lms_indices[j++] = i;
+ }
+ previous_type = sl_partition[i];
+ }
+
+ // Recursively apply SuffixSort on |lms_str|, which is formed from
+ // labeled LMS suffixes in the same order they appear in |str|.
+ // Note that |KeyType| will be size_type because |lms_str| contains
+ // indices. |lms_str| is at most half the length of |str|.
+ Implementation<size_type, size_type>::SuffixSort(
+ lms_str.begin(), static_cast<size_type>(lms_str.size()),
+ label_count, suffix_array);
+
+ // Map LMS labels back to indices in |str| and write result to
+ // |lms_indices|. We're using |suffix_array| as a temporary buffer.
+ for (size_type i = 0; i < lms_indices.size(); ++i)
+ suffix_array[i] = lms_indices[suffix_array[i]];
+ std::copy_n(suffix_array, lms_indices.size(), lms_indices.begin());
+
+ // At this point, |lms_indices| contains sorted LMS suffixes of |str|.
+ }
+ }
+ // Given |lms_indices| where LMS suffixes are sorted, induce the full
+ // order of suffixes in |str|.
+ InducedSort(str, length, sl_partition, lms_indices, buckets,
+ suffix_array);
+ }
+
+ Implementation() = delete;
+ Implementation(const Implementation&) = delete;
+ const Implementation& operator=(const Implementation&) = delete;
+ };
+};
+
+// Generates a sorted suffix array for the input string |str| using the functor
+// |Algorithm| which provides an interface equivalent to NaiveSuffixSort.
+// Characters found in |str| are assumed to be in range [0, |key_bound|).
+// Returns the suffix array as a vector.
+// |StrRng| is an input random access range.
+// |KeyType| is an unsigned integer type.
+template <class Algorithm, class StrRng, class KeyType>
+std::vector<typename StrRng::size_type> MakeSuffixArray(const StrRng& str,
+ KeyType key_bound) {
+ Algorithm sort;
+ std::vector<typename StrRng::size_type> suffix_array(str.end() - str.begin());
+ sort(str, key_bound, suffix_array.begin());
+ return suffix_array;
+}
+
+// Type requirements:
+// |SARng| is an input random access range.
+// |StrIt1| is a random access iterator.
+// |StrIt2| is a forward iterator.
+template <class SARng, class StrIt1, class StrIt2>
+// Lexicographical lower bound using binary search for
+// [|str2_first|, |str2_last|) in the suffix array |suffix_array| of a string
+// starting at |str1_first|. This does not necessarily return the index of
+// the longest matching substring.
+auto SuffixLowerBound(const SARng& suffix_array,
+ StrIt1 str1_first,
+ StrIt2 str2_first,
+ StrIt2 str2_last) -> decltype(std::begin(suffix_array)) {
+ using size_type = typename SARng::value_type;
+
+ size_t n = std::end(suffix_array) - std::begin(suffix_array);
+ auto it = std::lower_bound(
+ std::begin(suffix_array), std::end(suffix_array), str2_first,
+ [str1_first, str2_last, n](size_type a, StrIt2 b) {
+ return std::lexicographical_compare(str1_first + a, str1_first + n, b,
+ str2_last);
+ });
+ return it;
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_
diff --git a/suffix_array_unittest.cc b/suffix_array_unittest.cc
new file mode 100644
index 0000000..69fca94
--- /dev/null
+++ b/suffix_array_unittest.cc
@@ -0,0 +1,342 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/suffix_array.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <initializer_list>
+#include <string>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using SLType = InducedSuffixSort::SLType;
+
+} // namespace
+
+using ustring = std::basic_string<unsigned char>;
+
+constexpr uint16_t kNumChar = 256;
+
+ustring MakeUnsignedString(const std::string& str) {
+ return {str.begin(), str.end()};
+}
+
+template <class T>
+std::vector<T> MakeVector(const std::initializer_list<T>& ilist) {
+ return {ilist.begin(), ilist.end()};
+}
+
+void TestSlPartition(std::initializer_list<SLType> expected_sl_partition,
+ std::initializer_list<size_t> expected_lms_indices,
+ std::string str) {
+ using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
+
+ std::vector<SLType> sl_partition(str.size());
+ EXPECT_EQ(expected_lms_indices.size(),
+ SaisImpl::BuildSLPartition(str.begin(), str.size(), kNumChar,
+ sl_partition.rbegin()));
+ EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition);
+
+ std::vector<size_t> lms_indices(expected_lms_indices.size());
+ SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin());
+ EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices);
+}
+
+TEST(InducedSuffixSortTest, BuildSLPartition) {
+ TestSlPartition({}, {}, "");
+ TestSlPartition(
+ {
+ SLType::LType,
+ },
+ {}, "a");
+ TestSlPartition(
+ {
+ SLType::LType,
+ SLType::LType,
+ },
+ {}, "ba");
+ TestSlPartition(
+ {
+ SLType::SType,
+ SLType::LType,
+ },
+ {}, "ab");
+ TestSlPartition(
+ {
+ SLType::SType,
+ SLType::SType,
+ SLType::LType,
+ },
+ {}, "aab");
+ TestSlPartition(
+ {
+ SLType::LType,
+ SLType::LType,
+ SLType::LType,
+ },
+ {}, "bba");
+ TestSlPartition(
+ {
+ SLType::LType,
+ SLType::SType,
+ SLType::LType,
+ },
+ {1}, "bab");
+ TestSlPartition(
+ {
+ SLType::LType,
+ SLType::SType,
+ SLType::SType,
+ SLType::LType,
+ },
+ {1}, "baab");
+
+ TestSlPartition(
+ {
+ SLType::LType, // zucchini
+ SLType::LType, // ucchini
+ SLType::SType, // cchini
+ SLType::SType, // chini
+ SLType::SType, // hini
+ SLType::SType, // ini
+ SLType::LType, // ni
+ SLType::LType, // i
+ },
+ {2}, "zucchini");
+}
+
+std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str,
+ uint16_t max_key) {
+ using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
+ return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key);
+}
+
+TEST(InducedSuffixSortTest, BucketCount) {
+ using vec = std::vector<size_t>;
+
+ EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4));
+ EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4));
+ EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4));
+}
+
+std::vector<size_t> InducedSortSubstring(ustring str) {
+ using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
+ std::vector<SLType> sl_partition(str.size());
+ size_t lms_count = SaisImpl::BuildSLPartition(
+ str.begin(), str.size(), kNumChar, sl_partition.rbegin());
+ std::vector<size_t> lms_indices(lms_count);
+ SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin());
+ auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), kNumChar);
+
+ std::vector<size_t> suffix_array(str.size());
+ SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets,
+ suffix_array.begin());
+
+ return suffix_array;
+}
+
+TEST(InducedSuffixSortTest, InducedSortSubstring) {
+ using vec = std::vector<size_t>;
+
+ auto us = MakeUnsignedString;
+
+ // L; a$
+ EXPECT_EQ(vec({0}), InducedSortSubstring(us("a")));
+
+ // SL; ab$, b$
+ EXPECT_EQ(vec({0, 1}), InducedSortSubstring(us("ab")));
+
+ // LL; a$, ba$
+ EXPECT_EQ(vec({1, 0}), InducedSortSubstring(us("ba")));
+
+ // SLL; a$, aba$, ba$
+ EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring(us("aba")));
+
+ // LSL; ab$, b$, ba
+ EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring(us("bab")));
+
+ // SSL; aab$, ab$, b$
+ EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring(us("aab")));
+
+ // LSSL; aab$, ab$, b$, ba
+ EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring(us("baab")));
+}
+
+template <class Algorithm>
+void TestSuffixSort(ustring test_str) {
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<Algorithm>(test_str, kNumChar);
+ EXPECT_EQ(test_str.size(), suffix_array.size());
+
+ // Expect that I[] is a permutation of [0, len].
+ std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end());
+ std::sort(sorted_suffix.begin(), sorted_suffix.end());
+ for (size_t i = 0; i < test_str.size(); ++i)
+ EXPECT_EQ(i, sorted_suffix[i]);
+
+ // Expect that all suffixes are strictly ordered.
+ auto end = test_str.end();
+ for (size_t i = 1; i < test_str.size(); ++i) {
+ auto suf1 = test_str.begin() + suffix_array[i - 1];
+ auto suf2 = test_str.begin() + suffix_array[i];
+ bool is_less = std::lexicographical_compare(suf1, end, suf2, end);
+ EXPECT_TRUE(is_less);
+ }
+}
+
+constexpr const char* test_strs[] = {
+ "",
+ "a",
+ "aa",
+ "za",
+ "CACAO",
+ "aaaaa",
+ "banana",
+ "tobeornottobe",
+ "The quick brown fox jumps over the lazy dog.",
+ "elephantelephantelephantelephantelephant",
+ "walawalawashington",
+ "-------------------------",
+ "011010011001011010010110011010010",
+ "3141592653589793238462643383279502884197169399375105",
+ "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
+ "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba",
+ "0123456789876543210",
+ "9876543210123456789",
+ "aababcabcdabcdeabcdefabcdefg",
+ "asdhklgalksdjghalksdjghalksdjgh",
+};
+
+TEST(SuffixSortTest, NaiveSuffixSort) {
+ for (const std::string& test_str : test_strs) {
+ TestSuffixSort<NaiveSuffixSort>(MakeUnsignedString(test_str));
+ }
+}
+
+TEST(SuffixSortTest, InducedSuffixSortSort) {
+ for (const std::string& test_str : test_strs) {
+ TestSuffixSort<InducedSuffixSort>(MakeUnsignedString(test_str));
+ }
+}
+
+// Test with sequence that has every character.
+TEST(SuffixSortTest, AllChar) {
+ std::vector<unsigned char> all_char(kNumChar);
+ std::iota(all_char.begin(), all_char.end(), 0);
+
+ {
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<InducedSuffixSort>(all_char, kNumChar);
+ for (size_t i = 0; i < kNumChar; ++i)
+ EXPECT_EQ(i, suffix_array[i]);
+ }
+
+ std::vector<unsigned char> all_char_reverse(all_char.rbegin(),
+ all_char.rend());
+ {
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<InducedSuffixSort>(all_char_reverse, kNumChar);
+ for (size_t i = 0; i < kNumChar; ++i)
+ EXPECT_EQ(kNumChar - i - 1, suffix_array[i]);
+ }
+}
+
+void TestSuffixLowerBound(ustring base_str, ustring search_str) {
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<NaiveSuffixSort>(base_str, kNumChar);
+
+ auto pos = SuffixLowerBound(suffix_array, base_str.begin(),
+ search_str.begin(), search_str.end());
+
+ auto end = base_str.end();
+ if (pos != suffix_array.begin()) {
+ // Previous suffix is less than |search_str|.
+ auto suf = base_str.begin() + pos[-1];
+ bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
+ search_str.end());
+ EXPECT_TRUE(is_less);
+ }
+ if (pos != suffix_array.end()) {
+ // Current suffix is greater than or equal to |search_str|.
+ auto suf = base_str.begin() + *pos;
+ bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
+ search_str.end());
+ EXPECT_FALSE(is_less);
+ }
+}
+
+TEST(SuffixArrayTest, LowerBound) {
+ auto us = MakeUnsignedString;
+
+ TestSuffixLowerBound(us(""), us(""));
+ TestSuffixLowerBound(us(""), us("a"));
+ TestSuffixLowerBound(us("b"), us(""));
+ TestSuffixLowerBound(us("b"), us("a"));
+ TestSuffixLowerBound(us("b"), us("c"));
+ TestSuffixLowerBound(us("b"), us("bc"));
+ TestSuffixLowerBound(us("aa"), us("a"));
+ TestSuffixLowerBound(us("aa"), us("aa"));
+
+ ustring sentence = us("the quick brown fox jumps over the lazy dog.");
+ // Entire string: exact and unique.
+ TestSuffixLowerBound(sentence, sentence);
+ // Empty string: exact and non-unique.
+ TestSuffixLowerBound(sentence, us(""));
+ // Exact and unique suffix matches.
+ TestSuffixLowerBound(sentence, us("."));
+ TestSuffixLowerBound(sentence, us("the lazy dog."));
+ // Exact and unique non-suffix matches.
+ TestSuffixLowerBound(sentence, us("quick"));
+ TestSuffixLowerBound(sentence, us("the quick"));
+ // Partial and unique matches.
+ TestSuffixLowerBound(sentence, us("fox jumps with the hosps"));
+ TestSuffixLowerBound(sentence, us("xyz"));
+ // Exact and non-unique match: take lexicographical first.
+ TestSuffixLowerBound(sentence, us("the"));
+ TestSuffixLowerBound(sentence, us(" "));
+ // Partial and non-unique match.
+ // query < "the l"... < "the q"...
+ TestSuffixLowerBound(sentence, us("the apple"));
+ // "the l"... < query < "the q"...
+ TestSuffixLowerBound(sentence, us("the opera"));
+ // "the l"... < "the q"... < query
+ TestSuffixLowerBound(sentence, us("the zebra"));
+ // Prefix match dominates suffix match (unique).
+ TestSuffixLowerBound(sentence, us("over quick brown fox"));
+ // Empty matches.
+ TestSuffixLowerBound(sentence, us(","));
+ TestSuffixLowerBound(sentence, us("1234"));
+ TestSuffixLowerBound(sentence, us("THE QUICK BROWN FOX"));
+ TestSuffixLowerBound(sentence, us("(the"));
+}
+
+TEST(SuffixArrayTest, LowerBoundExact) {
+ for (const std::string& test_str : test_strs) {
+ ustring test_ustr = MakeUnsignedString(test_str);
+
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray<InducedSuffixSort>(test_ustr, kNumChar);
+
+ for (size_t lo = 0; lo < test_str.size(); ++lo) {
+ for (size_t hi = lo + 1; hi <= test_str.size(); ++hi) {
+ ustring query(test_ustr.begin() + lo, test_ustr.begin() + hi);
+ ASSERT_EQ(query.size(), hi - lo);
+ auto pos = SuffixLowerBound(suffix_array, test_ustr.begin(),
+ query.begin(), query.end());
+ EXPECT_TRUE(
+ std::equal(query.begin(), query.end(), test_ustr.begin() + *pos));
+ }
+ }
+ }
+}
+
+} // namespace zucchini
diff --git a/target_pool.cc b/target_pool.cc
new file mode 100644
index 0000000..23551fd
--- /dev/null
+++ b/target_pool.cc
@@ -0,0 +1,84 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/target_pool.h"
+
+#include <algorithm>
+#include <iterator>
+#include <utility>
+
+#include "base/check.h"
+#include "components/zucchini/algorithm.h"
+#include "components/zucchini/equivalence_map.h"
+
+namespace zucchini {
+
+TargetPool::TargetPool() = default;
+
+TargetPool::TargetPool(std::vector<offset_t>&& targets) {
+ DCHECK(targets_.empty());
+ DCHECK(std::is_sorted(targets.begin(), targets.end()));
+ targets_ = std::move(targets);
+}
+
+TargetPool::TargetPool(TargetPool&&) = default;
+TargetPool::TargetPool(const TargetPool&) = default;
+TargetPool::~TargetPool() = default;
+
+void TargetPool::InsertTargets(const std::vector<offset_t>& targets) {
+ std::copy(targets.begin(), targets.end(), std::back_inserter(targets_));
+ SortAndUniquify(&targets_);
+}
+
+void TargetPool::InsertTargets(TargetSource* targets) {
+ for (auto target = targets->GetNext(); target.has_value();
+ target = targets->GetNext()) {
+ targets_.push_back(*target);
+ }
+ // InsertTargets() can be called many times (number of reference types for the
+ // pool) in succession. Calling SortAndUniquify() every time enables deduping
+ // to occur more often. This prioritizes peak memory reduction over running
+ // time.
+ SortAndUniquify(&targets_);
+}
+
+void TargetPool::InsertTargets(const std::vector<Reference>& references) {
+ // This can be called many times, so it's better to let std::back_inserter()
+ // manage |targets_| resize, instead of manually reserving space.
+ std::transform(references.begin(), references.end(),
+ std::back_inserter(targets_),
+ [](const Reference& ref) { return ref.target; });
+ SortAndUniquify(&targets_);
+}
+
+void TargetPool::InsertTargets(ReferenceReader&& references) {
+ for (auto ref = references.GetNext(); ref.has_value();
+ ref = references.GetNext()) {
+ targets_.push_back(ref->target);
+ }
+ SortAndUniquify(&targets_);
+}
+
+key_t TargetPool::KeyForOffset(offset_t offset) const {
+ auto pos = std::lower_bound(targets_.begin(), targets_.end(), offset);
+ DCHECK(pos != targets_.end() && *pos == offset);
+ return static_cast<offset_t>(pos - targets_.begin());
+}
+
+key_t TargetPool::KeyForNearestOffset(offset_t offset) const {
+ auto pos = std::lower_bound(targets_.begin(), targets_.end(), offset);
+ if (pos != targets_.begin()) {
+ // If distances are equal, prefer lower key.
+ if (pos == targets_.end() || *pos - offset >= offset - pos[-1])
+ --pos;
+ }
+ return static_cast<offset_t>(pos - targets_.begin());
+}
+
+void TargetPool::FilterAndProject(const OffsetMapper& offset_mapper) {
+ offset_mapper.ForwardProjectAll(&targets_);
+ std::sort(targets_.begin(), targets_.end());
+}
+
+} // namespace zucchini
diff --git a/target_pool.h b/target_pool.h
new file mode 100644
index 0000000..27884d6
--- /dev/null
+++ b/target_pool.h
@@ -0,0 +1,80 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TARGET_POOL_H_
+#define COMPONENTS_ZUCCHINI_TARGET_POOL_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/patch_reader.h"
+
+namespace zucchini {
+
+class OffsetMapper;
+class TargetSource;
+
+// Ordered container of distinct targets that have the same semantics, along
+// with a list of associated reference types, only used during patch generation.
+class TargetPool {
+ public:
+ using const_iterator = std::vector<offset_t>::const_iterator;
+
+ TargetPool();
+ // Initializes the object with given sorted and unique |targets|.
+ explicit TargetPool(std::vector<offset_t>&& targets);
+ TargetPool(TargetPool&&);
+ TargetPool(const TargetPool&);
+ ~TargetPool();
+
+ // Insert new targets from various sources. These invalidate all previous key
+ // lookups.
+ // - From a list of targets, useful for adding extra targets in Zucchini-gen:
+ void InsertTargets(const std::vector<offset_t>& targets);
+ // - From TargetSource, useful for adding extra targets in Zucchini-apply:
+ void InsertTargets(TargetSource* targets);
+ // - From list of References, useful for listing targets in Zucchini-gen:
+ void InsertTargets(const std::vector<Reference>& references);
+ // - From ReferenceReader, useful for listing targets in Zucchini-apply:
+ void InsertTargets(ReferenceReader&& references);
+
+ // Adds |type| as a reference type associated with the pool of targets.
+ void AddType(TypeTag type) { types_.push_back(type); }
+
+ // Returns a canonical key associated with a valid target at |offset|.
+ key_t KeyForOffset(offset_t offset) const;
+
+ // Returns a canonical key associated with the target nearest to |offset|.
+ key_t KeyForNearestOffset(offset_t offset) const;
+
+ // Returns the target for a |key|, which is assumed to be valid and held by
+ // this class.
+ offset_t OffsetForKey(key_t key) const { return targets_[key]; }
+
+ // Returns whether a particular key is valid.
+ bool KeyIsValid(key_t key) const { return key < targets_.size(); }
+
+ // Uses |offset_mapper| to transform "old" |targets_| to "new" |targets_|,
+ // resulting in sorted and unique targets.
+ void FilterAndProject(const OffsetMapper& offset_mapper);
+
+ // Accessors for testing.
+ const std::vector<offset_t>& targets() const { return targets_; }
+ const std::vector<TypeTag>& types() const { return types_; }
+
+ // Returns the number of targets.
+ size_t size() const { return targets_.size(); }
+ const_iterator begin() const { return targets_.cbegin(); }
+ const_iterator end() const { return targets_.cend(); }
+
+ private:
+ std::vector<TypeTag> types_; // Enumerates type_tag for this pool.
+ std::vector<offset_t> targets_; // Targets for pool in ascending order.
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TARGET_POOL_H_
diff --git a/target_pool_unittest.cc b/target_pool_unittest.cc
new file mode 100644
index 0000000..4c3efec
--- /dev/null
+++ b/target_pool_unittest.cc
@@ -0,0 +1,64 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/target_pool.h"
+
+#include <cmath>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using OffsetVector = std::vector<offset_t>;
+
+} // namespace
+
+TEST(TargetPoolTest, InsertTargetsFromReferences) {
+ auto test_insert = [](std::vector<Reference>&& references) -> OffsetVector {
+ TargetPool target_pool;
+ target_pool.InsertTargets(references);
+ // Return copy since |target_pool| goes out of scope.
+ return target_pool.targets();
+ };
+
+ EXPECT_EQ(OffsetVector(), test_insert({}));
+ EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 0}, {10, 1}}));
+ EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 1}, {10, 0}}));
+ EXPECT_EQ(OffsetVector({0, 1, 2}), test_insert({{0, 1}, {10, 0}, {20, 2}}));
+ EXPECT_EQ(OffsetVector({0}), test_insert({{0, 0}, {10, 0}}));
+ EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 0}, {10, 0}, {20, 1}}));
+}
+
+TEST(TargetPoolTest, KeyOffset) {
+ auto test_key_offset = [](const std::string& nearest_offsets_key,
+ OffsetVector&& targets) {
+ TargetPool target_pool(std::move(targets));
+ for (offset_t offset : target_pool.targets()) {
+ offset_t key = target_pool.KeyForOffset(offset);
+ EXPECT_LT(key, target_pool.size());
+ EXPECT_EQ(offset, target_pool.OffsetForKey(key));
+ }
+ for (offset_t offset = 0; offset < nearest_offsets_key.size(); ++offset) {
+ key_t key = target_pool.KeyForNearestOffset(offset);
+ EXPECT_EQ(key, static_cast<key_t>(nearest_offsets_key[offset] - '0'));
+ }
+ };
+ test_key_offset("0000000000000000", {});
+ test_key_offset("0000000000000000", {0});
+ test_key_offset("0000000000000000", {1});
+ test_key_offset("0111111111111111", {0, 1});
+ test_key_offset("0011111111111111", {0, 2});
+ test_key_offset("0011111111111111", {1, 2});
+ test_key_offset("0001111111111111", {1, 3});
+ test_key_offset("0001112223334444", {1, 3, 7, 9, 13});
+ test_key_offset("0000011112223333", {1, 7, 9, 13});
+}
+
+} // namespace zucchini
diff --git a/targets_affinity.cc b/targets_affinity.cc
new file mode 100644
index 0000000..d083787
--- /dev/null
+++ b/targets_affinity.cc
@@ -0,0 +1,108 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/targets_affinity.h"
+
+#include <algorithm>
+
+#include "base/check_op.h"
+#include "components/zucchini/equivalence_map.h"
+
+namespace zucchini {
+
+namespace {
+
+constexpr uint32_t kNoLabel = 0;
+}
+
+TargetsAffinity::TargetsAffinity() = default;
+TargetsAffinity::~TargetsAffinity() = default;
+
+void TargetsAffinity::InferFromSimilarities(
+ const EquivalenceMap& equivalences,
+ const std::vector<offset_t>& old_targets,
+ const std::vector<offset_t>& new_targets) {
+ forward_association_.assign(old_targets.size(), {});
+ backward_association_.assign(new_targets.size(), {});
+
+ if (old_targets.empty() || new_targets.empty())
+ return;
+
+ key_t new_key = 0;
+ for (auto candidate : equivalences) { // Sorted by |dst_offset|.
+ DCHECK_GT(candidate.similarity, 0.0);
+ while (new_key < new_targets.size() &&
+ new_targets[new_key] < candidate.eq.dst_offset) {
+ ++new_key;
+ }
+
+ // Visit each new target covered by |candidate.eq| and find / update its
+ // associated old target.
+ for (; new_key < new_targets.size() &&
+ new_targets[new_key] < candidate.eq.dst_end();
+ ++new_key) {
+ if (backward_association_[new_key].affinity >= candidate.similarity)
+ continue;
+
+ DCHECK_GE(new_targets[new_key], candidate.eq.dst_offset);
+ offset_t old_target = new_targets[new_key] - candidate.eq.dst_offset +
+ candidate.eq.src_offset;
+ auto old_it =
+ std::lower_bound(old_targets.begin(), old_targets.end(), old_target);
+ // If new target can be mapped via |candidate.eq| to an old target, then
+ // attempt to associate them. Multiple new targets can compete for the
+ // same old target. The heuristic here makes selections to maximize
+ // |candidate.similarity|, and if a tie occurs, minimize new target offset
+ // (by first-come, first-served).
+ if (old_it != old_targets.end() && *old_it == old_target) {
+ key_t old_key = static_cast<key_t>(old_it - old_targets.begin());
+ if (candidate.similarity > forward_association_[old_key].affinity) {
+ // Reset other associations.
+ if (forward_association_[old_key].affinity > 0.0)
+ backward_association_[forward_association_[old_key].other] = {};
+ if (backward_association_[new_key].affinity > 0.0)
+ forward_association_[backward_association_[new_key].other] = {};
+ // Assign new association.
+ forward_association_[old_key] = {new_key, candidate.similarity};
+ backward_association_[new_key] = {old_key, candidate.similarity};
+ }
+ }
+ }
+ }
+}
+
+uint32_t TargetsAffinity::AssignLabels(double min_affinity,
+ std::vector<uint32_t>* old_labels,
+ std::vector<uint32_t>* new_labels) {
+ old_labels->assign(forward_association_.size(), kNoLabel);
+ new_labels->assign(backward_association_.size(), kNoLabel);
+
+ uint32_t label = kNoLabel + 1;
+ for (key_t old_key = 0; old_key < forward_association_.size(); ++old_key) {
+ Association association = forward_association_[old_key];
+ if (association.affinity >= min_affinity) {
+ (*old_labels)[old_key] = label;
+ DCHECK_EQ(0U, (*new_labels)[association.other]);
+ (*new_labels)[association.other] = label;
+ ++label;
+ }
+ }
+ return label;
+}
+
+double TargetsAffinity::AffinityBetween(key_t old_key, key_t new_key) const {
+ DCHECK_LT(old_key, forward_association_.size());
+ DCHECK_LT(new_key, backward_association_.size());
+ if (forward_association_[old_key].affinity > 0.0 &&
+ forward_association_[old_key].other == new_key) {
+ DCHECK_EQ(backward_association_[new_key].other, old_key);
+ DCHECK_EQ(forward_association_[old_key].affinity,
+ backward_association_[new_key].affinity);
+ return forward_association_[old_key].affinity;
+ }
+ return -std::max(forward_association_[old_key].affinity,
+ backward_association_[new_key].affinity);
+}
+
+} // namespace zucchini
diff --git a/targets_affinity.h b/targets_affinity.h
new file mode 100644
index 0000000..dff1741
--- /dev/null
+++ b/targets_affinity.h
@@ -0,0 +1,73 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_
+#define COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+class EquivalenceMap;
+
+// Computes and stores affinity between old and new targets for a single target
+// pool. This is only used during patch generation.
+class TargetsAffinity {
+ public:
+ TargetsAffinity();
+ TargetsAffinity(const TargetsAffinity&) = delete;
+ const TargetsAffinity& operator=(const TargetsAffinity&) = delete;
+ ~TargetsAffinity();
+
+ // Infers affinity between |old_targets| and |new_targets| using similarities
+ // described by |equivalence_map|, and updates internal state for retrieval of
+ // affinity scores. Both |old_targets| and |new_targets| are targets in the
+ // same pool and are sorted in ascending order.
+ void InferFromSimilarities(const EquivalenceMap& equivalence_map,
+ const std::vector<offset_t>& old_targets,
+ const std::vector<offset_t>& new_targets);
+
+ // Assigns labels to targets based on associations previously inferred, using
+ // |min_affinity| to reject associations with weak |affinity|. Label 0 is
+ // assigned to unassociated targets. Labels for old targets are written to
+ // |old_labels| and labels for new targets are written to |new_labels|.
+ // Returns the upper bound on assigned labels (>= 1 since 0 is used).
+ uint32_t AssignLabels(double min_affinity,
+ std::vector<uint32_t>* old_labels,
+ std::vector<uint32_t>* new_labels);
+
+ // Returns the affinity score between targets identified by |old_key| and
+ // |new_key|. Affinity > 0 means an association is likely, < 0 means
+ // incompatible association, and 0 means neither target has been associated.
+ double AffinityBetween(key_t old_key, key_t new_key) const;
+
+ private:
+ struct Association {
+ key_t other = 0;
+ double affinity = 0.0;
+ };
+
+ // Forward and backward associations between old and new targets. For each
+ // Association element, if |affinity == 0.0| then no association is defined
+ // (and |other| is meaningless). Otherwise |affinity > 0.0|, and the
+ // association between |old_labels[old_key]| and |new_labels[new_key]| is
+ // represented by:
+ // forward_association_[old_key].other == new_key;
+ // backward_association_[new_key].other == old_key;
+ // forward_association_[old_key].affinity ==
+ // backward_association_[new_key].affinity;
+ // The two lists contain the same information, but having both enables quick
+ // lookup, given |old_key| or |new_key|.
+ std::vector<Association> forward_association_;
+ std::vector<Association> backward_association_;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_
diff --git a/targets_affinity_unittest.cc b/targets_affinity_unittest.cc
new file mode 100644
index 0000000..86182f9
--- /dev/null
+++ b/targets_affinity_unittest.cc
@@ -0,0 +1,131 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/targets_affinity.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "components/zucchini/equivalence_map.h"
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+TEST(TargetsAffinityTest, AffinityBetween) {
+ using AffinityVector = std::vector<std::vector<double>>;
+
+ // A common TargetsAffinity is used across independent tests. This is to
+ // reflect actual usage, in which common TargetsAffinity is used so that
+ // internal buffers get reused.
+ TargetsAffinity targets_affinity;
+
+ auto test_affinity = [&targets_affinity](
+ const EquivalenceMap& equivalence_map,
+ const std::vector<offset_t>& old_targets,
+ const std::vector<offset_t>& new_targets) {
+ targets_affinity.InferFromSimilarities(equivalence_map, old_targets,
+ new_targets);
+ AffinityVector affinities(old_targets.size());
+ for (key_t i = 0; i < old_targets.size(); ++i) {
+ for (key_t j = 0; j < new_targets.size(); ++j) {
+ affinities[i].push_back(targets_affinity.AffinityBetween(i, j));
+ }
+ }
+ return affinities;
+ };
+
+ EXPECT_EQ(AffinityVector({}), test_affinity(EquivalenceMap(), {}, {}));
+ EXPECT_EQ(AffinityVector({}),
+ test_affinity(EquivalenceMap({{{0, 0, 8}, 1.0}}), {}, {}));
+
+ EXPECT_EQ(AffinityVector({{0.0, 0.0}, {0.0, 0.0}}),
+ test_affinity(EquivalenceMap(), {0, 10}, {0, 5}));
+
+ EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 1}, 1.0}}), {0, 10}, {0, 5}));
+
+ EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 2}, 1.0}}), {1, 10}, {1, 5}));
+
+ EXPECT_EQ(AffinityVector({{0.0, 0.0}, {0.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 1, 2}, 1.0}}), {1, 10}, {1, 5}));
+
+ EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 1, 2}, 1.0}}), {0, 10}, {1, 5}));
+
+ EXPECT_EQ(AffinityVector({{2.0, -2.0}, {-2.0, 0.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 1}, 2.0}}), {0, 10}, {0, 5}));
+
+ EXPECT_EQ(
+ AffinityVector({{1.0, -1.0}, {-1.0, 1.0}, {-1.0, -1.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 6}, 1.0}}), {0, 5, 10}, {0, 5}));
+
+ EXPECT_EQ(AffinityVector({{-2.0, 2.0}, {1.0, -2.0}, {-1.0, -2.0}}),
+ test_affinity(EquivalenceMap({{{5, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}),
+ {0, 5, 10}, {0, 5}));
+
+ EXPECT_EQ(AffinityVector({{-2.0, 2.0}, {0.0, -2.0}, {0.0, -2.0}}),
+ test_affinity(EquivalenceMap({{{0, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}),
+ {0, 5, 10}, {0, 5}));
+}
+
+TEST(TargetsAffinityTest, AssignLabels) {
+ // A common TargetsAffinity is used across independent tests. This is to
+ // reflect actual usage, in which common TargetsAffinity is used so that
+ // internal buffers get reused.
+ TargetsAffinity targets_affinity;
+
+ auto test_labels_assignment =
+ [&targets_affinity](const EquivalenceMap& equivalence_map,
+ const std::vector<offset_t>& old_targets,
+ const std::vector<offset_t>& new_targets,
+ double min_affinity,
+ const std::vector<uint32_t>& expected_old_labels,
+ const std::vector<uint32_t>& expected_new_labels) {
+ targets_affinity.InferFromSimilarities(equivalence_map, old_targets,
+ new_targets);
+ std::vector<uint32_t> old_labels;
+ std::vector<uint32_t> new_labels;
+ size_t bound = targets_affinity.AssignLabels(min_affinity, &old_labels,
+ &new_labels);
+ EXPECT_EQ(expected_old_labels, old_labels);
+ EXPECT_EQ(expected_new_labels, new_labels);
+ return bound;
+ };
+
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap(), {}, {}, 1.0, {}, {}));
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 8}, 1.0}}), {},
+ {}, 1.0, {}, {}));
+
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap(), {0, 10}, {0, 5}, 1.0,
+ {0, 0}, {0, 0}));
+
+ EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}),
+ {0, 10}, {0, 5}, 1.0, {1, 0}, {1, 0}));
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 0.99}}),
+ {0, 10}, {0, 5}, 1.0, {0, 0}, {0, 0}));
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}),
+ {0, 10}, {0, 5}, 1.01, {0, 0}, {0, 0}));
+ EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}),
+ {0, 10}, {0, 5}, 15.0, {0, 0}, {0, 0}));
+ EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 15.0}}),
+ {0, 10}, {0, 5}, 15.0, {1, 0}, {1, 0}));
+
+ EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 1, 2}, 1.0}}),
+ {0, 10}, {1, 5}, 1.0, {1, 0}, {1, 0}));
+ EXPECT_EQ(
+ 3U, test_labels_assignment(EquivalenceMap({{{0, 0, 6}, 1.0}}), {0, 5, 10},
+ {0, 5}, 1.0, {1, 2, 0}, {1, 2}));
+ EXPECT_EQ(3U, test_labels_assignment(
+ EquivalenceMap({{{5, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}),
+ {0, 5, 10}, {0, 5}, 1.0, {1, 2, 0}, {2, 1}));
+ EXPECT_EQ(2U, test_labels_assignment(
+ EquivalenceMap({{{0, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}),
+ {0, 5, 10}, {0, 5}, 1.0, {1, 0, 0}, {0, 1}));
+}
+
+} // namespace zucchini
diff --git a/test_disassembler.cc b/test_disassembler.cc
new file mode 100644
index 0000000..2d6727b
--- /dev/null
+++ b/test_disassembler.cc
@@ -0,0 +1,61 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/test_disassembler.h"
+
+#include "components/zucchini/test_reference_reader.h"
+
+namespace zucchini {
+
+// |num_equivalence_iterations_| = 2 to cover common case for testing.
+TestDisassembler::TestDisassembler(const ReferenceTypeTraits& traits1,
+ const std::vector<Reference>& refs1,
+ const ReferenceTypeTraits& traits2,
+ const std::vector<Reference>& refs2,
+ const ReferenceTypeTraits& traits3,
+ const std::vector<Reference>& refs3)
+ : Disassembler(2),
+ traits_{traits1, traits2, traits3},
+ refs_{refs1, refs2, refs3} {}
+
+TestDisassembler::~TestDisassembler() = default;
+
+ExecutableType TestDisassembler::GetExeType() const {
+ return kExeTypeUnknown;
+}
+
+std::string TestDisassembler::GetExeTypeString() const {
+ return "(Unknown)";
+}
+
+std::vector<ReferenceGroup> TestDisassembler::MakeReferenceGroups() const {
+ return {
+ {traits_[0], &TestDisassembler::MakeReadRefs1,
+ &TestDisassembler::MakeWriteRefs1},
+ {traits_[1], &TestDisassembler::MakeReadRefs2,
+ &TestDisassembler::MakeWriteRefs2},
+ {traits_[2], &TestDisassembler::MakeReadRefs3,
+ &TestDisassembler::MakeWriteRefs3},
+ };
+}
+
+bool TestDisassembler::Parse(ConstBufferView image) {
+ return true;
+}
+
+std::unique_ptr<ReferenceReader> TestDisassembler::MakeReadRefs(int type) {
+ return std::make_unique<TestReferenceReader>(refs_[type]);
+}
+
+std::unique_ptr<ReferenceWriter> TestDisassembler::MakeWriteRefs(
+ MutableBufferView image) {
+ class NoOpWriter : public ReferenceWriter {
+ public:
+ // ReferenceWriter:
+ void PutNext(Reference) override {}
+ };
+ return std::make_unique<NoOpWriter>();
+}
+
+} // namespace zucchini
diff --git a/test_disassembler.h b/test_disassembler.h
new file mode 100644
index 0000000..e434fc4
--- /dev/null
+++ b/test_disassembler.h
@@ -0,0 +1,77 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_
+#define COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A trivial Disassembler that reads injected references of 3 different types.
+// This is only meant for testing and is not a full implementation of a
+// disassembler. Reading references ignores bounds, and writing references does
+// nothing.
+class TestDisassembler : public Disassembler {
+ public:
+ TestDisassembler(const ReferenceTypeTraits& traits1,
+ const std::vector<Reference>& refs1,
+ const ReferenceTypeTraits& traits2,
+ const std::vector<Reference>& refs2,
+ const ReferenceTypeTraits& traits3,
+ const std::vector<Reference>& refs3);
+ TestDisassembler(const TestDisassembler&) = delete;
+ const TestDisassembler& operator=(const TestDisassembler&) = delete;
+ ~TestDisassembler() override;
+
+ // Disassembler:
+ ExecutableType GetExeType() const override;
+ std::string GetExeTypeString() const override;
+ std::vector<ReferenceGroup> MakeReferenceGroups() const override;
+
+ // Disassembler::ReaderFactory:
+ std::unique_ptr<ReferenceReader> MakeReadRefs1(offset_t /*lower*/,
+ offset_t /*upper*/) {
+ return MakeReadRefs(0);
+ }
+ std::unique_ptr<ReferenceReader> MakeReadRefs2(offset_t /*lower*/,
+ offset_t /*upper*/) {
+ return MakeReadRefs(1);
+ }
+ std::unique_ptr<ReferenceReader> MakeReadRefs3(offset_t /*lower*/,
+ offset_t /*upper*/) {
+ return MakeReadRefs(2);
+ }
+
+ // Disassembler::WriterFactory:
+ std::unique_ptr<ReferenceWriter> MakeWriteRefs1(MutableBufferView image) {
+ return MakeWriteRefs(image);
+ }
+ std::unique_ptr<ReferenceWriter> MakeWriteRefs2(MutableBufferView image) {
+ return MakeWriteRefs(image);
+ }
+ std::unique_ptr<ReferenceWriter> MakeWriteRefs3(MutableBufferView image) {
+ return MakeWriteRefs(image);
+ }
+
+ private:
+ // Disassembler:
+ bool Parse(ConstBufferView image) override;
+
+ std::unique_ptr<ReferenceReader> MakeReadRefs(int type);
+ std::unique_ptr<ReferenceWriter> MakeWriteRefs(MutableBufferView image);
+
+ ReferenceTypeTraits traits_[3];
+ std::vector<Reference> refs_[3];
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_
diff --git a/test_reference_reader.cc b/test_reference_reader.cc
new file mode 100644
index 0000000..b7f8ece
--- /dev/null
+++ b/test_reference_reader.cc
@@ -0,0 +1,20 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/test_reference_reader.h"
+
+namespace zucchini {
+
+TestReferenceReader::TestReferenceReader(const std::vector<Reference>& refs)
+ : references_(refs) {}
+
+TestReferenceReader::~TestReferenceReader() = default;
+
+absl::optional<Reference> TestReferenceReader::GetNext() {
+ if (index_ == references_.size())
+ return absl::nullopt;
+ return references_[index_++];
+}
+
+} // namespace zucchini
diff --git a/test_reference_reader.h b/test_reference_reader.h
new file mode 100644
index 0000000..cc8c0de
--- /dev/null
+++ b/test_reference_reader.h
@@ -0,0 +1,32 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_
+#define COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "components/zucchini/image_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+// A trivial ReferenceReader that reads injected references.
+class TestReferenceReader : public ReferenceReader {
+ public:
+ explicit TestReferenceReader(const std::vector<Reference>& refs);
+ ~TestReferenceReader() override;
+
+ absl::optional<Reference> GetNext() override;
+
+ private:
+ std::vector<Reference> references_;
+ size_t index_ = 0;
+};
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_
diff --git a/test_utils.cc b/test_utils.cc
new file mode 100644
index 0000000..bc912b4
--- /dev/null
+++ b/test_utils.cc
@@ -0,0 +1,26 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/test_utils.h"
+
+#include <ios>
+#include <sstream>
+
+#include "base/check_op.h"
+
+namespace zucchini {
+
+std::vector<uint8_t> ParseHexString(const std::string& hex_string) {
+ std::vector<uint8_t> ret;
+ std::istringstream iss(hex_string);
+ iss >> std::hex;
+ uint32_t temp = 0; // Cannot be uint8_t: istringstream treats this as char!
+ while (iss >> temp) {
+ CHECK_LE(temp, 0xFFU);
+ ret.push_back(temp);
+ }
+ return ret;
+}
+
+} // namespace zucchini
diff --git a/test_utils.h b/test_utils.h
new file mode 100644
index 0000000..e922343
--- /dev/null
+++ b/test_utils.h
@@ -0,0 +1,35 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TEST_UTILS_H_
+#define COMPONENTS_ZUCCHINI_TEST_UTILS_H_
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+namespace zucchini {
+
+// Parses space-separated list of byte hex values into list.
+std::vector<uint8_t> ParseHexString(const std::string& hex_string);
+
+// Returns a vector that's the concatenation of two vectors of the same type.
+// Elements are copied by value.
+template <class T>
+std::vector<T> Cat(const std::vector<T>& a, const std::vector<T>& b) {
+ std::vector<T> ret(a);
+ ret.insert(ret.end(), b.begin(), b.end());
+ return ret;
+}
+
+// Returns a subvector of a vector. Elements are copied by value.
+template <class T>
+std::vector<T> Sub(const std::vector<T>& a, size_t lo, size_t hi) {
+ return std::vector<T>(a.begin() + lo, a.begin() + hi);
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TEST_UTILS_H_
diff --git a/testdata/chrome64_1.exe.sha1 b/testdata/chrome64_1.exe.sha1
new file mode 100644
index 0000000..9b4f113
--- /dev/null
+++ b/testdata/chrome64_1.exe.sha1
@@ -0,0 +1 @@
+4970ef6f342f6a0da9ae7a4ed462f93ef68f142c \ No newline at end of file
diff --git a/testdata/chrome64_2.exe.sha1 b/testdata/chrome64_2.exe.sha1
new file mode 100644
index 0000000..e4a96a2
--- /dev/null
+++ b/testdata/chrome64_2.exe.sha1
@@ -0,0 +1 @@
+c3a974589d50956a3c8c17572fee078b9276ad9b \ No newline at end of file
diff --git a/testdata/setup1.exe.sha1 b/testdata/setup1.exe.sha1
new file mode 100644
index 0000000..2304621
--- /dev/null
+++ b/testdata/setup1.exe.sha1
@@ -0,0 +1 @@
+5d0e8fed8e9e091e184adb2e2e0e668def9cd2c5 \ No newline at end of file
diff --git a/testdata/setup2.exe.sha1 b/testdata/setup2.exe.sha1
new file mode 100644
index 0000000..9fa4d0c
--- /dev/null
+++ b/testdata/setup2.exe.sha1
@@ -0,0 +1 @@
+12194273e8d509b6e81e4a6b63621081e1426028 \ No newline at end of file
diff --git a/type_dex.h b/type_dex.h
new file mode 100644
index 0000000..432a031
--- /dev/null
+++ b/type_dex.h
@@ -0,0 +1,291 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TYPE_DEX_H_
+#define COMPONENTS_ZUCCHINI_TYPE_DEX_H_
+
+#include <stdint.h>
+
+namespace zucchini {
+namespace dex {
+// Contains types that model DEX executable format data structures.
+// See https://source.android.com/devices/tech/dalvik/dex-format
+
+// The supported versions are 035 and 037.
+
+enum class FormatId : uint8_t {
+ b, // 22b.
+ c, // 21c, 22c, 31c, 35c, 3rc.
+ h, // 21h.
+ i, // 31i.
+ l, // 51l.
+ n, // 11n.
+ s, // 21s, 22s.
+ t, // 10t, 20t, 21t, 22t, 30t, 31t.
+ x, // 10x, 11x, 12x, 22x, 23x, 32x.
+};
+
+struct Instruction {
+ Instruction() = default;
+ constexpr Instruction(uint8_t opcode_in,
+ uint8_t layout_in,
+ FormatId format_in,
+ uint8_t variant_in = 1)
+ : opcode(opcode_in),
+ layout(layout_in),
+ format(format_in),
+ variant(variant_in) {}
+
+ // The opcode that identifies the instruction.
+ uint8_t opcode;
+ // Number of uint16_t units for the instruction.
+ uint8_t layout;
+ // Identifier that groups similar instructions, as quick filter.
+ FormatId format;
+ // Number of successive opcodes that have the same format.
+ uint8_t variant = 1;
+};
+
+constexpr Instruction kByteCode[] = {
+ {0x00, 1, FormatId::x},
+ {0x01, 1, FormatId::x},
+ {0x02, 2, FormatId::x},
+ {0x03, 3, FormatId::x},
+ {0x04, 1, FormatId::x},
+ {0x05, 2, FormatId::x},
+ {0x06, 3, FormatId::x},
+ {0x07, 1, FormatId::x},
+ {0x08, 2, FormatId::x},
+ {0x09, 3, FormatId::x},
+ {0x0A, 1, FormatId::x},
+ {0x0B, 1, FormatId::x},
+ {0x0C, 1, FormatId::x},
+ {0x0D, 1, FormatId::x},
+ {0x0E, 1, FormatId::x},
+ {0x0F, 1, FormatId::x},
+ {0x10, 1, FormatId::x},
+ {0x11, 1, FormatId::x},
+ {0x12, 1, FormatId::n},
+ {0x13, 2, FormatId::s},
+ {0x14, 3, FormatId::i},
+ {0x15, 2, FormatId::h},
+ {0x16, 2, FormatId::s},
+ {0x17, 3, FormatId::i},
+ {0x18, 5, FormatId::l},
+ {0x19, 2, FormatId::h},
+ {0x1A, 2, FormatId::c},
+ {0x1B, 3, FormatId::c},
+ {0x1C, 2, FormatId::c},
+ {0x1D, 1, FormatId::x},
+ {0x1E, 1, FormatId::x},
+ {0x1F, 2, FormatId::c},
+ {0x20, 2, FormatId::c},
+ {0x21, 1, FormatId::x},
+ {0x22, 2, FormatId::c},
+ {0x23, 2, FormatId::c},
+ {0x24, 3, FormatId::c},
+ {0x25, 3, FormatId::c},
+ {0x26, 3, FormatId::t},
+ {0x27, 1, FormatId::x},
+ {0x28, 1, FormatId::t},
+ {0x29, 2, FormatId::t},
+ {0x2A, 3, FormatId::t},
+ {0x2B, 3, FormatId::t},
+ {0x2C, 3, FormatId::t},
+ {0x2D, 2, FormatId::x, 5},
+ {0x32, 2, FormatId::t, 6},
+ {0x38, 2, FormatId::t, 6},
+ // {0x3E, 1, FormatId::x, 6}, unused
+ {0x44, 2, FormatId::x, 14},
+ {0x52, 2, FormatId::c, 14},
+ {0x60, 2, FormatId::c, 14},
+ {0x6E, 3, FormatId::c, 5},
+ // {0x73, 1, FormatId::x}, unused
+ {0x74, 3, FormatId::c, 5},
+ // {0x79, 1, FormatId::x, 2}, unused
+ {0x7B, 1, FormatId::x, 21},
+ {0x90, 2, FormatId::x, 32},
+ {0xB0, 1, FormatId::x, 32},
+ {0xD0, 2, FormatId::s, 8},
+ {0xD8, 2, FormatId::b, 11},
+ // {0xE3, 1, FormatId::x, 29}, unused
+};
+
+// Supported by MSVC, g++, and clang++. Ensures no gaps in packing.
+#pragma pack(push, 1)
+
+// header_item: Appears in the header section.
+struct HeaderItem {
+ uint8_t magic[8];
+ uint32_t checksum;
+ uint8_t signature[20];
+ uint32_t file_size;
+ uint32_t header_size;
+ uint32_t endian_tag;
+ uint32_t link_size;
+ uint32_t link_off;
+ uint32_t map_off;
+ uint32_t string_ids_size;
+ uint32_t string_ids_off;
+ uint32_t type_ids_size;
+ uint32_t type_ids_off;
+ uint32_t proto_ids_size;
+ uint32_t proto_ids_off;
+ uint32_t field_ids_size;
+ uint32_t field_ids_off;
+ uint32_t method_ids_size;
+ uint32_t method_ids_off;
+ uint32_t class_defs_size;
+ uint32_t class_defs_off;
+ uint32_t data_size;
+ uint32_t data_off;
+};
+
+// string_id_item: String identifiers list.
+struct StringIdItem {
+ uint32_t string_data_off;
+};
+
+// type_id_item: Type identifiers list.
+struct TypeIdItem {
+ uint32_t descriptor_idx;
+};
+
+// proto_id_item: Method prototype identifiers list.
+struct ProtoIdItem {
+ uint32_t shorty_idx;
+ uint32_t return_type_idx;
+ uint32_t parameters_off;
+};
+
+// field_id_item: Field identifiers list.
+struct FieldIdItem {
+ uint16_t class_idx;
+ uint16_t type_idx;
+ uint32_t name_idx;
+};
+
+// method_id_item: Method identifiers list.
+struct MethodIdItem {
+ uint16_t class_idx;
+ uint16_t proto_idx;
+ uint32_t name_idx;
+};
+
+// class_def_item: Class definitions list.
+struct ClassDefItem {
+ uint32_t class_idx;
+ uint32_t access_flags;
+ uint32_t superclass_idx;
+ uint32_t interfaces_off;
+ uint32_t source_file_idx;
+ uint32_t annotations_off;
+ uint32_t class_data_off;
+ uint32_t static_values_off;
+};
+
+// code_item: Header of a code item.
+struct CodeItem {
+ uint16_t registers_size;
+ uint16_t ins_size;
+ uint16_t outs_size;
+ uint16_t tries_size;
+ uint32_t debug_info_off;
+ uint32_t insns_size;
+ // Variable length data follow for complete code item.
+};
+
+constexpr uint32_t kMaxItemListSize = 18;
+
+// map_item
+struct MapItem {
+ uint16_t type;
+ uint16_t unused;
+ uint32_t size;
+ uint32_t offset;
+};
+
+// map_list
+struct MapList {
+ uint32_t size;
+ MapItem list[kMaxItemListSize];
+};
+
+// type_item
+struct TypeItem {
+ uint16_t type_idx;
+};
+
+// annotation_set_ref_item
+struct AnnotationSetRefItem {
+ uint32_t annotations_off;
+};
+
+// annotation_off_item
+struct AnnotationOffItem {
+ uint32_t annotation_off;
+};
+
+// field_annotation
+struct FieldAnnotation {
+ uint32_t field_idx;
+ uint32_t annotations_off;
+};
+
+// method_annotation
+struct MethodAnnotation {
+ uint32_t method_idx;
+ uint32_t annotations_off;
+};
+
+// parameter_annotation
+struct ParameterAnnotation {
+ uint32_t method_idx;
+ uint32_t annotations_off;
+};
+
+// annotations_directory_item
+struct AnnotationsDirectoryItem {
+ uint32_t class_annotations_off;
+ uint32_t fields_size;
+ uint32_t annotated_methods_size;
+ uint32_t annotated_parameters_size;
+ // FieldAnnotation field_annotations[fields_size];
+ // MethodAnnotation method_annotations[annotated_methods_size];
+ // ParameterAnnotation parameter_annotations[annotated_parameters_size];
+ // All *Annotation are 8 bytes each.
+};
+
+// try_item
+struct TryItem {
+ uint32_t start_addr;
+ uint16_t insn_count;
+ uint16_t handler_off;
+};
+
+constexpr uint16_t kTypeHeaderItem = 0x0000;
+constexpr uint16_t kTypeStringIdItem = 0x0001;
+constexpr uint16_t kTypeTypeIdItem = 0x0002;
+constexpr uint16_t kTypeProtoIdItem = 0x0003;
+constexpr uint16_t kTypeFieldIdItem = 0x0004;
+constexpr uint16_t kTypeMethodIdItem = 0x0005;
+constexpr uint16_t kTypeClassDefItem = 0x0006;
+constexpr uint16_t kTypeMapList = 0x1000;
+constexpr uint16_t kTypeTypeList = 0x1001;
+constexpr uint16_t kTypeAnnotationSetRefList = 0x1002;
+constexpr uint16_t kTypeAnnotationSetItem = 0x1003;
+constexpr uint16_t kTypeClassDataItem = 0x2000;
+constexpr uint16_t kTypeCodeItem = 0x2001;
+constexpr uint16_t kTypeStringDataItem = 0x2002;
+constexpr uint16_t kTypeDebugInfoItem = 0x2003;
+constexpr uint16_t kTypeAnnotationItem = 0x2004;
+constexpr uint16_t kTypeEncodedArrayItem = 0x2005;
+constexpr uint16_t kTypeAnnotationsDirectoryItem = 0x2006;
+
+#pragma pack(pop)
+
+} // namespace dex
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TYPE_DEX_H_
diff --git a/type_elf.h b/type_elf.h
new file mode 100644
index 0000000..2a522b1
--- /dev/null
+++ b/type_elf.h
@@ -0,0 +1,283 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TYPE_ELF_H_
+#define COMPONENTS_ZUCCHINI_TYPE_ELF_H_
+
+#include <stdint.h>
+
+namespace zucchini {
+
+// Structures and constants taken from linux/elf.h and following identical
+// layout. This is used for parsing of Executable and Linkable Format (ELF).
+namespace elf {
+// Supported by MSVC, g++, and clang++. Ensures no gaps in packing.
+#pragma pack(push, 1)
+
+// This header defines various types from the ELF file spec, but no code
+// related to using them.
+
+typedef uint32_t Elf32_Addr; // Unsigned program address.
+typedef uint16_t Elf32_Half; // Unsigned medium integer.
+typedef uint32_t Elf32_Off; // Unsigned file offset.
+typedef int32_t Elf32_Sword; // Signed large integer.
+typedef uint32_t Elf32_Word; // Unsigned large integer.
+
+typedef uint64_t Elf64_Addr; // Unsigned program address.
+typedef uint16_t Elf64_Half; // Unsigned medium integer.
+typedef uint64_t Elf64_Off; // Unsigned file offset.
+typedef int32_t Elf64_Sword; // Signed large integer.
+typedef uint32_t Elf64_Word; // Unsigned large integer.
+typedef int64_t Elf64_Sxword; // Signed extra large integer.
+typedef uint64_t Elf64_Xword; // Unsigned extra large integer.
+
+// The header at the top of the file.
+struct Elf32_Ehdr {
+ unsigned char e_ident[16];
+ Elf32_Half e_type;
+ Elf32_Half e_machine;
+ Elf32_Word e_version;
+ Elf32_Addr e_entry;
+ Elf32_Off e_phoff;
+ Elf32_Off e_shoff;
+ Elf32_Word e_flags;
+ Elf32_Half e_ehsize;
+ Elf32_Half e_phentsize;
+ Elf32_Half e_phnum;
+ Elf32_Half e_shentsize;
+ Elf32_Half e_shnum;
+ Elf32_Half e_shstrndx;
+};
+
+struct Elf64_Ehdr {
+ unsigned char e_ident[16];
+ Elf64_Half e_type;
+ Elf64_Half e_machine;
+ Elf64_Word e_version;
+ Elf64_Addr e_entry;
+ Elf64_Off e_phoff;
+ Elf64_Off e_shoff;
+ Elf64_Word e_flags;
+ Elf64_Half e_ehsize;
+ Elf64_Half e_phentsize;
+ Elf64_Half e_phnum;
+ Elf64_Half e_shentsize;
+ Elf64_Half e_shnum;
+ Elf64_Half e_shstrndx;
+};
+
+// Identification Indexes in header->e_ident.
+enum IdentificationIndex {
+ EI_MAG0 = 0, // File identification.
+ EI_MAG1 = 1, // File identification.
+ EI_MAG2 = 2, // File identification.
+ EI_MAG3 = 3, // File identification.
+ EI_CLASS = 4, // File class.
+ EI_DATA = 5, // Data encoding.
+ EI_VERSION = 6, // File version.
+ EI_OSABI = 7, // Operating system/ABI identification.
+ EI_ABIVERSION = 8, // ABI version.
+ EI_PAD = 9, // Start of padding bytes.
+ EI_NIDENT = 16 // Size of e_ident[].
+};
+
+// Values for header->e_ident[EI_CLASS].
+enum FileClass {
+ ELFCLASSNONE = 0, // Invalid class.
+ ELFCLASS32 = 1, // 32-bit objects.
+ ELFCLASS64 = 2 // 64-bit objects.
+};
+
+// Values for header->e_type.
+enum FileType {
+ ET_NONE = 0, // No file type
+ ET_REL = 1, // Relocatable file
+ ET_EXEC = 2, // Executable file
+ ET_DYN = 3, // Shared object file
+ ET_CORE = 4, // Core file
+ ET_LOPROC = 0xFF00, // Processor-specific
+ ET_HIPROC = 0xFFFF // Processor-specific
+};
+
+// Values for header->e_machine.
+enum MachineArchitecture {
+ EM_NONE = 0, // No machine.
+ EM_386 = 3, // Intel Architecture.
+ EM_ARM = 40, // ARM Architecture.
+ EM_X86_64 = 62, // Intel x86-64 Architecture.
+ EM_AARCH64 = 183, // ARM Architecture, 64-bit.
+ // Other values skipped.
+};
+
+// A section header in the section header table.
+struct Elf32_Shdr {
+ Elf32_Word sh_name;
+ Elf32_Word sh_type;
+ Elf32_Word sh_flags;
+ Elf32_Addr sh_addr;
+ Elf32_Off sh_offset;
+ Elf32_Word sh_size;
+ Elf32_Word sh_link;
+ Elf32_Word sh_info;
+ Elf32_Word sh_addralign;
+ Elf32_Word sh_entsize;
+};
+
+struct Elf64_Shdr {
+ Elf64_Word sh_name;
+ Elf64_Word sh_type;
+ Elf64_Xword sh_flags;
+ Elf64_Addr sh_addr;
+ Elf64_Off sh_offset;
+ Elf64_Xword sh_size;
+ Elf64_Word sh_link;
+ Elf64_Word sh_info;
+ Elf64_Xword sh_addralign;
+ Elf64_Xword sh_entsize;
+};
+
+// Values for the section type field in a section header.
+enum sh_type_values {
+ SHT_NULL = 0,
+ SHT_PROGBITS = 1,
+ SHT_SYMTAB = 2,
+ SHT_STRTAB = 3,
+ SHT_RELA = 4,
+ SHT_HASH = 5,
+ SHT_DYNAMIC = 6,
+ SHT_NOTE = 7,
+ SHT_NOBITS = 8,
+ SHT_REL = 9,
+ SHT_SHLIB = 10,
+ SHT_DYNSYM = 11,
+ SHT_INIT_ARRAY = 14,
+ SHT_FINI_ARRAY = 15,
+ SHT_LOPROC = 0x70000000,
+ SHT_HIPROC = 0x7FFFFFFF,
+ SHT_LOUSER = 0x80000000,
+ SHT_HIUSER = 0xFFFFFFFF
+};
+
+enum sh_flag_masks {
+ SHF_WRITE = 1 << 0,
+ SHF_ALLOC = 1 << 1,
+ SHF_EXECINSTR = 1 << 2,
+ // 1 << 3 is reserved.
+ SHF_MERGE = 1 << 4,
+ SHF_STRINGS = 1 << 5,
+ SHF_INFO_LINK = 1 << 6,
+ SHF_LINK_ORDER = 1 << 7,
+ SHF_OS_NONCONFORMING = 1 << 8,
+ SHF_GROUP = 1 << 9,
+ SHF_TLS = 1 << 10,
+ SHF_COMPRESSED = 1 << 11,
+};
+
+struct Elf32_Phdr {
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+};
+
+struct Elf64_Phdr {
+ Elf64_Word p_type;
+ Elf64_Word p_flags;
+ Elf64_Off p_offset;
+ Elf64_Addr p_vaddr;
+ Elf64_Addr p_paddr;
+ Elf64_Xword p_filesz;
+ Elf64_Xword p_memsz;
+ Elf64_Xword p_align;
+};
+
+// Values for the segment type field in a program segment header.
+enum ph_type_values {
+ PT_NULL = 0,
+ PT_LOAD = 1,
+ PT_DYNAMIC = 2,
+ PT_INTERP = 3,
+ PT_NOTE = 4,
+ PT_SHLIB = 5,
+ PT_PHDR = 6,
+ PT_LOPROC = 0x70000000,
+ PT_HIPROC = 0x7FFFFFFF
+};
+
+struct Elf32_Rel {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+};
+
+struct Elf64_Rel {
+ Elf64_Addr r_offset;
+ Elf64_Xword r_info;
+};
+
+struct Elf32_Rela {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+ Elf32_Sword r_addend;
+};
+
+struct Elf64_Rela {
+ Elf64_Addr r_offset;
+ Elf64_Xword r_info;
+ Elf64_Sxword r_addend;
+};
+
+enum elf32_rel_386_type_values {
+ R_386_NONE = 0,
+ R_386_32 = 1,
+ R_386_PC32 = 2,
+ R_386_GOT32 = 3,
+ R_386_PLT32 = 4,
+ R_386_COPY = 5,
+ R_386_GLOB_DAT = 6,
+ R_386_JMP_SLOT = 7,
+ R_386_RELATIVE = 8,
+ R_386_GOTOFF = 9,
+ R_386_GOTPC = 10,
+ R_386_TLS_TPOFF = 14,
+};
+
+enum elf32_rel_x86_64_type_values {
+ R_X86_64_NONE = 0,
+ R_X86_64_64 = 1,
+ R_X86_64_PC32 = 2,
+ R_X86_64_GOT32 = 3,
+ R_X86_64_PLT32 = 4,
+ R_X86_64_COPY = 5,
+ R_X86_64_GLOB_DAT = 6,
+ R_X86_64_JUMP_SLOT = 7,
+ R_X86_64_RELATIVE = 8,
+ R_X86_64_GOTPCREL = 9,
+ R_X86_64_32 = 10,
+ R_X86_64_32S = 11,
+ R_X86_64_16 = 12,
+ R_X86_64_PC16 = 13,
+ R_X86_64_8 = 14,
+ R_X86_64_PC8 = 15,
+};
+
+enum elf32_rel_arm_type_values {
+ R_ARM_RELATIVE = 23,
+};
+
+enum elf64_rel_aarch64_type_values {
+ R_AARCH64_GLOB_DAT = 0x401,
+ R_AARCH64_JUMP_SLOT = 0x402,
+ R_AARCH64_RELATIVE = 0x403,
+};
+
+#pragma pack(pop)
+
+} // namespace elf
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TYPE_ELF_H_
diff --git a/type_win_pe.h b/type_win_pe.h
new file mode 100644
index 0000000..56996fe
--- /dev/null
+++ b/type_win_pe.h
@@ -0,0 +1,191 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_
+#define COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace zucchini {
+
+// Structures and constants taken from WINNT.h and following identical layout.
+// This is used for parsing of Portable Executable (PE) file format.
+namespace pe {
+// Supported by MSVC, g++, and clang++. Ensures no gaps in packing.
+#pragma pack(push, 1)
+
+// IMAGE_NUMBEROF_DIRECTORY_ENTRIES
+constexpr size_t kImageNumberOfDirectoryEntries = 16;
+
+// IMAGE_FILE_BASE_RELOCATION_TABLE
+constexpr size_t kIndexOfBaseRelocationTable = 5;
+
+constexpr uint32_t kImageScnMemExecute = 0x20000000; // IMAGE_SCN_MEM_EXECUTE
+constexpr uint32_t kImageScnMemRead = 0x40000000; // IMAGE_SCN_MEM_READ
+
+// IMAGE_DOS_HEADER
+struct ImageDOSHeader {
+ uint16_t e_magic; // 0x00
+ uint16_t e_cblp;
+ uint16_t e_cp;
+ uint16_t e_crlc;
+ uint16_t e_cparhdr;
+ uint16_t e_minalloc;
+ uint16_t e_maxalloc;
+ uint16_t e_ss;
+ uint16_t e_sp; // 0x10
+ uint16_t e_csum;
+ uint16_t e_ip;
+ uint16_t e_cs;
+ uint16_t e_lfarlc;
+ uint16_t e_ovno;
+ uint16_t e_res[4];
+ uint16_t e_oemid; // 0x24
+ uint16_t e_oeminfo;
+ uint16_t e_res2[10];
+ uint32_t e_lfanew; // 0x3C
+};
+static_assert(sizeof(ImageDOSHeader) == 0x40,
+ "DOS header size should be 0x40 bytes");
+
+// IMAGE_SECTION_HEADER
+struct ImageSectionHeader {
+ char name[8];
+ uint32_t virtual_size;
+ uint32_t virtual_address;
+ uint32_t size_of_raw_data;
+ uint32_t file_offset_of_raw_data;
+ uint32_t pointer_to_relocations; // Always zero in an image.
+ uint32_t pointer_to_line_numbers; // Always zero in an image.
+ uint16_t number_of_relocations; // Always zero in an image.
+ uint16_t number_of_line_numbers; // Always zero in an image.
+ uint32_t characteristics;
+};
+static_assert(sizeof(ImageSectionHeader) == 0x28,
+ "Section header size should be 0x28 bytes");
+
+// IMAGE_DATA_DIRECTORY
+struct ImageDataDirectory {
+ uint32_t virtual_address;
+ uint32_t size;
+};
+static_assert(sizeof(ImageDataDirectory) == 0x08,
+ "Data directory size should be 0x08 bytes");
+
+// IMAGE_FILE_HEADER
+struct ImageFileHeader {
+ uint16_t machine;
+ uint16_t number_of_sections;
+ uint32_t time_date_stamp;
+ uint32_t pointer_to_symbol_table;
+ uint32_t number_of_symbols;
+ uint16_t size_of_optional_header;
+ uint16_t characteristics;
+};
+static_assert(sizeof(ImageFileHeader) == 0x14,
+ "File header size should be 0x14 bytes");
+
+// IMAGE_OPTIONAL_HEADER
+struct ImageOptionalHeader {
+ uint16_t magic; // 0x00: 0x10B
+ uint8_t major_linker_version;
+ uint8_t minor_linker_version;
+ uint32_t size_of_code;
+ uint32_t size_of_initialized_data;
+ uint32_t size_of_uninitialized_data;
+ uint32_t address_of_entry_point; // 0x10
+ uint32_t base_of_code;
+ uint32_t base_of_data;
+
+ uint32_t image_base;
+ uint32_t section_alignment; // 0x20
+ uint32_t file_alignment;
+ uint16_t major_operating_system_version;
+ uint16_t minor_operating_system_version;
+ uint16_t major_image_version;
+ uint16_t minor_image_version;
+ uint16_t major_subsystem_version; // 0x30
+ uint16_t minor_subsystem_version;
+ uint32_t win32_version_value;
+ uint32_t size_of_image;
+ uint32_t size_of_headers;
+ uint32_t check_sum; // 0x40
+ uint16_t subsystem;
+ uint16_t dll_characteristics;
+ uint32_t size_of_stack_reserve;
+ uint32_t size_of_stack_commit;
+ uint32_t size_of_heap_reserve; // 0x50
+ uint32_t size_of_heap_commit;
+ uint32_t loader_flags;
+ uint32_t number_of_rva_and_sizes;
+
+ // The number of elements is actually |number_of_rva_and_sizes|, so accesses
+ // to |data_directory| should be checked against the bound.
+ ImageDataDirectory data_directory[kImageNumberOfDirectoryEntries]; // 0x60
+ /* 0xE0 */
+};
+static_assert(sizeof(ImageOptionalHeader) == 0xE0,
+ "Optional header (32) size should be 0xE0 bytes");
+
+// IMAGE_OPTIONAL_HEADER64
+struct ImageOptionalHeader64 {
+ uint16_t magic; // 0x00: 0x20B
+ uint8_t major_linker_version;
+ uint8_t minor_linker_version;
+ uint32_t size_of_code;
+ uint32_t size_of_initialized_data;
+ uint32_t size_of_uninitialized_data;
+ uint32_t address_of_entry_point; // 0x10
+ uint32_t base_of_code;
+
+ uint64_t image_base;
+ uint32_t section_alignment; // 0x20
+ uint32_t file_alignment;
+ uint16_t major_operating_system_version;
+ uint16_t minor_operating_system_version;
+ uint16_t major_image_version;
+ uint16_t minor_image_version;
+ uint16_t major_subsystem_version; // 0x30
+ uint16_t minor_subsystem_version;
+ uint32_t win32_version_value;
+ uint32_t size_of_image;
+ uint32_t size_of_headers;
+ uint32_t check_sum; // 0x40
+ uint16_t subsystem;
+ uint16_t dll_characteristics;
+ uint64_t size_of_stack_reserve;
+ uint64_t size_of_stack_commit; // 0x50
+ uint64_t size_of_heap_reserve;
+ uint64_t size_of_heap_commit; // 0x60
+ uint32_t loader_flags;
+ uint32_t number_of_rva_and_sizes;
+ ImageDataDirectory data_directory[kImageNumberOfDirectoryEntries]; // 0x70
+ /* 0xF0 */
+};
+static_assert(sizeof(ImageOptionalHeader64) == 0xF0,
+ "Optional header (64) size should be 0xF0 bytes");
+
+struct RelocHeader {
+ uint32_t rva_hi;
+ uint32_t size;
+};
+static_assert(sizeof(RelocHeader) == 8, "RelocHeader size should be 8 bytes");
+
+#pragma pack(pop)
+
+} // namespace pe
+
+// Constants and offsets gleaned from WINNT.h and various articles on the
+// format of Windows PE executables.
+
+constexpr char const* kTextSectionName = ".text";
+
+// Bitfield with characteristics usually associated with code sections.
+const uint32_t kCodeCharacteristics =
+ pe::kImageScnMemExecute | pe::kImageScnMemRead;
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_
diff --git a/type_ztf.h b/type_ztf.h
new file mode 100644
index 0000000..8ecc9ca
--- /dev/null
+++ b/type_ztf.h
@@ -0,0 +1,54 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TYPE_ZTF_H_
+#define COMPONENTS_ZUCCHINI_TYPE_ZTF_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace zucchini {
+
+namespace ztf {
+
+typedef int16_t dim_t;
+
+// An exclusive upper bound on the number of lines and/or columns. Throughout the ZTF
+// code a dimension (dim) refers to a block of 1-3 digits which contain a line
+// or column number.
+enum : size_t { kMaxDimValue = 1000 };
+
+enum SignChar : uint8_t {
+ kMinus = '-',
+ kPlus = '+',
+};
+
+// Lines and columns are 1-based to follow the convention of most modern text
+// editing software. |line| and |col| should be positive, but int16_t is used to
+// limit ranges such that it matches DeltaLineCol.
+struct LineCol {
+ dim_t line;
+ dim_t col;
+};
+
+struct DeltaLineCol {
+ dim_t line;
+ dim_t col;
+};
+
+constexpr DeltaLineCol operator-(const LineCol& lhs, const LineCol& rhs) {
+ return DeltaLineCol{static_cast<dim_t>(lhs.line - rhs.line),
+ static_cast<dim_t>(lhs.col - rhs.col)};
+}
+
+constexpr LineCol operator+(const LineCol& lhs, const DeltaLineCol& rhs) {
+ return LineCol{static_cast<dim_t>(lhs.line + rhs.line),
+ static_cast<dim_t>(lhs.col + rhs.col)};
+}
+
+} // namespace ztf
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TYPE_ZTF_H_
diff --git a/typed_value.h b/typed_value.h
new file mode 100644
index 0000000..868397c
--- /dev/null
+++ b/typed_value.h
@@ -0,0 +1,57 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_TYPED_VALUE_H_
+#define COMPONENTS_ZUCCHINI_TYPED_VALUE_H_
+
+#include <ostream>
+
+namespace zucchini {
+
+// Strongly typed values, with compare and convert functions for underlying data.
+// Typically one would use strongly typed enums for this. However, for Zucchini,
+// the number of bytes is not fixed, and must be represented as an integer for
+// iteration.
+// |Tag| is a type tag used to uniquely identify TypedValue.
+// |T| is an integral type used to hold values.
+// Example:
+// struct Foo : TypedValue<Foo, int> {
+// using Foo::TypedValue::TypedValue; // inheriting constructor.
+// };
+// Foo will be used to hold values of type |int|, but with a distinct type from
+// any other TypedValue.
+template <class Tag, class T>
+class TypedValue {
+ public:
+ constexpr TypedValue() = default;
+ explicit constexpr TypedValue(const T& value) : value_(value) {}
+
+ explicit operator T() const { return value_; }
+ const T value() const { return value_; }
+
+ friend bool operator==(const TypedValue& a, const TypedValue& b) {
+ return a.value_ == b.value_;
+ }
+ friend bool operator!=(const TypedValue& a, const TypedValue& b) {
+ return !(a == b);
+ }
+ friend bool operator<(const TypedValue& a, const TypedValue& b) {
+ return a.value_ < b.value_;
+ }
+ friend bool operator>(const TypedValue& a, const TypedValue& b) {
+ return b < a;
+ }
+
+ private:
+ T value_ = {};
+};
+
+template <class Tag, class T>
+std::ostream& operator<<(std::ostream& os, const TypedValue<Tag, T>& tag) {
+ return os << tag.value();
+}
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_TYPED_VALUE_H_
diff --git a/typed_value_unittest.cc b/typed_value_unittest.cc
new file mode 100644
index 0000000..bc0d4f1
--- /dev/null
+++ b/typed_value_unittest.cc
@@ -0,0 +1,40 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/typed_value.h"
+
+#include <type_traits>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+struct ValueA : TypedValue<ValueA, int> {
+ using ValueA::TypedValue::TypedValue;
+};
+
+struct ValueB : TypedValue<ValueB, int> {
+ using ValueB::TypedValue::TypedValue;
+};
+
+TEST(TypedIdTest, Value) {
+ EXPECT_EQ(42, ValueA(42).value());
+ EXPECT_EQ(42, static_cast<int>(ValueA(42))); // explicit cast
+}
+
+TEST(TypedIdTest, Comparison) {
+ EXPECT_TRUE(ValueA(0) == ValueA(0));
+ EXPECT_FALSE(ValueA(0) == ValueA(42));
+ EXPECT_FALSE(ValueA(0) != ValueA(0));
+ EXPECT_TRUE(ValueA(0) != ValueA(42));
+}
+
+TEST(TypedIdTest, StrongType) {
+ static_assert(!std::is_convertible<ValueA, ValueB>::value,
+ "ValueA should not be convertible to ValueB");
+ static_assert(!std::is_convertible<ValueB, ValueA>::value,
+ "ValueB should not be convertible to ValueA");
+}
+
+} // namespace zucchini
diff --git a/zucchini.h b/zucchini.h
new file mode 100644
index 0000000..9847440
--- /dev/null
+++ b/zucchini.h
@@ -0,0 +1,72 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_H_
+
+#include <string>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/patch_writer.h"
+
+// Core Zucchini library, consisting of:
+// - Global constants.
+// - Patch gen and apply functions, where "old" and "new" data are represented
+// as buffers, and patch data represented as EnsemblePatchWriter or
+// EnsemblePatchReader.
+
+namespace zucchini {
+
+namespace status {
+
+// Zucchini status code, which can also be used as process exit code. Therefore
+// success is explicitly 0.
+enum Code {
+ kStatusSuccess = 0,
+ kStatusInvalidParam = 1,
+ kStatusFileReadError = 2,
+ kStatusFileWriteError = 3,
+ kStatusPatchReadError = 4,
+ kStatusPatchWriteError = 5,
+ kStatusInvalidOldImage = 6,
+ kStatusInvalidNewImage = 7,
+ kStatusFatal = 8,
+};
+
+} // namespace status
+
+// Generates ensemble patch from |old_image| to |new_image| using the default
+// element detection and matching heuristics, writes the results to
+// |patch_writer|, and returns a status::Code.
+status::Code GenerateBuffer(ConstBufferView old_image,
+ ConstBufferView new_image,
+ EnsemblePatchWriter* patch_writer);
+
+// Same as GenerateBuffer(), but if |imposed_matches| is non-empty, then
+// overrides default element detection and matching heuristics with custom
+// element matching encoded in |imposed_matches|, which should be formatted as:
+// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"),
+// where "#+#=#+#" encodes a match as 4 unsigned integers:
+// [offset in "old", size in "old", offset in "new", size in "new"].
+status::Code GenerateBufferImposed(ConstBufferView old_image,
+ ConstBufferView new_image,
+ std::string imposed_matches,
+ EnsemblePatchWriter* patch_writer);
+
+// Generates raw patch from |old_image| to |new_image|, and writes it to
+// |patch_writer|.
+status::Code GenerateBufferRaw(ConstBufferView old_image,
+ ConstBufferView new_image,
+ EnsemblePatchWriter* patch_writer);
+
+// Applies |patch_reader| to |old_image| to build |new_image|, which refers to
+// preallocated memory of sufficient size.
+status::Code ApplyBuffer(ConstBufferView old_image,
+ const EnsemblePatchReader& patch_reader,
+ MutableBufferView new_image);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_H_
diff --git a/zucchini_apply.cc b/zucchini_apply.cc
new file mode 100644
index 0000000..10c5638
--- /dev/null
+++ b/zucchini_apply.cc
@@ -0,0 +1,217 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_apply.h"
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <utility>
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/equivalence_map.h"
+#include "components/zucchini/image_index.h"
+
+namespace zucchini {
+
+bool ApplyEquivalenceAndExtraData(ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image) {
+ EquivalenceSource equiv_source = patch_reader.GetEquivalenceSource();
+ ExtraDataSource extra_data_source = patch_reader.GetExtraDataSource();
+ MutableBufferView::iterator dst_it = new_image.begin();
+
+ for (auto equivalence = equiv_source.GetNext(); equivalence.has_value();
+ equivalence = equiv_source.GetNext()) {
+ MutableBufferView::iterator next_dst_it =
+ new_image.begin() + equivalence->dst_offset;
+ CHECK(next_dst_it >= dst_it);
+
+ offset_t gap = static_cast<offset_t>(next_dst_it - dst_it);
+ absl::optional<ConstBufferView> extra_data = extra_data_source.GetNext(gap);
+ if (!extra_data) {
+ LOG(ERROR) << "Error reading extra_data";
+ return false;
+ }
+ // |extra_data| length is based on what was parsed from the patch so this
+ // copy should be valid.
+ dst_it = std::copy(extra_data->begin(), extra_data->end(), dst_it);
+ CHECK_EQ(dst_it, next_dst_it);
+ dst_it = std::copy_n(old_image.begin() + equivalence->src_offset,
+ equivalence->length, dst_it);
+ CHECK_EQ(dst_it, next_dst_it + equivalence->length);
+ }
+ offset_t gap = static_cast<offset_t>(new_image.end() - dst_it);
+ absl::optional<ConstBufferView> extra_data = extra_data_source.GetNext(gap);
+ if (!extra_data) {
+ LOG(ERROR) << "Error reading extra_data";
+ return false;
+ }
+ std::copy(extra_data->begin(), extra_data->end(), dst_it);
+ if (!equiv_source.Done() || !extra_data_source.Done()) {
+ LOG(ERROR) << "Found trailing equivalence and extra_data";
+ return false;
+ }
+ return true;
+}
+
+bool ApplyRawDelta(const PatchElementReader& patch_reader,
+ MutableBufferView new_image) {
+ EquivalenceSource equiv_source = patch_reader.GetEquivalenceSource();
+ RawDeltaSource raw_delta_source = patch_reader.GetRawDeltaSource();
+ // Traverse |equiv_source| and |raw_delta_source| in lockstep.
+ auto equivalence = equiv_source.GetNext();
+ offset_t base_copy_offset = 0;
+ for (auto delta = raw_delta_source.GetNext(); delta.has_value();
+ delta = raw_delta_source.GetNext()) {
+ while (equivalence.has_value() &&
+ base_copy_offset + equivalence->length <= delta->copy_offset) {
+ base_copy_offset += equivalence->length;
+ equivalence = equiv_source.GetNext();
+ }
+ if (!equivalence.has_value()) {
+ LOG(ERROR) << "Error reading equivalences";
+ return false;
+ }
+ CHECK_GE(delta->copy_offset, base_copy_offset);
+ CHECK_LT(delta->copy_offset, base_copy_offset + equivalence->length);
+
+ // Invert byte diff.
+ new_image[equivalence->dst_offset - base_copy_offset +
+ delta->copy_offset] += delta->diff;
+ }
+ if (!raw_delta_source.Done()) {
+ LOG(ERROR) << "Found trailing raw_delta";
+ return false;
+ }
+ return true;
+}
+
+bool ApplyReferencesCorrection(ExecutableType exe_type,
+ ConstBufferView old_image,
+ const PatchElementReader& patch,
+ MutableBufferView new_image) {
+ auto old_disasm = MakeDisassemblerOfType(old_image, exe_type);
+ auto new_disasm =
+ MakeDisassemblerOfType(ConstBufferView(new_image), exe_type);
+ if (!old_disasm || !new_disasm) {
+ LOG(ERROR) << "Failed to create Disassembler";
+ return false;
+ }
+ if (old_disasm->size() != old_image.size() ||
+ new_disasm->size() != new_image.size()) {
+ LOG(ERROR) << "Disassembler and element size mismatch";
+ return false;
+ }
+
+ ReferenceDeltaSource ref_delta_source = patch.GetReferenceDeltaSource();
+ std::map<PoolTag, std::vector<ReferenceGroup>> pool_groups;
+ for (const auto& ref_group : old_disasm->MakeReferenceGroups())
+ pool_groups[ref_group.pool_tag()].push_back(ref_group);
+
+ OffsetMapper offset_mapper(patch.GetEquivalenceSource(),
+ base::checked_cast<offset_t>(old_image.size()),
+ base::checked_cast<offset_t>(new_image.size()));
+
+ std::vector<ReferenceGroup> new_groups = new_disasm->MakeReferenceGroups();
+ for (const auto& pool_and_sub_groups : pool_groups) {
+ PoolTag pool_tag = pool_and_sub_groups.first;
+ const std::vector<ReferenceGroup>& sub_groups = pool_and_sub_groups.second;
+
+ TargetPool targets;
+ // Load "old" targets, then filter and map them to "new" targets.
+ for (ReferenceGroup group : sub_groups)
+ targets.InsertTargets(std::move(*group.GetReader(old_disasm.get())));
+ targets.FilterAndProject(offset_mapper);
+
+ // Load extra targets from patch.
+ TargetSource target_source = patch.GetExtraTargetSource(pool_tag);
+ targets.InsertTargets(&target_source);
+ if (!target_source.Done()) {
+ LOG(ERROR) << "Found trailing extra_targets";
+ return false;
+ }
+
+ // Correct all new references, and write results to |new_disasm|.
+ for (ReferenceGroup group : sub_groups) {
+ std::unique_ptr<ReferenceWriter> ref_writer =
+ new_groups[group.type_tag().value()].GetWriter(new_image,
+ new_disasm.get());
+
+ EquivalenceSource equiv_source = patch.GetEquivalenceSource();
+ for (auto equivalence = equiv_source.GetNext(); equivalence.has_value();
+ equivalence = equiv_source.GetNext()) {
+ std::unique_ptr<ReferenceReader> ref_gen = group.GetReader(
+ equivalence->src_offset, equivalence->src_end(), old_disasm.get());
+ for (auto ref = ref_gen->GetNext(); ref.has_value();
+ ref = ref_gen->GetNext()) {
+ DCHECK_GE(ref->location, equivalence->src_offset);
+ DCHECK_LT(ref->location, equivalence->src_end());
+
+ offset_t projected_target =
+ offset_mapper.ExtendedForwardProject(ref->target);
+ offset_t expected_key = targets.KeyForNearestOffset(projected_target);
+ auto delta = ref_delta_source.GetNext();
+ if (!delta.has_value()) {
+ LOG(ERROR) << "Error reading reference_delta";
+ return false;
+ }
+ const key_t key = expected_key + delta.value();
+ if (!targets.KeyIsValid(key)) {
+ LOG(ERROR) << "Invalid reference_delta";
+ return false;
+ }
+ ref->target = targets.OffsetForKey(expected_key + delta.value());
+ ref->location =
+ ref->location - equivalence->src_offset + equivalence->dst_offset;
+ ref_writer->PutNext(*ref);
+ }
+ }
+ }
+ }
+ if (!ref_delta_source.Done()) {
+ LOG(ERROR) << "Found trailing ref_delta_source";
+ return false;
+ }
+ return true;
+}
+
+bool ApplyElement(ExecutableType exe_type,
+ ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image) {
+ return ApplyEquivalenceAndExtraData(old_image, patch_reader, new_image) &&
+ ApplyRawDelta(patch_reader, new_image) &&
+ ApplyReferencesCorrection(exe_type, old_image, patch_reader,
+ new_image);
+}
+
+/******** Exported Functions ********/
+
+status::Code ApplyBuffer(ConstBufferView old_image,
+ const EnsemblePatchReader& patch_reader,
+ MutableBufferView new_image) {
+ if (!patch_reader.CheckOldFile(old_image)) {
+ LOG(ERROR) << "Invalid old_image.";
+ return status::kStatusInvalidOldImage;
+ }
+
+ for (const auto& element_patch : patch_reader.elements()) {
+ ElementMatch match = element_patch.element_match();
+ if (!ApplyElement(match.exe_type(), old_image[match.old_element.region()],
+ element_patch, new_image[match.new_element.region()]))
+ return status::kStatusFatal;
+ }
+
+ if (!patch_reader.CheckNewFile(ConstBufferView(new_image))) {
+ LOG(ERROR) << "Invalid new_image.";
+ return status::kStatusInvalidNewImage;
+ }
+ return status::kStatusSuccess;
+}
+
+} // namespace zucchini
diff --git a/zucchini_apply.h b/zucchini_apply.h
new file mode 100644
index 0000000..abab384
--- /dev/null
+++ b/zucchini_apply.h
@@ -0,0 +1,41 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_
+
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/zucchini.h"
+
+namespace zucchini {
+
+// Reads equivalences from |patch_reader| to form preliminary |new_image|,
+// copying regions from |old_image| and writing extra data from |patch_reader|.
+bool ApplyEquivalenceAndExtraData(ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image);
+
+// Reads raw delta from |patch_reader| and applies corrections to |new_image|.
+bool ApplyRawDelta(const PatchElementReader& patch_reader,
+ MutableBufferView new_image);
+
+// Corrects references in |new_image| by projecting references from |old_image|
+// and applying corrections from |patch_reader|. Both |old_image| and
+// |new_image| are matching elements associated with |exe_type|.
+bool ApplyReferencesCorrection(ExecutableType exe_type,
+ ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image);
+
+// Applies patch element with type |exe_type| from |patch_reader| on |old_image|
+// to produce |new_image|.
+bool ApplyElement(ExecutableType exe_type,
+ ConstBufferView old_image,
+ const PatchElementReader& patch_reader,
+ MutableBufferView new_image);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_
diff --git a/zucchini_apply_unittest.cc b/zucchini_apply_unittest.cc
new file mode 100644
index 0000000..f1cb853
--- /dev/null
+++ b/zucchini_apply_unittest.cc
@@ -0,0 +1,14 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_apply.h"
+
+#include "components/zucchini/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+// TODO(huangs): Add tests.
+
+} // namespace zucchini
diff --git a/zucchini_commands.cc b/zucchini_commands.cc
new file mode 100644
index 0000000..93929bd
--- /dev/null
+++ b/zucchini_commands.cc
@@ -0,0 +1,141 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_commands.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include "base/command_line.h"
+#include "base/files/file.h"
+#include "base/files/file_path.h"
+#include "base/files/memory_mapped_file.h"
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/crc32.h"
+#include "components/zucchini/io_utils.h"
+#include "components/zucchini/mapped_file.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/zucchini_integration.h"
+#include "components/zucchini/zucchini_tools.h"
+
+namespace {
+
+/******** Command-line Switches ********/
+
+constexpr char kSwitchDump[] = "dump";
+constexpr char kSwitchImpose[] = "impose";
+constexpr char kSwitchKeep[] = "keep";
+constexpr char kSwitchRaw[] = "raw";
+
+} // namespace
+
+zucchini::status::Code MainGen(MainParams params) {
+ CHECK_EQ(3U, params.file_paths.size());
+ return zucchini::Generate(
+ params.file_paths[0], params.file_paths[1], params.file_paths[2],
+ params.command_line.HasSwitch(kSwitchKeep),
+ params.command_line.HasSwitch(kSwitchRaw),
+ params.command_line.GetSwitchValueASCII(kSwitchImpose));
+}
+
+zucchini::status::Code MainApply(MainParams params) {
+ CHECK_EQ(3U, params.file_paths.size());
+ return zucchini::Apply(params.file_paths[0], params.file_paths[1],
+ params.file_paths[2],
+ params.command_line.HasSwitch(kSwitchKeep));
+}
+
+zucchini::status::Code MainRead(MainParams params) {
+ CHECK_EQ(1U, params.file_paths.size());
+ base::File input_file(params.file_paths[0],
+ base::File::FLAG_OPEN | base::File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ zucchini::MappedFileReader input(std::move(input_file));
+ if (input.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << input.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+
+ bool do_dump = params.command_line.HasSwitch(kSwitchDump);
+ zucchini::status::Code status = zucchini::ReadReferences(
+ {input.data(), input.length()}, do_dump, params.out);
+ if (status != zucchini::status::kStatusSuccess)
+ params.err << "Fatal error found when dumping references." << std::endl;
+ return status;
+}
+
+zucchini::status::Code MainDetect(MainParams params) {
+ CHECK_EQ(1U, params.file_paths.size());
+ base::File input_file(params.file_paths[0],
+ base::File::FLAG_OPEN | base::File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ zucchini::MappedFileReader input(std::move(input_file));
+ if (input.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << input.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+
+ std::vector<zucchini::ConstBufferView> sub_image_list;
+ zucchini::status::Code result = zucchini::DetectAll(
+ {input.data(), input.length()}, params.out, &sub_image_list);
+ if (result != zucchini::status::kStatusSuccess)
+ params.err << "Fatal error found when detecting executables." << std::endl;
+ return result;
+}
+
+zucchini::status::Code MainMatch(MainParams params) {
+ CHECK_EQ(2U, params.file_paths.size());
+ using base::File;
+ File old_file(params.file_paths[0], File::FLAG_OPEN | File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ zucchini::MappedFileReader old_image(std::move(old_file));
+ if (old_image.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << old_image.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+ File new_file(params.file_paths[1], File::FLAG_OPEN | File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ zucchini::MappedFileReader new_image(std::move(new_file));
+ if (new_image.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[1].value() << ": "
+ << new_image.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+
+ std::string imposed_matches =
+ params.command_line.GetSwitchValueASCII(kSwitchImpose);
+ zucchini::status::Code status =
+ zucchini::MatchAll({old_image.data(), old_image.length()},
+ {new_image.data(), new_image.length()},
+ std::move(imposed_matches), params.out);
+ if (status != zucchini::status::kStatusSuccess)
+ params.err << "Fatal error found when matching executables." << std::endl;
+ return status;
+}
+
+zucchini::status::Code MainCrc32(MainParams params) {
+ CHECK_EQ(1U, params.file_paths.size());
+ base::File image_file(params.file_paths[0],
+ base::File::FLAG_OPEN | base::File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ zucchini::MappedFileReader image(std::move(image_file));
+ if (image.HasError()) {
+ LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": "
+ << image.error();
+ return zucchini::status::kStatusFileReadError;
+ }
+
+ uint32_t crc =
+ zucchini::CalculateCrc32(image.data(), image.data() + image.length());
+ params.out << "CRC32: " << zucchini::AsHex<8>(crc) << std::endl;
+ return zucchini::status::kStatusSuccess;
+}
diff --git a/zucchini_commands.h b/zucchini_commands.h
new file mode 100644
index 0000000..cef18dc
--- /dev/null
+++ b/zucchini_commands.h
@@ -0,0 +1,51 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_
+
+#include <iosfwd>
+#include <vector>
+
+#include "base/files/file_path.h"
+#include "components/zucchini/zucchini.h"
+
+// Zucchini commands and tools that can be invoked from command-line.
+
+namespace base {
+
+class CommandLine;
+
+} // namespace base
+
+// Aggregated parameter for Main*() functions, to simplify interface.
+struct MainParams {
+ const base::CommandLine& command_line;
+ const std::vector<base::FilePath>& file_paths;
+ std::ostream& out;
+ std::ostream& err;
+};
+
+// Signature of a Zucchini Command Function.
+using CommandFunction = zucchini::status::Code (*)(MainParams);
+
+// Command Function: Patch generation.
+zucchini::status::Code MainGen(MainParams params);
+
+// Command Function: Patch application.
+zucchini::status::Code MainApply(MainParams params);
+
+// Command Function: Read and dump references from an executable.
+zucchini::status::Code MainRead(MainParams params);
+
+// Command Function: Scan an archive file and detect executables.
+zucchini::status::Code MainDetect(MainParams params);
+
+// Command Function: Scan two archive files and match detected executables.
+zucchini::status::Code MainMatch(MainParams params);
+
+// Command Function: Compute CRC-32 of a file.
+zucchini::status::Code MainCrc32(MainParams params);
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_
diff --git a/zucchini_exe_version.rc.version b/zucchini_exe_version.rc.version
new file mode 100644
index 0000000..9d46a4b
--- /dev/null
+++ b/zucchini_exe_version.rc.version
@@ -0,0 +1,46 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <verrsrc.h>
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION @MAJOR@,@MINOR@,@BUILD@,@PATCH@
+ PRODUCTVERSION @MAJOR@,@MINOR@,@BUILD@,@PATCH@
+ FILEFLAGSMASK 0x17L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x4L
+ FILETYPE 0x1L
+ FILESUBTYPE 0x0L
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0"
+ BEGIN
+ VALUE "CompanyName", "@COMPANY_FULLNAME@"
+ VALUE "FileDescription", "Zucchini"
+ VALUE "FileVersion", "@MAJOR@.@MINOR@.@BUILD@.@PATCH@"
+ VALUE "InternalName", "zucchini"
+ VALUE "LegalCopyright", "@COPYRIGHT@"
+ VALUE "ProductName", "Zucchini"
+ VALUE "ProductVersion", "@MAJOR@.@MINOR@.@BUILD@.@PATCH@"
+ VALUE "CompanyShortName", "@COMPANY_SHORTNAME@"
+ VALUE "ProductShortName", "Zucchini"
+ VALUE "LastChange", "@LASTCHANGE@"
+ VALUE "Official Build", "@OFFICIAL_BUILD@"
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
diff --git a/zucchini_gen.cc b/zucchini_gen.cc
new file mode 100644
index 0000000..3735d0f
--- /dev/null
+++ b/zucchini_gen.cc
@@ -0,0 +1,461 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_gen.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/encoded_view.h"
+#include "components/zucchini/ensemble_matcher.h"
+#include "components/zucchini/equivalence_map.h"
+#include "components/zucchini/heuristic_ensemble_matcher.h"
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/imposed_ensemble_matcher.h"
+#include "components/zucchini/patch_writer.h"
+#include "components/zucchini/reference_bytes_mixer.h"
+#include "components/zucchini/suffix_array.h"
+#include "components/zucchini/targets_affinity.h"
+
+namespace zucchini {
+
+namespace {
+
+// Parameters for patch generation.
+constexpr double kMinEquivalenceSimilarity = 12.0;
+constexpr double kMinLabelAffinity = 64.0;
+
+} // namespace
+
+std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets,
+ const TargetPool& new_targets) {
+ std::vector<offset_t> extra_targets;
+ std::set_difference(
+ new_targets.begin(), new_targets.end(), projected_old_targets.begin(),
+ projected_old_targets.end(), std::back_inserter(extra_targets));
+ return extra_targets;
+}
+
+// Label matching (between "old" and "new") can guide EquivalenceMap
+// construction; but EquivalenceMap induces Label matching. This apparent "chick
+// and egg" problem is solved by alternating 2 steps |num_iterations| times:
+// - Associate targets based on previous EquivalenceMap. Note on the first
+// iteration, EquivalenceMap is empty, resulting in a no-op.
+// - Construct refined EquivalenceMap based on new targets associations.
+EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index,
+ int num_iterations) {
+ size_t pool_count = old_image_index.PoolCount();
+ // |target_affinities| is outside the loop to reduce allocation.
+ std::vector<TargetsAffinity> target_affinities(pool_count);
+
+ EquivalenceMap equivalence_map;
+ for (int i = 0; i < num_iterations; ++i) {
+ EncodedView old_view(old_image_index);
+ EncodedView new_view(new_image_index);
+
+ // Associate targets from "old" to "new" image based on |equivalence_map|
+ // for each reference pool.
+ for (const auto& old_pool_tag_and_targets :
+ old_image_index.target_pools()) {
+ PoolTag pool_tag = old_pool_tag_and_targets.first;
+ target_affinities[pool_tag.value()].InferFromSimilarities(
+ equivalence_map, old_pool_tag_and_targets.second.targets(),
+ new_image_index.pool(pool_tag).targets());
+
+ // Creates labels for strongly associated targets.
+ std::vector<uint32_t> old_labels;
+ std::vector<uint32_t> new_labels;
+ size_t label_bound = target_affinities[pool_tag.value()].AssignLabels(
+ kMinLabelAffinity, &old_labels, &new_labels);
+ old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
+ new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
+ }
+ // Build equivalence map, where references in "old" and "new" that share
+ // common semantics (i.e., their respective targets were associated earlier
+ // on) are considered equivalent.
+ equivalence_map.Build(
+ MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()),
+ old_view, new_view, target_affinities, kMinEquivalenceSimilarity);
+ }
+
+ return equivalence_map;
+}
+
+bool GenerateEquivalencesAndExtraData(ConstBufferView new_image,
+ const EquivalenceMap& equivalence_map,
+ PatchElementWriter* patch_writer) {
+ // Make 2 passes through |equivalence_map| to reduce write churn.
+ // Pass 1: Write all equivalences.
+ EquivalenceSink equivalences_sink;
+ for (const EquivalenceCandidate& candidate : equivalence_map)
+ equivalences_sink.PutNext(candidate.eq);
+ patch_writer->SetEquivalenceSink(std::move(equivalences_sink));
+
+  // Pass 2: Write data in gaps in |new_image| before / between / after
+ // |equivalence_map| as "extra data".
+ ExtraDataSink extra_data_sink;
+ offset_t dst_offset = 0;
+ for (const EquivalenceCandidate& candidate : equivalence_map) {
+ extra_data_sink.PutNext(
+ new_image[{dst_offset, candidate.eq.dst_offset - dst_offset}]);
+ dst_offset = candidate.eq.dst_end();
+ DCHECK_LE(dst_offset, new_image.size());
+ }
+ extra_data_sink.PutNext(
+ new_image[{dst_offset, new_image.size() - dst_offset}]);
+ patch_writer->SetExtraDataSink(std::move(extra_data_sink));
+ return true;
+}
+
+bool GenerateRawDelta(ConstBufferView old_image,
+ ConstBufferView new_image,
+ const EquivalenceMap& equivalence_map,
+ const ImageIndex& new_image_index,
+ ReferenceBytesMixer* reference_bytes_mixer,
+ PatchElementWriter* patch_writer) {
+ RawDeltaSink raw_delta_sink;
+
+ // Visit |equivalence_map| blocks in |new_image| order. Find and emit all
+ // bytewise differences.
+ offset_t base_copy_offset = 0;
+ for (const EquivalenceCandidate& candidate : equivalence_map) {
+ Equivalence equivalence = candidate.eq;
+ // For each bytewise delta from |old_image| to |new_image|, compute "copy
+ // offset" and pass it along with delta to the sink.
+ for (offset_t i = 0; i < equivalence.length;) {
+ if (new_image_index.IsReference(equivalence.dst_offset + i)) {
+ DCHECK(new_image_index.IsToken(equivalence.dst_offset + i));
+ TypeTag type_tag =
+ new_image_index.LookupType(equivalence.dst_offset + i);
+
+ // Reference delta has its own flow. On some architectures (e.g., x86)
+ // this does not involve raw delta, so we skip. On other architectures
+ // (e.g., ARM) references are mixed with other bits that may change, so
+ // we need to "mix" data and store some changed bits into raw delta.
+ int num_bytes = reference_bytes_mixer->NumBytes(type_tag.value());
+ if (num_bytes) {
+ ConstBufferView mixed_ref_bytes = reference_bytes_mixer->Mix(
+ type_tag.value(), old_image, equivalence.src_offset + i,
+ new_image, equivalence.dst_offset + i);
+ for (int j = 0; j < num_bytes; ++j) {
+ int8_t diff =
+ mixed_ref_bytes[j] - old_image[equivalence.src_offset + i + j];
+ if (diff)
+ raw_delta_sink.PutNext({base_copy_offset + i + j, diff});
+ }
+ }
+ i += new_image_index.refs(type_tag).width();
+ DCHECK_LE(i, equivalence.length);
+ } else {
+ int8_t diff = new_image[equivalence.dst_offset + i] -
+ old_image[equivalence.src_offset + i];
+ if (diff)
+ raw_delta_sink.PutNext({base_copy_offset + i, diff});
+ ++i;
+ }
+ }
+ base_copy_offset += equivalence.length;
+ }
+ patch_writer->SetRawDeltaSink(std::move(raw_delta_sink));
+ return true;
+}
+
+bool GenerateReferencesDelta(const ReferenceSet& src_refs,
+ const ReferenceSet& dst_refs,
+ const TargetPool& projected_target_pool,
+ const OffsetMapper& offset_mapper,
+ const EquivalenceMap& equivalence_map,
+ ReferenceDeltaSink* reference_delta_sink) {
+ size_t ref_width = src_refs.width();
+ auto dst_ref = dst_refs.begin();
+
+ // For each equivalence, for each covered |dst_ref| and the matching
+ // |src_ref|, emit the delta between the respective target labels. Note: By
+ // construction, each reference location (with |ref_width|) lies either
+ // completely inside an equivalence or completely outside. We perform
+ // "straddle checks" throughout to verify this assertion.
+ for (const auto& candidate : equivalence_map) {
+ const Equivalence equiv = candidate.eq;
+ // Increment |dst_ref| until it catches up to |equiv|.
+ while (dst_ref != dst_refs.end() && dst_ref->location < equiv.dst_offset)
+ ++dst_ref;
+ if (dst_ref == dst_refs.end())
+ break;
+ if (dst_ref->location >= equiv.dst_end())
+ continue;
+ // Straddle check.
+ DCHECK_LE(dst_ref->location + ref_width, equiv.dst_end());
+
+ offset_t src_loc =
+ equiv.src_offset + (dst_ref->location - equiv.dst_offset);
+ auto src_ref = std::lower_bound(
+ src_refs.begin(), src_refs.end(), src_loc,
+ [](const Reference& a, offset_t b) { return a.location < b; });
+ for (; dst_ref != dst_refs.end() &&
+ dst_ref->location + ref_width <= equiv.dst_end();
+ ++dst_ref, ++src_ref) {
+ // Local offset of |src_ref| should match that of |dst_ref|.
+ DCHECK_EQ(src_ref->location - equiv.src_offset,
+ dst_ref->location - equiv.dst_offset);
+ offset_t old_offset = src_ref->target;
+ offset_t new_estimated_offset =
+ offset_mapper.ExtendedForwardProject(old_offset);
+ offset_t new_estimated_key =
+ projected_target_pool.KeyForNearestOffset(new_estimated_offset);
+ offset_t new_offset = dst_ref->target;
+ offset_t new_key = projected_target_pool.KeyForOffset(new_offset);
+
+ reference_delta_sink->PutNext(
+ static_cast<int32_t>(new_key - new_estimated_key));
+ }
+ if (dst_ref == dst_refs.end())
+ break; // Done.
+ // Straddle check.
+ DCHECK_GE(dst_ref->location, equiv.dst_end());
+ }
+ return true;
+}
+
+bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets,
+ PoolTag pool_tag,
+ PatchElementWriter* patch_writer) {
+ TargetSink target_sink;
+ for (offset_t target : extra_targets)
+ target_sink.PutNext(target);
+ patch_writer->SetTargetSink(pool_tag, std::move(target_sink));
+ return true;
+}
+
+bool GenerateRawElement(const std::vector<offset_t>& old_sa,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ PatchElementWriter* patch_writer) {
+ ImageIndex old_image_index(old_image);
+ ImageIndex new_image_index(new_image);
+
+ EquivalenceMap equivalences;
+ equivalences.Build(old_sa, EncodedView(old_image_index),
+ EncodedView(new_image_index), {},
+ kMinEquivalenceSimilarity);
+
+ patch_writer->SetReferenceDeltaSink({});
+
+ ReferenceBytesMixer no_op_bytes_mixer;
+ return GenerateEquivalencesAndExtraData(new_image, equivalences,
+ patch_writer) &&
+ GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
+ &no_op_bytes_mixer, patch_writer);
+}
+
+bool GenerateExecutableElement(ExecutableType exe_type,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ PatchElementWriter* patch_writer) {
+ // Initialize Disassemblers.
+ std::unique_ptr<Disassembler> old_disasm =
+ MakeDisassemblerOfType(old_image, exe_type);
+ std::unique_ptr<Disassembler> new_disasm =
+ MakeDisassemblerOfType(new_image, exe_type);
+ if (!old_disasm || !new_disasm) {
+ LOG(ERROR) << "Failed to create Disassembler.";
+ return false;
+ }
+ DCHECK_EQ(old_disasm->GetExeType(), new_disasm->GetExeType());
+
+ // Initialize ImageIndexes.
+ ImageIndex old_image_index(old_image);
+ ImageIndex new_image_index(new_image);
+ if (!old_image_index.Initialize(old_disasm.get()) ||
+ !new_image_index.Initialize(new_disasm.get())) {
+ LOG(ERROR) << "Failed to create ImageIndex: Overlapping references found?";
+ return false;
+ }
+ DCHECK_EQ(old_image_index.PoolCount(), new_image_index.PoolCount());
+
+ EquivalenceMap equivalences =
+ CreateEquivalenceMap(old_image_index, new_image_index,
+ new_disasm->num_equivalence_iterations());
+ OffsetMapper offset_mapper(equivalences,
+ base::checked_cast<offset_t>(old_image.size()),
+ base::checked_cast<offset_t>(new_image.size()));
+
+ ReferenceDeltaSink reference_delta_sink;
+ for (const auto& old_targets : old_image_index.target_pools()) {
+ PoolTag pool_tag = old_targets.first;
+ TargetPool projected_old_targets = old_targets.second;
+ projected_old_targets.FilterAndProject(offset_mapper);
+ std::vector<offset_t> extra_target =
+ FindExtraTargets(projected_old_targets, new_image_index.pool(pool_tag));
+ projected_old_targets.InsertTargets(extra_target);
+
+ if (!GenerateExtraTargets(extra_target, pool_tag, patch_writer))
+ return false;
+ for (TypeTag type_tag : old_targets.second.types()) {
+ if (!GenerateReferencesDelta(old_image_index.refs(type_tag),
+ new_image_index.refs(type_tag),
+ projected_old_targets, offset_mapper,
+ equivalences, &reference_delta_sink)) {
+ return false;
+ }
+ }
+ }
+ patch_writer->SetReferenceDeltaSink(std::move(reference_delta_sink));
+ std::unique_ptr<ReferenceBytesMixer> reference_bytes_mixer =
+ ReferenceBytesMixer::Create(*old_disasm, *new_disasm);
+ return GenerateEquivalencesAndExtraData(new_image, equivalences,
+ patch_writer) &&
+ GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
+ reference_bytes_mixer.get(), patch_writer);
+}
+
+status::Code GenerateBufferCommon(ConstBufferView old_image,
+ ConstBufferView new_image,
+ std::unique_ptr<EnsembleMatcher> matcher,
+ EnsemblePatchWriter* patch_writer) {
+ if (!matcher->RunMatch(old_image, new_image)) {
+ LOG(INFO) << "RunMatch() failed, generating raw patch.";
+ return GenerateBufferRaw(old_image, new_image, patch_writer);
+ }
+
+ const std::vector<ElementMatch>& matches = matcher->matches();
+ LOG(INFO) << "Matching: Found " << matches.size()
+ << " nontrivial matches and " << matcher->num_identical()
+ << " identical matches.";
+ size_t num_elements = matches.size();
+ if (num_elements == 0) {
+ LOG(INFO) << "No nontrival matches, generating raw patch.";
+ return GenerateBufferRaw(old_image, new_image, patch_writer);
+ }
+
+ // "Gaps" are |new_image| bytes not covered by new_elements in |matches|.
+ // These are treated as raw data, and patched against the entire |old_image|.
+
+ // |patch_element_map| (keyed by "new" offsets) stores PatchElementWriter
+ // results so elements and "gap" results can be computed separately (to reduce
+ // peak memory usage), and later, properly serialized to |patch_writer|
+ // ordered by "new" offset.
+ std::map<offset_t, PatchElementWriter> patch_element_map;
+
+ // Variables to track element patching successes.
+ std::vector<BufferRegion> covered_new_regions;
+ size_t covered_new_bytes = 0;
+
+ // Process elements first, since non-fatal failures may turn some into gaps.
+ for (const ElementMatch& match : matches) {
+ BufferRegion new_region = match.new_element.region();
+ LOG(INFO) << "--- Match [" << new_region.lo() << "," << new_region.hi()
+ << ")";
+
+ auto it_and_success = patch_element_map.emplace(
+ base::checked_cast<offset_t>(new_region.lo()), match);
+ DCHECK(it_and_success.second);
+ PatchElementWriter& patch_element = it_and_success.first->second;
+
+ ConstBufferView old_sub_image = old_image[match.old_element.region()];
+ ConstBufferView new_sub_image = new_image[new_region];
+ if (GenerateExecutableElement(match.exe_type(), old_sub_image,
+ new_sub_image, &patch_element)) {
+ covered_new_regions.push_back(new_region);
+ covered_new_bytes += new_region.size;
+ } else {
+ LOG(INFO) << "Fall back to raw patching.";
+ patch_element_map.erase(it_and_success.first);
+ }
+ }
+
+ if (covered_new_bytes < new_image.size()) {
+ // Process all "gaps", which are patched against the entire "old" image. To
+ // compute equivalence maps, "gaps" share a common suffix array
+ // |old_sa_raw|, whose lifetime is kept separated from elements' suffix
+ // arrays to reduce peak memory.
+ Element entire_old_element(old_image.local_region(), kExeTypeNoOp);
+ ImageIndex old_image_index(old_image);
+ EncodedView old_view_raw(old_image_index);
+ std::vector<offset_t> old_sa_raw =
+ MakeSuffixArray<InducedSuffixSort>(old_view_raw, size_t(256));
+
+ offset_t gap_lo = 0;
+ // Add sentinel that points to end of "new" file, to simplify gap iteration.
+ covered_new_regions.emplace_back(BufferRegion{new_image.size(), 0});
+
+ for (const BufferRegion& covered : covered_new_regions) {
+ offset_t gap_hi = base::checked_cast<offset_t>(covered.lo());
+ DCHECK_GE(gap_hi, gap_lo);
+ offset_t gap_size = gap_hi - gap_lo;
+ if (gap_size > 0) {
+ LOG(INFO) << "--- Gap [" << gap_lo << "," << gap_hi << ")";
+
+ ElementMatch gap_match{{entire_old_element, kExeTypeNoOp},
+ {{gap_lo, gap_size}, kExeTypeNoOp}};
+ auto it_and_success = patch_element_map.emplace(gap_lo, gap_match);
+ DCHECK(it_and_success.second);
+ PatchElementWriter& patch_element = it_and_success.first->second;
+
+ ConstBufferView new_sub_image = new_image[{gap_lo, gap_size}];
+ if (!GenerateRawElement(old_sa_raw, old_image, new_sub_image,
+ &patch_element)) {
+ return status::kStatusFatal;
+ }
+ }
+ gap_lo = base::checked_cast<offset_t>(covered.hi());
+ }
+ }
+
+ // Write all PatchElementWriter sorted by "new" offset.
+ for (auto& new_lo_and_patch_element : patch_element_map)
+ patch_writer->AddElement(std::move(new_lo_and_patch_element.second));
+
+ return status::kStatusSuccess;
+}
+
+/******** Exported Functions ********/
+
+status::Code GenerateBuffer(ConstBufferView old_image,
+ ConstBufferView new_image,
+ EnsemblePatchWriter* patch_writer) {
+ return GenerateBufferCommon(
+ old_image, new_image, std::make_unique<HeuristicEnsembleMatcher>(nullptr),
+ patch_writer);
+}
+
+status::Code GenerateBufferImposed(ConstBufferView old_image,
+ ConstBufferView new_image,
+ std::string imposed_matches,
+ EnsemblePatchWriter* patch_writer) {
+ if (imposed_matches.empty())
+ return GenerateBuffer(old_image, new_image, patch_writer);
+
+ return GenerateBufferCommon(
+ old_image, new_image,
+ std::make_unique<ImposedEnsembleMatcher>(imposed_matches), patch_writer);
+}
+
+status::Code GenerateBufferRaw(ConstBufferView old_image,
+ ConstBufferView new_image,
+ EnsemblePatchWriter* patch_writer) {
+ ImageIndex old_image_index(old_image);
+ EncodedView old_view(old_image_index);
+ std::vector<offset_t> old_sa =
+ MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());
+
+ PatchElementWriter patch_element(
+ {Element(old_image.local_region()), Element(new_image.local_region())});
+ if (!GenerateRawElement(old_sa, old_image, new_image, &patch_element))
+ return status::kStatusFatal;
+ patch_writer->AddElement(std::move(patch_element));
+ return status::kStatusSuccess;
+}
+
+} // namespace zucchini
diff --git a/zucchini_gen.h b/zucchini_gen.h
new file mode 100644
index 0000000..ac28263
--- /dev/null
+++ b/zucchini_gen.h
@@ -0,0 +1,85 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_
+
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/zucchini.h"
+
+namespace zucchini {
+
+class EquivalenceMap;
+class OffsetMapper;
+class ImageIndex;
+class PatchElementWriter;
+class ReferenceBytesMixer;
+class ReferenceDeltaSink;
+class ReferenceSet;
+class TargetPool;
+
+// Extracts all targets in |new_targets| with no associated target in
+// |projected_old_targets| and returns these targets in a new vector.
+std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets,
+ const TargetPool& new_targets);
+
+// Creates an EquivalenceMap from "old" image to "new" image and returns the
+// result. The params |*_image_index|:
+// - Provide "old" and "new" raw image data and references.
+// - Mediate Label matching, which links references between "old" and "new", and
+// guides EquivalenceMap construction.
+EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index,
+ const ImageIndex& new_image_index);
+
+// Writes equivalences from |equivalence_map|, and extra data from |new_image|
+// found in gaps between equivalences to |patch_writer|.
+bool GenerateEquivalencesAndExtraData(ConstBufferView new_image,
+ const EquivalenceMap& equivalence_map,
+ PatchElementWriter* patch_writer);
+
+// Writes raw delta between |old_image| and |new_image| matched by
+// |equivalence_map| to |patch_writer|, using |new_image_index| to ignore
+// reference bytes.
+bool GenerateRawDelta(ConstBufferView old_image,
+ ConstBufferView new_image,
+ const EquivalenceMap& equivalence_map,
+ const ImageIndex& new_image_index,
+ ReferenceBytesMixer* reference_bytes_mixer,
+ PatchElementWriter* patch_writer);
+
+// Writes reference delta between references from |old_refs| and from
+// |new_refs| to |patch_writer|. |projected_target_pool| contains projected
+// targets from old to new image for references pool associated with |new_refs|.
+bool GenerateReferencesDelta(const ReferenceSet& src_refs,
+ const ReferenceSet& dst_refs,
+ const TargetPool& projected_target_pool,
+ const OffsetMapper& offset_mapper,
+ const EquivalenceMap& equivalence_map,
+ ReferenceDeltaSink* reference_delta_sink);
+
+// Writes |extra_targets| associated with |pool_tag| to |patch_writer|.
+bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets,
+ PoolTag pool_tag,
+ PatchElementWriter* patch_writer);
+
+// Generates raw patch element data between |old_image| and |new_image|, and
+// writes them to |patch_writer|. |old_sa| is the suffix array for |old_image|.
+bool GenerateRawElement(const std::vector<offset_t>& old_sa,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ PatchElementWriter* patch_writer);
+
+// Generates patch element of type |exe_type| from |old_image| to |new_image|,
+// and writes it to |patch_writer|.
+bool GenerateExecutableElement(ExecutableType exe_type,
+ ConstBufferView old_image,
+ ConstBufferView new_image,
+ PatchElementWriter* patch_writer);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_
diff --git a/zucchini_gen_unittest.cc b/zucchini_gen_unittest.cc
new file mode 100644
index 0000000..3a6d2cb
--- /dev/null
+++ b/zucchini_gen_unittest.cc
@@ -0,0 +1,180 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_gen.h"
+
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "components/zucchini/equivalence_map.h"
+#include "components/zucchini/image_index.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/test_disassembler.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+using OffsetVector = std::vector<offset_t>;
+
+// In normal usage, 0.0 is an unrealistic similarity value for an
+// EquivalenceCandidate. Since similarity doesn't affect results for various unit
+// tests in this file, we use this dummy value for simplicity.
+constexpr double kDummySim = 0.0;
+
+// Helper function wrapping GenerateReferencesDelta().
+std::vector<int32_t> GenerateReferencesDeltaTest(
+ std::vector<Reference>&& old_references,
+ std::vector<Reference>&& new_references,
+ std::vector<offset_t>&& exp_old_targets,
+ std::vector<offset_t>&& exp_projected_old_targets,
+ EquivalenceMap&& equivalence_map) {
+ // OffsetMapper needs image sizes for forward-projection overflow check. These
+ // are tested elsewhere, so just use arbitrary large value.
+ constexpr offset_t kOldImageSize = 1000000;
+ constexpr offset_t kNewImageSize = 1001000;
+
+ ReferenceDeltaSink reference_delta_sink;
+
+ TargetPool old_targets;
+ old_targets.InsertTargets(old_references);
+ ReferenceSet old_refs({1, TypeTag(0), PoolTag(0)}, old_targets);
+ old_refs.InitReferences(old_references);
+ EXPECT_EQ(exp_old_targets, old_targets.targets());
+
+ TargetPool new_targets;
+ new_targets.InsertTargets(new_references);
+ ReferenceSet new_refs({1, TypeTag(0), PoolTag(0)}, new_targets);
+ new_refs.InitReferences(new_references);
+
+ OffsetMapper offset_mapper(equivalence_map, kOldImageSize, kNewImageSize);
+ TargetPool projected_old_targets = old_targets;
+ projected_old_targets.FilterAndProject(offset_mapper);
+
+ std::vector<offset_t> extra_target =
+ FindExtraTargets(projected_old_targets, new_targets);
+ projected_old_targets.InsertTargets(extra_target);
+ EXPECT_EQ(exp_projected_old_targets, projected_old_targets.targets());
+
+ GenerateReferencesDelta(old_refs, new_refs, projected_old_targets,
+ offset_mapper, equivalence_map,
+ &reference_delta_sink);
+
+ // Serialize |reference_delta_sink| to patch format, and read it back as
+ // std::vector<int32_t>.
+ std::vector<uint8_t> buffer(reference_delta_sink.SerializedSize());
+ BufferSink sink(buffer.data(), buffer.size());
+ reference_delta_sink.SerializeInto(&sink);
+
+ BufferSource source(buffer.data(), buffer.size());
+ ReferenceDeltaSource reference_delta_source;
+ EXPECT_TRUE(reference_delta_source.Initialize(&source));
+ std::vector<int32_t> delta_vec;
+ for (auto delta = reference_delta_source.GetNext(); delta.has_value();
+ delta = reference_delta_source.GetNext()) {
+ delta_vec.push_back(*delta);
+ }
+ EXPECT_TRUE(reference_delta_source.Done());
+ return delta_vec;
+}
+
+} // namespace
+
+TEST(ZucchiniGenTest, FindExtraTargets) {
+ EXPECT_EQ(OffsetVector(), FindExtraTargets({}, {}));
+ EXPECT_EQ(OffsetVector(), FindExtraTargets(TargetPool({3}), {}));
+ EXPECT_EQ(OffsetVector(), FindExtraTargets(TargetPool({3}), TargetPool({3})));
+ EXPECT_EQ(OffsetVector({4}),
+ FindExtraTargets(TargetPool({3}), TargetPool({4})));
+ EXPECT_EQ(OffsetVector({4}),
+ FindExtraTargets(TargetPool({3}), TargetPool({3, 4})));
+ EXPECT_EQ(OffsetVector({4}),
+ FindExtraTargets(TargetPool({2, 3}), TargetPool({3, 4})));
+ EXPECT_EQ(OffsetVector({3, 5}),
+ FindExtraTargets(TargetPool({2, 4}), TargetPool({3, 5})));
+}
+
+TEST(ZucchiniGenTest, GenerateReferencesDelta) {
+ // No equivalences.
+ EXPECT_EQ(std::vector<int32_t>(),
+ GenerateReferencesDeltaTest({}, {}, {}, {}, EquivalenceMap()));
+ EXPECT_EQ(std::vector<int32_t>(),
+ GenerateReferencesDeltaTest({{10, 0}}, {{20, 0}}, {0}, {0},
+ EquivalenceMap()));
+
+ // Simple cases with one equivalence.
+ EXPECT_EQ(
+ std::vector<int32_t>({0}), // {0 - 0}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}}, {{20, 3}}, {3}, {3},
+ EquivalenceMap({{{3, 3, 1}, kDummySim}, {{10, 20, 4}, kDummySim}})));
+ EXPECT_EQ(
+ std::vector<int32_t>({-1}), // {0 - 1}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}}, {{20, 3}}, {3}, {3, 4},
+ EquivalenceMap({{{3, 4, 1}, kDummySim}, {{10, 20, 4}, kDummySim}})));
+ EXPECT_EQ(
+ std::vector<int32_t>({1}), // {1 - 0}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}}, {{20, 3}}, {3}, {2, 3},
+ EquivalenceMap({{{3, 2, 1}, kDummySim}, {{10, 20, 4}, kDummySim}})));
+ EXPECT_EQ(std::vector<int32_t>({1, -1}), // {1 - 0, 0 - 1}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}, {11, 4}}, {{20, 3}, {21, 4}}, {3, 4}, {2, 3, 4, 5},
+ EquivalenceMap({{{3, 2, 1}, kDummySim},
+ {{4, 5, 1}, kDummySim},
+ {{10, 20, 4}, kDummySim}})));
+
+ EXPECT_EQ(
+ std::vector<int32_t>({0, 0}), // {1 - 1, 2 - 2}.
+ GenerateReferencesDeltaTest(
+ {{10, 3}, {11, 4}, {12, 5}, {13, 6}},
+ {{20, 3}, {21, 4}, {22, 5}, {23, 6}}, {3, 4, 5, 6}, {3, 4, 5, 6},
+ EquivalenceMap({{{3, 3, 4}, kDummySim}, {{11, 21, 2}, kDummySim}})));
+
+ // Multiple equivalences.
+ EXPECT_EQ(std::vector<int32_t>({-1, 1}), // {0 - 1, 1 - 0}.
+ GenerateReferencesDeltaTest(
+ {{10, 0}, {12, 1}}, {{10, 0}, {12, 1}}, {0, 1}, {0, 1},
+ EquivalenceMap({{{0, 0, 2}, kDummySim},
+ {{12, 10, 2}, kDummySim},
+ {{10, 12, 2}, kDummySim}})));
+ EXPECT_EQ(
+ std::vector<int32_t>({0, 0}), // {0 - 0, 1 - 1}.
+ GenerateReferencesDeltaTest(
+ {{0, 0}, {2, 2}}, {{0, 0}, {2, 2}}, {0, 2}, {0, 2},
+ EquivalenceMap({{{2, 0, 2}, kDummySim}, {{0, 2, 2}, kDummySim}})));
+
+ EXPECT_EQ(std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}.
+ GenerateReferencesDeltaTest(
+ {{10, 0}, {12, 1}, {14, 2}}, {{10, 0}, {12, 1}, {14, 2}},
+ {0, 1, 2}, {0, 1, 2},
+ EquivalenceMap({{{0, 0, 3}, kDummySim},
+ {{14, 10, 2}, kDummySim},
+ {{10, 14, 2}, kDummySim}})));
+
+ EXPECT_EQ(std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}.
+ GenerateReferencesDeltaTest(
+ {{11, 0}, {14, 1}, {17, 2}}, {{11, 0}, {14, 1}, {17, 2}},
+ {0, 1, 2}, {0, 1, 2},
+ EquivalenceMap({{{0, 0, 3}, kDummySim},
+ {{16, 10, 3}, kDummySim},
+ {{10, 16, 3}, kDummySim}})));
+
+ EXPECT_EQ(
+ std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}.
+ GenerateReferencesDeltaTest({{10, 0}, {14, 2}, {16, 1}},
+ {{10, 0}, {14, 2}}, {0, 1, 2}, {0, 1, 2},
+ EquivalenceMap({{{0, 0, 3}, kDummySim},
+ {{14, 10, 2}, kDummySim},
+ {{12, 12, 2}, kDummySim},
+ {{10, 14, 2}, kDummySim}})));
+}
+
+// TODO(huangs): Add more tests.
+
+} // namespace zucchini
diff --git a/zucchini_integration.cc b/zucchini_integration.cc
new file mode 100644
index 0000000..ff7e792
--- /dev/null
+++ b/zucchini_integration.cc
@@ -0,0 +1,209 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_integration.h"
+
+#include <utility>
+
+#include "base/logging.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/mapped_file.h"
+#include "components/zucchini/patch_reader.h"
+
+namespace zucchini {
+
+namespace {
+
+struct FileNames {
+ FileNames() : is_dummy(true) {
+ // Use fake names.
+ old_name = old_name.AppendASCII("old_name");
+ new_name = new_name.AppendASCII("new_name");
+ patch_name = patch_name.AppendASCII("patch_name");
+ }
+
+ FileNames(const base::FilePath& old_name,
+ const base::FilePath& new_name,
+ const base::FilePath& patch_name)
+ : old_name(old_name),
+ new_name(new_name),
+ patch_name(patch_name),
+ is_dummy(false) {}
+
+ base::FilePath old_name;
+ base::FilePath new_name;
+ base::FilePath patch_name;
+
+ // A flag to decide whether the filenames are only for error output.
+ const bool is_dummy;
+};
+
+status::Code GenerateCommon(base::File old_file,
+ base::File new_file,
+ base::File patch_file,
+ const FileNames& names,
+ bool force_keep,
+ bool is_raw,
+ std::string imposed_matches) {
+ MappedFileReader mapped_old(std::move(old_file));
+ if (mapped_old.HasError()) {
+ LOG(ERROR) << "Error with file " << names.old_name.value() << ": "
+ << mapped_old.error();
+ return status::kStatusFileReadError;
+ }
+
+ MappedFileReader mapped_new(std::move(new_file));
+ if (mapped_new.HasError()) {
+ LOG(ERROR) << "Error with file " << names.new_name.value() << ": "
+ << mapped_new.error();
+ return status::kStatusFileReadError;
+ }
+
+ status::Code result = status::kStatusSuccess;
+ EnsemblePatchWriter patch_writer(mapped_old.region(), mapped_new.region());
+ if (is_raw) {
+ result = GenerateBufferRaw(mapped_old.region(), mapped_new.region(),
+ &patch_writer);
+ } else {
+ result = GenerateBufferImposed(mapped_old.region(), mapped_new.region(),
+ std::move(imposed_matches), &patch_writer);
+ }
+ if (result != status::kStatusSuccess) {
+ LOG(ERROR) << "Fatal error encountered when generating patch.";
+ return result;
+ }
+
+ // By default, delete patch on destruction, to avoid having lingering files in
+ // case of a failure. On Windows deletion can be done by the OS.
+ MappedFileWriter mapped_patch(names.patch_name, std::move(patch_file),
+ patch_writer.SerializedSize());
+ if (mapped_patch.HasError()) {
+ LOG(ERROR) << "Error with file " << names.patch_name.value() << ": "
+ << mapped_patch.error();
+ return status::kStatusFileWriteError;
+ }
+ if (force_keep)
+ mapped_patch.Keep();
+
+ if (!patch_writer.SerializeInto(mapped_patch.region()))
+ return status::kStatusPatchWriteError;
+
+ // Successfully created patch. Explicitly request file to be kept.
+ if (!mapped_patch.Keep())
+ return status::kStatusFileWriteError;
+ return status::kStatusSuccess;
+}
+
+status::Code ApplyCommon(base::File old_file,
+ base::File patch_file,
+ base::File new_file,
+ const FileNames& names,
+ bool force_keep) {
+ MappedFileReader mapped_patch(std::move(patch_file));
+ if (mapped_patch.HasError()) {
+ LOG(ERROR) << "Error with file " << names.patch_name.value() << ": "
+ << mapped_patch.error();
+ return status::kStatusFileReadError;
+ }
+
+ auto patch_reader = EnsemblePatchReader::Create(mapped_patch.region());
+ if (!patch_reader.has_value()) {
+ LOG(ERROR) << "Error reading patch header.";
+ return status::kStatusPatchReadError;
+ }
+
+ MappedFileReader mapped_old(std::move(old_file));
+ if (mapped_old.HasError()) {
+ LOG(ERROR) << "Error with file " << names.old_name.value() << ": "
+ << mapped_old.error();
+ return status::kStatusFileReadError;
+ }
+
+ PatchHeader header = patch_reader->header();
+ // By default, delete output on destruction, to avoid having lingering files
+ // in case of a failure. On Windows deletion can be done by the OS.
+ MappedFileWriter mapped_new(names.new_name, std::move(new_file),
+ header.new_size);
+ if (mapped_new.HasError()) {
+ LOG(ERROR) << "Error with file " << names.new_name.value() << ": "
+ << mapped_new.error();
+ return status::kStatusFileWriteError;
+ }
+ if (force_keep)
+ mapped_new.Keep();
+
+ status::Code result =
+ ApplyBuffer(mapped_old.region(), *patch_reader, mapped_new.region());
+ if (result != status::kStatusSuccess) {
+ LOG(ERROR) << "Fatal error encountered while applying patch.";
+ return result;
+ }
+
+  // Successfully patched |mapped_new|. Explicitly request file to be kept.
+ if (!mapped_new.Keep())
+ return status::kStatusFileWriteError;
+ return status::kStatusSuccess;
+}
+
+} // namespace
+
+status::Code Generate(base::File old_file,
+ base::File new_file,
+ base::File patch_file,
+ bool force_keep,
+ bool is_raw,
+ std::string imposed_matches) {
+ const FileNames file_names;
+ return GenerateCommon(std::move(old_file), std::move(new_file),
+ std::move(patch_file), file_names, force_keep, is_raw,
+ std::move(imposed_matches));
+}
+
+status::Code Generate(const base::FilePath& old_path,
+ const base::FilePath& new_path,
+ const base::FilePath& patch_path,
+ bool force_keep,
+ bool is_raw,
+ std::string imposed_matches) {
+ using base::File;
+ File old_file(old_path, File::FLAG_OPEN | File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ File new_file(new_path, File::FLAG_OPEN | File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ File patch_file(patch_path, File::FLAG_CREATE_ALWAYS | File::FLAG_READ |
+ File::FLAG_WRITE | File::FLAG_SHARE_DELETE |
+ File::FLAG_CAN_DELETE_ON_CLOSE);
+ const FileNames file_names(old_path, new_path, patch_path);
+ return GenerateCommon(std::move(old_file), std::move(new_file),
+ std::move(patch_file), file_names, force_keep, is_raw,
+ std::move(imposed_matches));
+}
+
+status::Code Apply(base::File old_file,
+ base::File patch_file,
+ base::File new_file,
+ bool force_keep) {
+ const FileNames file_names;
+ return ApplyCommon(std::move(old_file), std::move(patch_file),
+ std::move(new_file), file_names, force_keep);
+}
+
+status::Code Apply(const base::FilePath& old_path,
+ const base::FilePath& patch_path,
+ const base::FilePath& new_path,
+ bool force_keep) {
+ using base::File;
+ File old_file(old_path, File::FLAG_OPEN | File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ File patch_file(patch_path, File::FLAG_OPEN | File::FLAG_READ |
+ base::File::FLAG_SHARE_DELETE);
+ File new_file(new_path, File::FLAG_CREATE_ALWAYS | File::FLAG_READ |
+ File::FLAG_WRITE | File::FLAG_SHARE_DELETE |
+ File::FLAG_CAN_DELETE_ON_CLOSE);
+ const FileNames file_names(old_path, new_path, patch_path);
+ return ApplyCommon(std::move(old_file), std::move(patch_file),
+ std::move(new_file), file_names, force_keep);
+}
+
+} // namespace zucchini
diff --git a/zucchini_integration.h b/zucchini_integration.h
new file mode 100644
index 0000000..2ae6091
--- /dev/null
+++ b/zucchini_integration.h
@@ -0,0 +1,68 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_
+
+#include <string>
+
+#include "base/files/file.h"
+#include "base/files/file_path.h"
+#include "components/zucchini/zucchini.h"
+
+// Zucchini integration interface to wrap core Zucchini library with file I/O.
+
+namespace zucchini {
+
+// Generates a patch to transform |old_file| to |new_file|, and writes the
+// result to |patch_file|. Since this uses memory mapped files, crashes are
+// expected in case of I/O errors. On Windows, |patch_file| is kept iff returned
+// code is kStatusSuccess or if |force_keep == true|, and is deleted otherwise.
+// For UNIX systems the caller needs to do cleanup since it has ownership of the
+// base::File params, and Zucchini has no knowledge of which base::FilePath to
+// delete. If |is_raw == true| then uses Raw Zucchini. If |imposed_matches| is
+// non-empty, then overrides default element detection and matching heuristics
+// with custom element matching encoded in |imposed_matches|, which should be
+// formatted as:
+// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"),
+// where "#+#=#+#" encodes a match as 4 unsigned integers:
+// [offset in "old", size in "old", offset in "new", size in "new"].
+status::Code Generate(base::File old_file,
+ base::File new_file,
+ base::File patch_file,
+ bool force_keep = false,
+ bool is_raw = false,
+ std::string imposed_matches = "");
+
+// Alternative Generate() interface that takes base::FilePath as arguments.
+// Performs proper cleanup in Windows and UNIX if failure occurs.
+status::Code Generate(const base::FilePath& old_path,
+ const base::FilePath& new_path,
+ const base::FilePath& patch_path,
+ bool force_keep = false,
+ bool is_raw = false,
+ std::string imposed_matches = "");
+
+// Applies the patch in |patch_file| to |old_file|, and writes the result to
+// |new_file|. Since this uses memory mapped files, crashes are expected in case
+// of I/O errors. On Windows, |new_file| is kept iff returned code is
+// kStatusSuccess or if |force_keep == true|, and is deleted otherwise. For UNIX
+// systems the caller needs to do cleanup since it has ownership of the
+// base::File params, and Zucchini has no knowledge of which base::FilePath to
+// delete.
+status::Code Apply(base::File old_file,
+ base::File patch_file,
+ base::File new_file,
+ bool force_keep = false);
+
+// Alternative Apply() interface that takes base::FilePath as arguments.
+// Performs proper cleanup in Windows and UNIX if failure occurs.
+status::Code Apply(const base::FilePath& old_path,
+ const base::FilePath& patch_path,
+ const base::FilePath& new_path,
+ bool force_keep = false);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_
diff --git a/zucchini_main.cc b/zucchini_main.cc
new file mode 100644
index 0000000..9b5e505
--- /dev/null
+++ b/zucchini_main.cc
@@ -0,0 +1,55 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <iostream>
+
+#include "base/command_line.h"
+#include "base/logging.h"
+#include "base/process/memory.h"
+#include "build/build_config.h"
+#include "components/zucchini/main_utils.h"
+
+#if defined(OS_WIN)
+#include "base/win/process_startup_helper.h"
+#endif // defined(OS_WIN)
+
+namespace {
+
+void InitLogging() {
+ logging::LoggingSettings settings;
+ settings.logging_dest =
+ logging::LOG_TO_SYSTEM_DEBUG_LOG | logging::LOG_TO_STDERR;
+ settings.log_file_path = nullptr;
+ settings.lock_log = logging::DONT_LOCK_LOG_FILE;
+ settings.delete_old = logging::APPEND_TO_OLD_LOG_FILE;
+ bool logging_res = logging::InitLogging(settings);
+ CHECK(logging_res);
+}
+
+void InitErrorHandling(const base::CommandLine& command_line) {
+ base::EnableTerminationOnHeapCorruption();
+ base::EnableTerminationOnOutOfMemory();
+#if defined(OS_WIN)
+ base::win::RegisterInvalidParamHandler();
+ base::win::SetupCRT(command_line);
+#endif // defined(OS_WIN)
+}
+
+} // namespace
+
+int main(int argc, const char* argv[]) {
+ // Initialize infrastructure from base.
+ base::CommandLine::Init(argc, argv);
+ const base::CommandLine& command_line =
+ *base::CommandLine::ForCurrentProcess();
+ InitLogging();
+ InitErrorHandling(command_line);
+ zucchini::status::Code status =
+ RunZucchiniCommand(command_line, std::cout, std::cerr);
+ if (!(status == zucchini::status::kStatusSuccess ||
+ status == zucchini::status::kStatusInvalidParam)) {
+ std::cerr << "Failed with code " << static_cast<int>(status) << std::endl;
+ }
+ return static_cast<int>(status);
+}
diff --git a/zucchini_tools.cc b/zucchini_tools.cc
new file mode 100644
index 0000000..e8bf734
--- /dev/null
+++ b/zucchini_tools.cc
@@ -0,0 +1,140 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/zucchini_tools.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <memory>
+#include <ostream>
+#include <utility>
+
+#include "base/bind.h"
+#include "base/check_op.h"
+#include "base/strings/stringprintf.h"
+#include "components/zucchini/disassembler.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/ensemble_matcher.h"
+#include "components/zucchini/heuristic_ensemble_matcher.h"
+#include "components/zucchini/imposed_ensemble_matcher.h"
+#include "components/zucchini/io_utils.h"
+
+namespace zucchini {
+
+status::Code ReadReferences(ConstBufferView image,
+ bool do_dump,
+ std::ostream& out) {
+ std::unique_ptr<Disassembler> disasm = MakeDisassemblerWithoutFallback(image);
+ if (!disasm) {
+ out << "Input file not recognized as executable." << std::endl;
+ return status::kStatusInvalidOldImage;
+ }
+
+ std::vector<offset_t> targets;
+ for (const auto& group : disasm->MakeReferenceGroups()) {
+ targets.clear();
+ auto refs = group.GetReader(disasm.get());
+ for (auto ref = refs->GetNext(); ref.has_value(); ref = refs->GetNext())
+ targets.push_back(ref->target);
+
+ size_t num_locations = targets.size();
+ std::sort(targets.begin(), targets.end());
+ targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
+ size_t num_targets = targets.size();
+
+ out << "Type " << int(group.type_tag().value())
+ << ": Pool=" << static_cast<uint32_t>(group.pool_tag().value())
+ << ", width=" << group.width() << ", #locations=" << num_locations
+ << ", #targets=" << num_targets;
+ if (num_targets > 0) {
+ double ratio = static_cast<double>(num_locations) / num_targets;
+ out << " (ratio=" << base::StringPrintf("%.4f", ratio) << ")";
+ }
+ out << std::endl;
+
+ if (do_dump) {
+ refs = group.GetReader(disasm.get());
+
+ for (auto ref = refs->GetNext(); ref; ref = refs->GetNext()) {
+ out << " " << AsHex<8>(ref->location) << " " << AsHex<8>(ref->target)
+ << std::endl;
+ }
+ }
+ }
+
+ return status::kStatusSuccess;
+}
+
+status::Code DetectAll(ConstBufferView image,
+ std::ostream& out,
+ std::vector<ConstBufferView>* sub_image_list) {
+ DCHECK_NE(sub_image_list, nullptr);
+ sub_image_list->clear();
+
+ const size_t size = image.size();
+ size_t last_out_pos = 0;
+ size_t total_bytes_found = 0;
+
+ auto print_range = [&out](size_t pos, size_t size, const std::string& msg) {
+ out << "-- " << AsHex<8, size_t>(pos) << " +" << AsHex<8, size_t>(size)
+ << ": " << msg << std::endl;
+ };
+
+ ElementFinder finder(image,
+ base::BindRepeating(DetectElementFromDisassembler));
+ for (auto element = finder.GetNext(); element.has_value();
+ element = finder.GetNext()) {
+ ConstBufferView sub_image = image[element->region()];
+ sub_image_list->push_back(sub_image);
+ size_t pos = sub_image.begin() - image.begin();
+ size_t prog_size = sub_image.size();
+ if (last_out_pos < pos)
+ print_range(last_out_pos, pos - last_out_pos, "?");
+ auto disasm = MakeDisassemblerOfType(sub_image, element->exe_type);
+ print_range(pos, prog_size, disasm->GetExeTypeString());
+ total_bytes_found += prog_size;
+ last_out_pos = pos + prog_size;
+ }
+ if (last_out_pos < size)
+ print_range(last_out_pos, size - last_out_pos, "?");
+ out << std::endl;
+
+ // Print summary, using decimal instead of hexadecimal.
+ out << "Detected " << total_bytes_found << "/" << size << " bytes => ";
+ double percent = total_bytes_found * 100.0 / size;
+ out << base::StringPrintf("%.2f", percent) << "%." << std::endl;
+
+ return status::kStatusSuccess;
+}
+
+status::Code MatchAll(ConstBufferView old_image,
+ ConstBufferView new_image,
+ std::string imposed_matches,
+ std::ostream& out) {
+ std::unique_ptr<EnsembleMatcher> matcher;
+ if (imposed_matches.empty()) {
+ matcher = std::make_unique<HeuristicEnsembleMatcher>(&out);
+ } else {
+ matcher =
+ std::make_unique<ImposedEnsembleMatcher>(std::move(imposed_matches));
+ }
+ if (!matcher->RunMatch(old_image, new_image)) {
+ out << "RunMatch() failed.";
+ return status::kStatusFatal;
+ }
+ out << "Found " << matcher->matches().size() << " nontrivial matches and "
+ << matcher->num_identical() << " identical matches." << std::endl
+ << "To impose the same matches by command line, use: " << std::endl
+ << " -impose=";
+ PrefixSep sep(",");
+ for (const ElementMatch& match : matcher->matches())
+ out << sep << match.ToString();
+ out << std::endl;
+
+ return status::kStatusSuccess;
+}
+
+} // namespace zucchini
diff --git a/zucchini_tools.h b/zucchini_tools.h
new file mode 100644
index 0000000..bf9a95c
--- /dev/null
+++ b/zucchini_tools.h
@@ -0,0 +1,45 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_
+#define COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/zucchini.h"
+
+namespace zucchini {
+
+// The functions below are called to print diagnosis information, so outputs are
+// printed using std::ostream instead of LOG().
+
+// Prints stats on references found in |image|. If |do_dump| is true, then
+// prints all references (locations and targets).
+status::Code ReadReferences(ConstBufferView image,
+ bool do_dump,
+ std::ostream& out);
+
+// Prints regions and types of all detected executables in |image|. Appends
+// detected subregions to |sub_image_list|.
+status::Code DetectAll(ConstBufferView image,
+ std::ostream& out,
+ std::vector<ConstBufferView>* sub_image_list);
+
+// Prints all matched regions from |old_image| to |new_image|.
+// |imposed_matches|, if non-empty, encodes custom element matching to override
+// the default element detection and matching heuristics, and is formatted as:
+// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"),
+// where "#+#=#+#" encodes a match as 4 unsigned integers:
+// [offset in "old", size in "old", offset in "new", size in "new"].
+status::Code MatchAll(ConstBufferView old_image,
+ ConstBufferView new_image,
+ std::string imposed_matches,
+ std::ostream& out);
+
+} // namespace zucchini
+
+#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_