From 06f1ae9aaca969ee95ef840f22b6b461c304542d Mon Sep 17 00:00:00 2001 From: Samuel Huang Date: Tue, 13 Mar 2018 18:19:34 +0000 Subject: [Zucchini] Move Zucchini from /chrome/installer/ to /components/. (Use "git log --follow" to see older revisions of files). /components/ is the most logical place to put Zucchini, which only depends on /base and /testing/gtest. This move also enables Zucchini to be used by the Component Updater. Details: - Move all files; run the following to change deps and guards: sed 's/chrome\/installer/components/' *.cc *.h -i sed 's/CHROME_INSTALLER/COMPONENTS/' *.cc *.h -i - Sorting works out pretty well! - Change all 'chrome/installer/zucchini' to 'components/zucchini' throughout other parts of the repo; sort if necessary. - Fix 6 'git cl lint' errors. - Change 1 Bind() usage to BindRepeated(). - Update OWNER. Bug: 729154 Change-Id: I50c5a7d411ea85f707b5994ab319dfb2a1acccf7 Reviewed-on: https://chromium-review.googlesource.com/954923 Reviewed-by: Greg Thompson Reviewed-by: Jochen Eisinger Reviewed-by: Samuel Huang Commit-Queue: Samuel Huang Cr-Commit-Position: refs/heads/master@{#542857} NOKEYCHECK=True GitOrigin-RevId: 577ef6c435e8d43be6e3e60ccbcbd1881780f4ec --- BUILD.gn | 195 ++++++++++++ OWNERS | 5 + README.md | 259 ++++++++++++++++ abs32_utils.cc | 201 +++++++++++++ abs32_utils.h | 137 +++++++++ abs32_utils_unittest.cc | 496 +++++++++++++++++++++++++++++++ address_translator.cc | 254 ++++++++++++++++ address_translator.h | 198 +++++++++++++ address_translator_unittest.cc | 556 +++++++++++++++++++++++++++++++++++ algorithm.h | 84 ++++++ algorithm_unittest.cc | 206 +++++++++++++ binary_data_histogram.cc | 91 ++++++ binary_data_histogram.h | 91 ++++++ binary_data_histogram_unittest.cc | 132 +++++++++ buffer_sink.cc | 11 + buffer_sink.h | 68 +++++ buffer_sink_unittest.cc | 71 +++++ buffer_source.cc | 105 +++++++ buffer_source.h | 141 +++++++++ buffer_source_unittest.cc | 347 ++++++++++++++++++++++ buffer_view.h | 201 +++++++++++++ 
buffer_view_unittest.cc | 242 +++++++++++++++ crc32.cc | 43 +++ crc32.h | 17 ++ crc32_unittest.cc | 47 +++ disassembler.cc | 36 +++ disassembler.h | 133 +++++++++ disassembler_no_op.cc | 28 ++ disassembler_no_op.h | 40 +++ disassembler_win32.cc | 392 +++++++++++++++++++++++++ disassembler_win32.h | 129 ++++++++ element_detection.cc | 84 ++++++ element_detection.h | 60 ++++ element_detection_unittest.cc | 78 +++++ encoded_view.cc | 77 +++++ encoded_view.h | 182 ++++++++++++ encoded_view_unittest.cc | 202 +++++++++++++ ensemble_matcher.cc | 24 ++ ensemble_matcher.h | 62 ++++ equivalence_map.cc | 482 ++++++++++++++++++++++++++++++ equivalence_map.h | 183 ++++++++++++ equivalence_map_unittest.cc | 446 ++++++++++++++++++++++++++++ heuristic_ensemble_matcher.cc | 369 +++++++++++++++++++++++ heuristic_ensemble_matcher.h | 39 +++ image_index.cc | 78 +++++ image_index.h | 116 ++++++++ image_index_unittest.cc | 131 +++++++++ image_utils.h | 206 +++++++++++++ image_utils_unittest.cc | 77 +++++ integration_test.cc | 104 +++++++ io_utils.cc | 52 ++++ io_utils.h | 146 +++++++++ io_utils_unittest.cc | 161 ++++++++++ label_manager.cc | 93 ++++++ label_manager.h | 113 +++++++ label_manager_unittest.cc | 137 +++++++++ main_utils.cc | 193 ++++++++++++ main_utils.h | 35 +++ mapped_file.cc | 70 +++++ mapped_file.h | 83 ++++++ mapped_file_unittest.cc | 61 ++++ patch_fuzzer.cc | 19 ++ patch_read_write_unittest.cc | 604 ++++++++++++++++++++++++++++++++++++++ patch_reader.cc | 345 ++++++++++++++++++++++ patch_reader.h | 277 +++++++++++++++++ patch_utils.h | 152 ++++++++++ patch_utils_unittest.cc | 171 +++++++++++ patch_writer.cc | 294 +++++++++++++++++++ patch_writer.h | 276 +++++++++++++++++ reference_set.cc | 68 +++++ reference_set.h | 66 +++++ reference_set_unittest.cc | 51 ++++ rel32_finder.cc | 137 +++++++++ rel32_finder.h | 189 ++++++++++++ rel32_finder_unittest.cc | 353 ++++++++++++++++++++++ rel32_utils.cc | 69 +++++ rel32_utils.h | 70 +++++ rel32_utils_unittest.cc | 128 ++++++++ 
reloc_utils.cc | 193 ++++++++++++ reloc_utils.h | 140 +++++++++ reloc_utils_unittest.cc | 273 +++++++++++++++++ suffix_array.h | 475 ++++++++++++++++++++++++++++++ suffix_array_unittest.cc | 331 +++++++++++++++++++++ target_pool.cc | 84 ++++++ target_pool.h | 77 +++++ target_pool_unittest.cc | 64 ++++ targets_affinity.cc | 108 +++++++ targets_affinity.h | 74 +++++ targets_affinity_unittest.cc | 131 +++++++++ test_disassembler.cc | 58 ++++ test_disassembler.h | 78 +++++ test_reference_reader.cc | 20 ++ test_reference_reader.h | 32 ++ test_utils.cc | 26 ++ test_utils.h | 20 ++ testdata/chrome64_1.exe.sha1 | 1 + testdata/chrome64_2.exe.sha1 | 1 + testdata/patch_fuzzer/empty.zuc | Bin 0 -> 80 bytes testdata/setup1.exe.sha1 | 1 + testdata/setup2.exe.sha1 | 1 + type_win_pe.h | 188 ++++++++++++ typed_value.h | 57 ++++ typed_value_unittest.cc | 40 +++ zucchini.h | 54 ++++ zucchini_apply.cc | 202 +++++++++++++ zucchini_apply.h | 43 +++ zucchini_apply_unittest.cc | 22 ++ zucchini_commands.cc | 176 +++++++++++ zucchini_commands.h | 51 ++++ zucchini_exe_version.rc.version | 46 +++ zucchini_gen.cc | 430 +++++++++++++++++++++++++++ zucchini_gen.h | 84 ++++++ zucchini_gen_unittest.cc | 176 +++++++++++ zucchini_integration.cc | 122 ++++++++ zucchini_integration.h | 34 +++ zucchini_main.cc | 54 ++++ zucchini_tools.cc | 126 ++++++++ zucchini_tools.h | 38 +++ 118 files changed, 16500 insertions(+) create mode 100644 BUILD.gn create mode 100644 OWNERS create mode 100644 README.md create mode 100644 abs32_utils.cc create mode 100644 abs32_utils.h create mode 100644 abs32_utils_unittest.cc create mode 100644 address_translator.cc create mode 100644 address_translator.h create mode 100644 address_translator_unittest.cc create mode 100644 algorithm.h create mode 100644 algorithm_unittest.cc create mode 100644 binary_data_histogram.cc create mode 100644 binary_data_histogram.h create mode 100644 binary_data_histogram_unittest.cc create mode 100644 buffer_sink.cc create mode 100644 
buffer_sink.h create mode 100644 buffer_sink_unittest.cc create mode 100644 buffer_source.cc create mode 100644 buffer_source.h create mode 100644 buffer_source_unittest.cc create mode 100644 buffer_view.h create mode 100644 buffer_view_unittest.cc create mode 100644 crc32.cc create mode 100644 crc32.h create mode 100644 crc32_unittest.cc create mode 100644 disassembler.cc create mode 100644 disassembler.h create mode 100644 disassembler_no_op.cc create mode 100644 disassembler_no_op.h create mode 100644 disassembler_win32.cc create mode 100644 disassembler_win32.h create mode 100644 element_detection.cc create mode 100644 element_detection.h create mode 100644 element_detection_unittest.cc create mode 100644 encoded_view.cc create mode 100644 encoded_view.h create mode 100644 encoded_view_unittest.cc create mode 100644 ensemble_matcher.cc create mode 100644 ensemble_matcher.h create mode 100644 equivalence_map.cc create mode 100644 equivalence_map.h create mode 100644 equivalence_map_unittest.cc create mode 100644 heuristic_ensemble_matcher.cc create mode 100644 heuristic_ensemble_matcher.h create mode 100644 image_index.cc create mode 100644 image_index.h create mode 100644 image_index_unittest.cc create mode 100644 image_utils.h create mode 100644 image_utils_unittest.cc create mode 100644 integration_test.cc create mode 100644 io_utils.cc create mode 100644 io_utils.h create mode 100644 io_utils_unittest.cc create mode 100644 label_manager.cc create mode 100644 label_manager.h create mode 100644 label_manager_unittest.cc create mode 100644 main_utils.cc create mode 100644 main_utils.h create mode 100644 mapped_file.cc create mode 100644 mapped_file.h create mode 100644 mapped_file_unittest.cc create mode 100644 patch_fuzzer.cc create mode 100644 patch_read_write_unittest.cc create mode 100644 patch_reader.cc create mode 100644 patch_reader.h create mode 100644 patch_utils.h create mode 100644 patch_utils_unittest.cc create mode 100644 patch_writer.cc create 
mode 100644 patch_writer.h create mode 100644 reference_set.cc create mode 100644 reference_set.h create mode 100644 reference_set_unittest.cc create mode 100644 rel32_finder.cc create mode 100644 rel32_finder.h create mode 100644 rel32_finder_unittest.cc create mode 100644 rel32_utils.cc create mode 100644 rel32_utils.h create mode 100644 rel32_utils_unittest.cc create mode 100644 reloc_utils.cc create mode 100644 reloc_utils.h create mode 100644 reloc_utils_unittest.cc create mode 100644 suffix_array.h create mode 100644 suffix_array_unittest.cc create mode 100644 target_pool.cc create mode 100644 target_pool.h create mode 100644 target_pool_unittest.cc create mode 100644 targets_affinity.cc create mode 100644 targets_affinity.h create mode 100644 targets_affinity_unittest.cc create mode 100644 test_disassembler.cc create mode 100644 test_disassembler.h create mode 100644 test_reference_reader.cc create mode 100644 test_reference_reader.h create mode 100644 test_utils.cc create mode 100644 test_utils.h create mode 100644 testdata/chrome64_1.exe.sha1 create mode 100644 testdata/chrome64_2.exe.sha1 create mode 100644 testdata/patch_fuzzer/empty.zuc create mode 100644 testdata/setup1.exe.sha1 create mode 100644 testdata/setup2.exe.sha1 create mode 100644 type_win_pe.h create mode 100644 typed_value.h create mode 100644 typed_value_unittest.cc create mode 100644 zucchini.h create mode 100644 zucchini_apply.cc create mode 100644 zucchini_apply.h create mode 100644 zucchini_apply_unittest.cc create mode 100644 zucchini_commands.cc create mode 100644 zucchini_commands.h create mode 100644 zucchini_exe_version.rc.version create mode 100644 zucchini_gen.cc create mode 100644 zucchini_gen.h create mode 100644 zucchini_gen_unittest.cc create mode 100644 zucchini_integration.cc create mode 100644 zucchini_integration.h create mode 100644 zucchini_main.cc create mode 100644 zucchini_tools.cc create mode 100644 zucchini_tools.h diff --git a/BUILD.gn b/BUILD.gn new file mode 
100644 index 0000000..47eef3a --- /dev/null +++ b/BUILD.gn @@ -0,0 +1,195 @@ +# Copyright 2017 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//chrome/process_version_rc_template.gni") +import("//testing/libfuzzer/fuzzer_test.gni") +import("//testing/test.gni") + +static_library("zucchini_lib") { + sources = [ + "abs32_utils.cc", + "abs32_utils.h", + "address_translator.cc", + "address_translator.h", + "algorithm.h", + "binary_data_histogram.cc", + "binary_data_histogram.h", + "buffer_sink.cc", + "buffer_sink.h", + "buffer_source.cc", + "buffer_source.h", + "buffer_view.h", + "crc32.cc", + "crc32.h", + "disassembler.cc", + "disassembler.h", + "disassembler_no_op.cc", + "disassembler_no_op.h", + "disassembler_win32.cc", + "disassembler_win32.h", + "element_detection.cc", + "element_detection.h", + "encoded_view.cc", + "encoded_view.h", + "ensemble_matcher.cc", + "ensemble_matcher.h", + "equivalence_map.cc", + "equivalence_map.h", + "heuristic_ensemble_matcher.cc", + "heuristic_ensemble_matcher.h", + "image_index.cc", + "image_index.h", + "image_utils.h", + "io_utils.cc", + "io_utils.h", + "label_manager.cc", + "label_manager.h", + "patch_reader.cc", + "patch_reader.h", + "patch_utils.h", + "patch_writer.cc", + "patch_writer.h", + "reference_set.cc", + "reference_set.h", + "rel32_finder.cc", + "rel32_finder.h", + "rel32_utils.cc", + "rel32_utils.h", + "reloc_utils.cc", + "reloc_utils.h", + "suffix_array.h", + "target_pool.cc", + "target_pool.h", + "targets_affinity.cc", + "targets_affinity.h", + "type_win_pe.h", + "typed_value.h", + "zucchini.h", + "zucchini_apply.cc", + "zucchini_apply.h", + "zucchini_gen.cc", + "zucchini_gen.h", + "zucchini_tools.cc", + "zucchini_tools.h", + ] + + deps = [ + "//base", + ] +} + +static_library("zucchini_io") { + sources = [ + "mapped_file.cc", + "mapped_file.h", + "zucchini_integration.cc", + "zucchini_integration.h", + ] + 
+ deps = [ + ":zucchini_lib", + "//base", + ] +} + +executable("zucchini") { + sources = [ + "main_utils.cc", + "main_utils.h", + "zucchini_commands.cc", + "zucchini_commands.h", + "zucchini_main.cc", + ] + + deps = [ + ":zucchini_io", + ":zucchini_lib", + "//base", + "//build/config:exe_and_shlib_deps", + ] + + if (is_win) { + deps += [ ":zucchini_exe_version" ] + } +} + +if (is_win) { + process_version_rc_template("zucchini_exe_version") { + template_file = "zucchini_exe_version.rc.version" + output = "$target_gen_dir/zucchini_exe_version.rc" + } +} + +fuzzer_test("zucchini_patch_fuzzer") { + sources = [ + "patch_fuzzer.cc", + ] + deps = [ + ":zucchini_lib", + "//base", + ] + seed_corpus = "testdata/patch_fuzzer" +} + +test("zucchini_unittests") { + sources = [ + "abs32_utils_unittest.cc", + "address_translator_unittest.cc", + "algorithm_unittest.cc", + "binary_data_histogram_unittest.cc", + "buffer_sink_unittest.cc", + "buffer_source_unittest.cc", + "buffer_view_unittest.cc", + "crc32_unittest.cc", + "element_detection_unittest.cc", + "encoded_view_unittest.cc", + "equivalence_map_unittest.cc", + "image_index_unittest.cc", + "image_utils_unittest.cc", + "io_utils_unittest.cc", + "label_manager_unittest.cc", + "mapped_file_unittest.cc", + "patch_read_write_unittest.cc", + "patch_utils_unittest.cc", + "reference_set_unittest.cc", + "rel32_finder_unittest.cc", + "rel32_utils_unittest.cc", + "reloc_utils_unittest.cc", + "suffix_array_unittest.cc", + "target_pool_unittest.cc", + "targets_affinity_unittest.cc", + "test_disassembler.cc", + "test_disassembler.h", + "test_reference_reader.cc", + "test_reference_reader.h", + "test_utils.cc", + "test_utils.h", + "typed_value_unittest.cc", + "zucchini_apply_unittest.cc", + "zucchini_gen_unittest.cc", + ] + + deps = [ + ":zucchini_io", + ":zucchini_lib", + "//base", + "//base/test:run_all_unittests", + "//base/test:test_support", + "//testing/gtest", + ] +} + +test("zucchini_integration_test") { + sources = [ + 
"integration_test.cc", + ] + + deps = [ + ":zucchini_lib", + "//base", + "//base/test:run_all_unittests", + "//base/test:test_support", + "//testing/gtest", + ] +} diff --git a/OWNERS b/OWNERS new file mode 100644 index 0000000..0c93e58 --- /dev/null +++ b/OWNERS @@ -0,0 +1,5 @@ +huangs@chromium.org +grt@chromium.org +wfh@chromium.org + +# COMPONENT: Internals>Installer>Diff diff --git a/README.md b/README.md new file mode 100644 index 0000000..fe11a0f --- /dev/null +++ b/README.md @@ -0,0 +1,259 @@ + +## Basic Definitions for Patching + +**Binary**: Executable image and data. Binaries may persist in an archive +(e.g., chrome.7z), and need to be periodically updated. Formats for binaries +include {PE files EXE / DLL, ELF, DEX}. Architectures binaries include +{x86, x64, ARM, AArch64, Dalvik}. A binary is also referred to as an executable +or an image file. + +**Patching**: Sending a "new" file to clients who have an "old" file by +computing and transmitting a "patch" that can be used to transform "old" into +"new". Patches are compressed for transmission. A key performance metric is +patch size, which refers to the size of compressed patch file. For our +experiments we use 7z. + +**Patch generation**: Computation of a "patch" from "old" and "new". This can be +expensive (e.g., ~15-20 min for Chrome, using 1 GB of RAM), but since patch +generation is a run-once step on the server-side when releasing "new" binaries, +the expense is not too critical. + +**Patch application**: Transformation from "old" binaries to "new", using a +(downloaded) "patch". This is executed on client side on updates, so resource +constraints (e.g., time, RAM, disk space) is more stringent. Also, fault- +tolerance is important. This is usually achieved by an update system by having +a fallback method of directly downloading "new" in case of patching failure. + +**Offset**: Position relative to the start of a file. + +**Local offset**: An offset relative to the start of a region of a file. 
+ +**Element**: A region in a file with associated executable type, represented by +the tuple (exe_type, offset, length). Every Element in new file is associated +with an Element in old file and patched independently. + +**Reference**: A directed connection between two offsets in a binary. For +example, consider jump instructions in x86: + + 00401000: E9 3D 00 00 00 jmp 00401042 + +Here, the 4 bytes `[3D 00 00 00]` starting at address `00401001` point to +address `00401042` in memory. This forms a reference from `offset(00401001)` +(length 4) to `offset(00401042)`, where `offset(addr)` indicates the disk +offset corresponding to `addr`. A reference has a location, length (implicitly +determined by reference type), body, and target. + +**Location**: The starting offset of bytes that store a reference. In the +preceding example, `offset(00401001)` is a location. Each location is the +beginning of a reference body. + +**Body**: The span of bytes that encodes reference data, i.e., +[location, location + length) = +[location, location + 1, ..., location + length - 1]. +In the preceding example, `length = 4`, so the reference body is +`[00401001, 00401001 + 4) = [00401001, 00401002, 00401003, 00401004]`. +All reference bodies in an image must not overlap, and often region boundaries +are required to not straddle a reference body. + +**Target**: The offset that's the destination of a reference. In the preceding +example, `offset(00401042)` is the target. Different references can share common +targets. For example, in + + 00401000: E9 3D 00 00 00 jmp 00401042 + 00401005: EB 3B jmp 00401042 + +we have two references with different locations and bodies, but the same target +of `00401042`. + +Because the bytes that encode a reference depend on its target, and potentially +on its location, they are more likely to get modified from an old version of a +binary to a newer version. This is why "naive" patching does not do well on +binaries.
+ +**Disassembler**: Architecture specific data and operations, used to extract and +correct references in a binary. + +**Type of reference**: The type of a reference determines the binary +representation used to encode its target. This affects how references are parsed +and written by a disassembler. There can be many types of references in the same +binary. + +A reference is represented by the tuple (disassembler, location, target, type). +This tuple contains sufficient information to write the reference in a binary. + +**Pool of targets**: Collection of targets that is assumed to have some semantic +relationship. Each reference type belongs to exactly one reference pool. Targets +for references in the same pool are shared. + +For example, the following describes two pools defined for Dalvik Executable +format (DEX). Both pools span multiple types of references. + +1. Index in string table. + - From bytecode to string index using 16 bits. + - From bytecode to string index using 32 bits. + - From field item to string index using 32 bits. +2. Address in code. + - Relative 16 bits pointer. + - Relative 32 bits pointer. + +Boundaries between different pools can be ambiguous. Having all targets belong +to the same pool can reduce redundancy, but will use more memory and might +cause larger corrections to happen, so this is a trade-off that can be resolved +with benchmarks. + +**Abs32 references**: References whose targets are adjusted by the OS during +program load. In an image, a **relocation table** typically provides locations +of abs32 references. At each abs32 location, the stored bytes then encode +semantic information about the target (e.g., as RVA). + +**Rel32 references**: References embedded within machine code, in which targets +are encoded as some delta relative to the reference's location. Typical examples +of rel32 references are branching instructions and instruction pointer-relative +memory access.
+ +**Equivalence**: A (src_offset, dst_offset, length) tuple describing a region of +"old" binary, at an offset of |src_offset|, that is similar to a region of "new" +binary, at an offset of |dst_offset|. + +**Raw delta unit**: Describes a raw modification to apply on the new image, as a +pair (copy_offset, diff), where copy_offset describes the position in new file +as an offset in the data that was copied from the old file, and diff is the +bytewise difference to apply. + +**Associated Targets**: A target in "old" binary is associated with a target in +"new" binary if both targets: +1. are part of similar regions from the same equivalence, and +2. have the same local offset (relative to respective start regions), and +3. are not part of any larger region from a different equivalence. +Not all targets are necessarily associated with another target. + +**Label**: An (offset, index) pair, where |offset| is a target, and |index| is +an integer used to uniquely identify |offset| in its corresponding pool of +targets. Labels are created for each Reference in "old" and "new" binary as part +of generating a patch, and used to alias targets when searching for similar +regions that will form equivalences. Labels are created such that associated +targets in old and new binaries share the same |index|, and such that indices in +a pool are tightly packed. For example, suppose "old" Labels are: + - (0x1111, 0), (0x3333, 4), (0x5555, 1), (0x7777, 3) +and given the following association of targets between "old" and "new": + - 0x1111 <=> 0x6666, 0x3333 <=> 0x2222, +then we could assign indices for "new" Labels as: + - (0x2222, 4), (0x4444, 8), (0x6666, 0), (0x8888, 2) + +**Encoded Image**: The result of projecting the content of an image to scalar +values that describe content on a higher level of abstraction, masking away +undesirable noise in raw content. Notably, the projection encodes references +based on their associated label.
+ +## Zucchini Ensemble Patch Format + +### Types + +**uint8**: 8-bit unsigned int. + +**uint32**: 32-bit unsigned int, little-endian. + +**int32**: 32-bit signed int, little-endian. + +**Varints**: This is a generic variable-length encoding for integer quantities +that strips away leading (most-significant) null bytes. +The Varints format is borrowed from protocol-buffers, see +[documentation](https://developers.google.com/protocol-buffers/docs/encoding#varints) +for more info. + +**varuint32**: A uint32 encoded using Varints format. + +**varint32**: An int32 encoded using Varints format. + +### File Layout + +Name | Format | Description +--- | --- | --- +header | PatchHeader | The header. +patch_type | uint32 | Type of this patch, see `enum PatchType`. +elements_count | uint32 | Number of patch units. +elements | PatchElement[elements_count] | List of all patch elements. + +Position of elements in new file is ascending. + +### Structures + +**PatchHeader** + +Name | Format | Description +--- | --- | --- +magic | uint32 = kMagic | Magic value. +old_size | uint32 | Size of old file in bytes. +old_crc | uint32 | CRC32 of old file. +new_size | uint32 | Size of new file in bytes. +new_crc | uint32 | CRC32 of new file. + +**kMagic** == `'Z' | ('u' << 8) | ('c' << 16)` + +**PatchElement** +Contains all the information required to produce a single element in new file. + +Name | Format | Description +--- | --- | --- +header | PatchElementHeader | The header. +equivalences | EquivalenceList | List of equivalences. +raw_deltas | RawDeltaList | List of raw deltas. +reference_deltas | ReferenceDeltaList | List of reference deltas. +pool_count | uint32 | Number of pools. +extra_targets | ExtraTargetList[pool_count] | Lists of extra targets. + +**PatchElementHeader** +Describes a correspondence between an element in old and in new files. Some +redundancy arises from storing |new_offset|, but it is necessary to make +PatchElement self-contained.
+ +Name | Format | Description +--- | --- | --- +old_offset | uint32 | Starting offset of the element in old file. +new_offset | uint32 | Starting offset of the element in new file. +old_length | uint32 | Length of the element in old file. +new_length | uint32 | Length of the element in new file. +exe_type | uint32 | Executable type for this unit, see `enum ExecutableType`. + +**EquivalenceList** +Encodes a list of equivalences, where dst offsets (in new image) are ascending. + +Name | Format | Description +--- | --- | --- +src_skip | Buffer | Src offset for each equivalence, delta encoded. +dst_skip | Buffer | Dst offset for each equivalence, delta encoded. +copy_count | Buffer | Length for each equivalence. + +**RawDeltaList** +Encodes a list of raw delta units, with ascending copy offsets. + +Name | Format | Description +--- | --- | --- +raw_delta_skip | Buffer | Copy offset for each delta unit, delta encoded and biased by -1. +raw_delta_diff | Buffer | Bytewise difference for each delta unit. + +**ReferenceDeltaList** +Encodes a list of reference deltas, in the order they appear in the new +image file. A reference delta is a signed integer representing a jump through a +list of targets. + +Name | Format | Description +--- | --- | --- +reference_delta | Buffer | Vector of reference deltas. + +**ExtraTargetList** +Encodes a list of additional targets in the new image file, in ascending +order. + +Name | Format | Description +--- | --- | --- +pool_tag | uint8_t | Unique identifier for this pool of targets. +extra_targets | Buffer | Additional targets, delta encoded and biased by -1. + +**Buffer** +A generic vector of data. + +Name | Format | Description +--- | --- | --- +size |uint32 | Size of content in bytes. +content |T[] | List of integers. diff --git a/abs32_utils.cc b/abs32_utils.cc new file mode 100644 index 0000000..b45da7e --- /dev/null +++ b/abs32_utils.cc @@ -0,0 +1,201 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/abs32_utils.h" + +#include +#include +#include + +#include "base/logging.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +namespace { + +// Templated helper for AbsoluteAddress::Read(). +template +bool ReadAbs(ConstBufferView image, offset_t offset, uint64_t* value) { + static_assert(std::is_unsigned::value, "Value type must be unsigned."); + if (!image.can_access(offset)) + return false; + *value = static_cast(image.read(offset)); + return true; +} + +// Templated helper for AbsoluteAddress::Write(). +template +bool WriteAbs(offset_t offset, T value, MutableBufferView* image) { + static_assert(std::is_unsigned::value, "Value type must be unsigned."); + if (!image->can_access(offset)) + return false; + image->write(offset, value); + return true; +} + +} // namespace + +/******** AbsoluteAddress ********/ + +AbsoluteAddress::AbsoluteAddress(Bitness bitness, uint64_t image_base) + : bitness_(bitness), image_base_(image_base), value_(image_base) { + CHECK(bitness_ == kBit64 || image_base_ < 0x100000000ULL); +} + +AbsoluteAddress::AbsoluteAddress(AbsoluteAddress&&) = default; + +AbsoluteAddress::~AbsoluteAddress() = default; + +bool AbsoluteAddress::FromRva(rva_t rva) { + if (rva >= kRvaBound) + return false; + uint64_t value = image_base_ + rva; + // Check overflow, which manifests as |value| "wrapping around", resulting in + // |value| less than |image_base_| (preprocessing needed for 32-bit). + if (((bitness_ == kBit32) ? 
(value & 0xFFFFFFFFU) : value) < image_base_) + return false; + value_ = value; + return true; +} + +rva_t AbsoluteAddress::ToRva() const { + if (value_ < image_base_) + return kInvalidRva; + uint64_t raw_rva = value_ - image_base_; + if (raw_rva >= kRvaBound) + return kInvalidRva; + return static_cast(raw_rva); +} + +bool AbsoluteAddress::Read(offset_t offset, const ConstBufferView& image) { + // Read raw data; |value_| is not guaranteed to represent a valid RVA. + if (bitness_ == kBit32) + return ReadAbs(image, offset, &value_); + DCHECK_EQ(kBit64, bitness_); + return ReadAbs(image, offset, &value_); +} + +bool AbsoluteAddress::Write(offset_t offset, MutableBufferView* image) { + if (bitness_ == kBit32) + return WriteAbs(offset, static_cast(value_), image); + DCHECK_EQ(kBit64, bitness_); + return WriteAbs(offset, value_, image); +} + +/******** Abs32RvaExtractorWin32 ********/ + +Abs32RvaExtractorWin32::Abs32RvaExtractorWin32( + ConstBufferView image, + AbsoluteAddress&& addr, + const std::vector& abs32_locations, + offset_t lo, + offset_t hi) + : image_(image), addr_(std::move(addr)) { + CHECK_LE(lo, hi); + auto find_and_check = [&addr](const std::vector& locations, + offset_t offset) { + auto it = std::lower_bound(locations.begin(), locations.end(), offset); + // Ensure |offset| does not straddle a reference body. 
+ CHECK(it == locations.begin() || offset - *(it - 1) >= addr.width()); + return it; + }; + cur_abs32_ = find_and_check(abs32_locations, lo); + end_abs32_ = find_and_check(abs32_locations, hi); +} + +Abs32RvaExtractorWin32::Abs32RvaExtractorWin32(Abs32RvaExtractorWin32&&) = + default; + +Abs32RvaExtractorWin32::~Abs32RvaExtractorWin32() = default; + +base::Optional Abs32RvaExtractorWin32::GetNext() { + while (cur_abs32_ < end_abs32_) { + offset_t location = *(cur_abs32_++); + if (!addr_.Read(location, image_)) + continue; + rva_t target_rva = addr_.ToRva(); + if (target_rva == kInvalidRva) + continue; + return Unit{location, target_rva}; + } + return base::nullopt; +} + +/******** Abs32ReaderWin32 ********/ + +Abs32ReaderWin32::Abs32ReaderWin32(Abs32RvaExtractorWin32&& abs32_rva_extractor, + const AddressTranslator& translator) + : abs32_rva_extractor_(std::move(abs32_rva_extractor)), + target_rva_to_offset_(translator) {} + +Abs32ReaderWin32::~Abs32ReaderWin32() = default; + +base::Optional Abs32ReaderWin32::GetNext() { + for (auto unit = abs32_rva_extractor_.GetNext(); unit.has_value(); + unit = abs32_rva_extractor_.GetNext()) { + offset_t location = unit->location; + offset_t target = target_rva_to_offset_.Convert(unit->target_rva); + if (target == kInvalidOffset) + continue; + // In rare cases, the most significant bit of |target| is set. This + // interferes with label marking. A quick fix is to reject these. 
+ if (IsMarked(target)) { + LOG(WARNING) << "Warning: Skipping mark-aliased PE abs32 target: " + << AsHex<8>(location) << " -> " << AsHex<8>(target) << "."; + continue; + } + return Reference{location, target}; + } + return base::nullopt; +} + +/******** Abs32WriterWin32 ********/ + +Abs32WriterWin32::Abs32WriterWin32(MutableBufferView image, + AbsoluteAddress&& addr, + const AddressTranslator& translator) + : image_(image), + addr_(std::move(addr)), + target_offset_to_rva_(translator) {} + +Abs32WriterWin32::~Abs32WriterWin32() = default; + +void Abs32WriterWin32::PutNext(Reference ref) { + rva_t target_rva = target_offset_to_rva_.Convert(ref.target); + if (target_rva != kInvalidRva) { + addr_.FromRva(target_rva); + addr_.Write(ref.location, &image_); + } +} + +/******** Exported Functions ********/ + +size_t RemoveOverlappingAbs32Locations(Bitness bitness, + std::vector* locations) { + if (locations->size() <= 1) + return 0; + + uint32_t width = WidthOf(bitness); + auto slow = locations->begin(); + auto fast = locations->begin() + 1; + for (;;) { + // Find next good location. + while (fast != locations->end() && *fast - *slow < width) + ++fast; + // Advance |slow|. For the last iteration this becomes the new sentinel. + ++slow; + if (fast == locations->end()) + break; + // Compactify good locations (potentially overwrite bad locations). + if (slow != fast) + *slow = *fast; + ++fast; + } + size_t num_removed = locations->end() - slow; + locations->erase(slow, locations->end()); + return num_removed; +} + +} // namespace zucchini diff --git a/abs32_utils.h b/abs32_utils.h new file mode 100644 index 0000000..b1d3ae0 --- /dev/null +++ b/abs32_utils.h @@ -0,0 +1,137 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_ABS32_UTILS_H_ +#define COMPONENTS_ZUCCHINI_ABS32_UTILS_H_ + +#include +#include + +#include + +#include "base/macros.h" +#include "base/optional.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// A class to represent an abs32 address (32-bit or 64-bit). Accessors are +// provided to translate from / to RVA, and to read / write the represented +// abs32 address from / to an image. +class AbsoluteAddress { + public: + AbsoluteAddress(Bitness bitness, uint64_t image_base); + AbsoluteAddress(AbsoluteAddress&&); + ~AbsoluteAddress(); + + // Attempts to translate |rva| to an abs32 address. On success, assigns + // |value_| to the result and returns true. On failure (invalid |rva| or + // overflow), returns false. + bool FromRva(rva_t rva); + + // Returns the RVA for |value_|, or |kInvalidRva| if the represented value + // address does not correspond to a valid RVA. + rva_t ToRva() const; + + // Attempts to read the abs32 address at |image[offset]| into |value_|. On + // success, updates |value_| and returns true. On failure (invalid |offset|), + // returns false. + bool Read(offset_t offset, const ConstBufferView& image); + + // Attempts to write |value_| to |(*image)[offset]|. On success, performs + // the write and returns true. On failure (invalid |offset|), returns false. + bool Write(offset_t offset, MutableBufferView* image); + + size_t width() const { return WidthOf(bitness_); } + + // Exposing |value_| for testing. + uint64_t* mutable_value() { return &value_; } + + private: + const Bitness bitness_; + const uint64_t image_base_; // Accommodates 32-bit and 64-bit. + uint64_t value_; // Accommodates 32-bit and 64-bit. +}; + +// A class to extract Win32 abs32 references from |abs32_locations| within +// |image_| bounded by |[lo, hi)|.
GetNext() is used to successively return +// data as Units, which are locations and (potentially out-of-bound) RVAs. +// |addr| determines the bitness of abs32 values stored, and mediates all reads. +class Abs32RvaExtractorWin32 { + public: + struct Unit { + offset_t location; + rva_t target_rva; + }; + + // Requires |lo| <= |hi|, and they must not straddle a reference body (with + // length |addr.width()|) in |abs32_locations|. + Abs32RvaExtractorWin32(ConstBufferView image, + AbsoluteAddress&& addr, + const std::vector& abs32_locations, + offset_t lo, + offset_t hi); + Abs32RvaExtractorWin32(Abs32RvaExtractorWin32&&); + ~Abs32RvaExtractorWin32(); + + // Visits given abs32 locations, rejects invalid locations and non-existent + // RVAs, and returns reference as Unit, or base::nullopt on completion. + base::Optional GetNext(); + + private: + ConstBufferView image_; + AbsoluteAddress addr_; + std::vector::const_iterator cur_abs32_; + std::vector::const_iterator end_abs32_; +}; + +// A reader for Win32 abs32 references that filters and translates results from +// |abs32_rva_extractor_|. +class Abs32ReaderWin32 : public ReferenceReader { + public: + Abs32ReaderWin32(Abs32RvaExtractorWin32&& abs32_rva_extractor, + const AddressTranslator& translator); + ~Abs32ReaderWin32() override; + + // ReferenceReader: + base::Optional GetNext() override; + + private: + Abs32RvaExtractorWin32 abs32_rva_extractor_; + AddressTranslator::RvaToOffsetCache target_rva_to_offset_; + + DISALLOW_COPY_AND_ASSIGN(Abs32ReaderWin32); +}; + +// A writer for Win32 abs32 references. |addr| determines the bitness of the +// abs32 values stored, and mediates all writes. 
+class Abs32WriterWin32 : public ReferenceWriter { + public: + Abs32WriterWin32(MutableBufferView image, + AbsoluteAddress&& addr, + const AddressTranslator& translator); + ~Abs32WriterWin32() override; + + // ReferenceWriter: + void PutNext(Reference ref) override; + + private: + MutableBufferView image_; + AbsoluteAddress addr_; + AddressTranslator::OffsetToRvaCache target_offset_to_rva_; + + DISALLOW_COPY_AND_ASSIGN(Abs32WriterWin32); +}; + +// Given a sorted list of abs32 |locations|, removes all elements whose body +// overlaps with the body of a previous element (|bitness| determines length). +// Returns the number of elements removed. +size_t RemoveOverlappingAbs32Locations(Bitness bitness, + std::vector* locations); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ABS32_UTILS_H_ diff --git a/abs32_utils_unittest.cc b/abs32_utils_unittest.cc new file mode 100644 index 0000000..480fea0 --- /dev/null +++ b/abs32_utils_unittest.cc @@ -0,0 +1,496 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/abs32_utils.h" + +#include + +#include +#include +#include + +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// A trivial AddressTranslator that applies constant shift. +class TestAddressTranslator : public AddressTranslator { + public: + TestAddressTranslator(size_t image_size, rva_t rva_begin) { + DCHECK_GE(rva_begin, 0U); + CHECK_EQ(AddressTranslator::kSuccess, + Initialize({{0, base::checked_cast(image_size), + rva_begin, base::checked_cast(image_size)}})); + } +}; + +// Helper to translate address |value| to RVA. May return |kInvalidRva|. 
+rva_t AddrValueToRva(uint64_t value, AbsoluteAddress* addr) { + *addr->mutable_value() = value; + return addr->ToRva(); +} + +} // namespace + +TEST(Abs32UtilsTest, AbsoluteAddress32) { + std::vector data32 = ParseHexString( + "00 00 32 00 21 43 65 4A 00 00 00 00 FF FF FF FF FF FF 31 00"); + ConstBufferView image32(data32.data(), data32.size()); + MutableBufferView mutable_image32(data32.data(), data32.size()); + + AbsoluteAddress addr32(kBit32, 0x00320000U); + EXPECT_TRUE(addr32.Read(0x0U, image32)); + EXPECT_EQ(0x00000000U, addr32.ToRva()); + EXPECT_TRUE(addr32.Read(0x4U, image32)); + EXPECT_EQ(0x4A334321U, addr32.ToRva()); + EXPECT_TRUE(addr32.Read(0x8U, image32)); + EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Underflow. + EXPECT_TRUE(addr32.Read(0xCU, image32)); + EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Translated RVA would be too large. + EXPECT_TRUE(addr32.Read(0x10U, image32)); + EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Underflow (boundary case). + + EXPECT_FALSE(addr32.Read(0x11U, image32)); + EXPECT_FALSE(addr32.Read(0x14U, image32)); + EXPECT_FALSE(addr32.Read(0x100000U, image32)); + EXPECT_FALSE(addr32.Read(0x80000000U, image32)); + EXPECT_FALSE(addr32.Read(0xFFFFFFFFU, image32)); + + EXPECT_TRUE(addr32.FromRva(0x11223344U)); + EXPECT_TRUE(addr32.Write(0x2U, &mutable_image32)); + EXPECT_TRUE(addr32.Write(0x10U, &mutable_image32)); + std::vector expected_data32 = ParseHexString( + "00 00 44 33 54 11 65 4A 00 00 00 00 FF FF FF FF 44 33 54 11"); + EXPECT_EQ(expected_data32, data32); + EXPECT_FALSE(addr32.Write(0x11U, &mutable_image32)); + EXPECT_FALSE(addr32.Write(0xFFFFFFFFU, &mutable_image32)); + EXPECT_EQ(expected_data32, data32); +} + +TEST(Abs32UtilsTest, AbsoluteAddress32Overflow) { + AbsoluteAddress addr32(kBit32, 0xC0000000U); + EXPECT_TRUE(addr32.FromRva(0x00000000U)); + EXPECT_TRUE(addr32.FromRva(0x11223344U)); + EXPECT_TRUE(addr32.FromRva(0x3FFFFFFFU)); + EXPECT_FALSE(addr32.FromRva(0x40000000U)); + EXPECT_FALSE(addr32.FromRva(0x40000001U));
+ EXPECT_FALSE(addr32.FromRva(0x80000000U)); + EXPECT_FALSE(addr32.FromRva(0xFFFFFFFFU)); + + EXPECT_EQ(0x00000000U, AddrValueToRva(0xC0000000U, &addr32)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFU, &addr32)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0x00000000U, &addr32)); + EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFU, &addr32)); +} + +TEST(Abs32UtilsTest, AbsoluteAddress64) { + std::vector data64 = ParseHexString( + "00 00 00 00 64 00 00 00 21 43 65 4A 64 00 00 00 " + "00 00 00 00 00 00 00 00 FF FF FF FF FF FF FF FF " + "00 00 00 00 64 00 00 80 FF FF FF FF 63 00 00 00"); + ConstBufferView image64(data64.data(), data64.size()); + MutableBufferView mutable_image64(data64.data(), data64.size()); + + AbsoluteAddress addr64(kBit64, 0x0000006400000000ULL); + EXPECT_TRUE(addr64.Read(0x0U, image64)); + EXPECT_EQ(0x00000000U, addr64.ToRva()); + EXPECT_TRUE(addr64.Read(0x8U, image64)); + EXPECT_EQ(0x4A654321U, addr64.ToRva()); + EXPECT_TRUE(addr64.Read(0x10U, image64)); // Succeeds, in spite of value. + EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Underflow. + EXPECT_TRUE(addr64.Read(0x18U, image64)); + EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Translated RVA too large. + EXPECT_TRUE(addr64.Read(0x20U, image64)); + EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Translated RVA too large. + EXPECT_TRUE(addr64.Read(0x28U, image64)); + EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Underflow. + + EXPECT_FALSE(addr64.Read(0x29U, image64)); // Extends outside. + EXPECT_FALSE(addr64.Read(0x30U, image64)); // Entirely outside (note: hex).
+ EXPECT_FALSE(addr64.Read(0x100000U, image64)); + EXPECT_FALSE(addr64.Read(0x80000000U, image64)); + EXPECT_FALSE(addr64.Read(0xFFFFFFFFU, image64)); + + EXPECT_TRUE(addr64.FromRva(0x11223344U)); + EXPECT_TRUE(addr64.Write(0x13U, &mutable_image64)); + EXPECT_TRUE(addr64.Write(0x20U, &mutable_image64)); + std::vector expected_data64 = ParseHexString( + "00 00 00 00 64 00 00 00 21 43 65 4A 64 00 00 00 " + "00 00 00 44 33 22 11 64 00 00 00 FF FF FF FF FF " + "44 33 22 11 64 00 00 00 FF FF FF FF 63 00 00 00"); + EXPECT_EQ(expected_data64, data64); + EXPECT_FALSE(addr64.Write(0x29U, &mutable_image64)); + EXPECT_FALSE(addr64.Write(0x30U, &mutable_image64)); + EXPECT_FALSE(addr64.Write(0xFFFFFFFFU, &mutable_image64)); + EXPECT_EQ(expected_data64, data64); + + EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU)); +} + +TEST(Abs32UtilsTest, AbsoluteAddress64Overflow) { + { + // Counterpart to AbsoluteAddress32Overflow test. + AbsoluteAddress addr64(kBit64, 0xFFFFFFFFC0000000ULL); + EXPECT_TRUE(addr64.FromRva(0x00000000U)); + EXPECT_TRUE(addr64.FromRva(0x11223344U)); + EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFU)); + EXPECT_FALSE(addr64.FromRva(0x40000000U)); + EXPECT_FALSE(addr64.FromRva(0x40000001U)); + EXPECT_FALSE(addr64.FromRva(0x80000000U)); + EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU)); + + EXPECT_EQ(0x00000000U, AddrValueToRva(0xFFFFFFFFC0000000U, &addr64)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFFBFFFFFFFU, &addr64)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0x0000000000000000U, &addr64)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFF00000000U, &addr64)); + EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFFFFFFFFFU, &addr64)); + } + { + // Pseudo-counterpart to AbsoluteAddress32Overflow test: Some now pass.
+ AbsoluteAddress addr64(kBit64, 0xC0000000U); + EXPECT_TRUE(addr64.FromRva(0x00000000U)); + EXPECT_TRUE(addr64.FromRva(0x11223344U)); + EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFU)); + EXPECT_TRUE(addr64.FromRva(0x40000000U)); + EXPECT_TRUE(addr64.FromRva(0x40000001U)); + EXPECT_FALSE(addr64.FromRva(0x80000000U)); + EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU)); + + // ToRva() still fails though. + EXPECT_EQ(0x00000000U, AddrValueToRva(0xC0000000U, &addr64)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFU, &addr64)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0x00000000U, &addr64)); + EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFU, &addr64)); + } + { + AbsoluteAddress addr64(kBit64, 0xC000000000000000ULL); + EXPECT_TRUE(addr64.FromRva(0x00000000ULL)); + EXPECT_TRUE(addr64.FromRva(0x11223344ULL)); + EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFULL)); + EXPECT_TRUE(addr64.FromRva(0x40000000ULL)); + EXPECT_TRUE(addr64.FromRva(0x40000001ULL)); + EXPECT_FALSE(addr64.FromRva(0x80000000ULL)); + EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFULL)); + + EXPECT_EQ(0x00000000U, AddrValueToRva(0xC000000000000000ULL, &addr64)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFFFFFFFFFULL, &addr64)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0x0000000000000000ULL, &addr64)); + EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xC00000003FFFFFFFULL, &addr64)); + EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFFFFFFFFFFULL, &addr64)); + } +} + +TEST(Abs32UtilsTest, Win32Read32) { + constexpr uint32_t kImageBase = 0xA0000000U; + constexpr uint32_t kRvaBegin = 0x00C00000U; + struct { + std::vector data32; + std::vector abs32_locations; // Assumption: Sorted. + offset_t lo; // Assumption: In range, does not straddle |abs32_location|. + offset_t hi; // Assumption: Also >= |lo|. + std::vector expected_refs; + } test_cases[] = { + // Targets at beginning and end.
+ {ParseHexString("FF FF FF FF 0F 00 C0 A0 00 00 C0 A0 FF FF FF FF"), + {0x4U, 0x8U}, + 0x0U, + 0x10U, + {{0x4U, 0xFU}, {0x8U, 0x0U}}}, + // Targets at beginning and end are out of bound: Rejected. + {ParseHexString("FF FF FF FF 10 00 C0 A0 FF FF BF A0 FF FF FF FF"), + {0x4U, 0x8U}, + 0x0U, + 0x10U, + std::vector()}, + // Same with more extreme target values: Rejected. + {ParseHexString("FF FF FF FF FF FF FF FF 00 00 00 00 FF FF FF FF"), + {0x4U, 0x8U}, + 0x0U, + 0x10U, + std::vector()}, + // Locations at beginning and end, plus invalid locations. + {ParseHexString("08 00 C0 A0 FF FF FF FF FF FF FF FF 04 00 C0 A0"), + {0x0U, 0xCU, 0x10U, 0x1000U, 0x80000000U, 0xFFFFFFFFU}, + 0x0U, + 0x10U, + {{0x0U, 0x8U}, {0xCU, 0x4U}}}, + // Odd size, location, target. + {ParseHexString("FF FF FF 09 00 C0 A0 FF FF FF FF FF FF FF FF FF " + "FF FF FF"), + {0x3U}, + 0x0U, + 0x13U, + {{0x3U, 0x9U}}}, + // No location given. + {ParseHexString("FF FF FF FF 0C 00 C0 A0 00 00 C0 A0 FF FF FF FF"), + std::vector(), 0x0U, 0x10U, std::vector()}, + // Simple alternation. + {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x0U, + 0x20U, + {{0x0U, 0x4U}, {0x8U, 0xCU}, {0x10U, 0x14U}, {0x18U, 0x1CU}}}, + // Same, with locations limited by |lo| and |hi|. By assumption these must + // not cut across Reference body. + {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x04U, + 0x17U, + {{0x8U, 0xCU}, {0x10U, 0x14U}}}, + // Same, with very limiting |lo| and |hi|. + {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x0CU, + 0x10U, + std::vector()}, + // Same, |lo| == |hi|.
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x14U, + 0x14U, + std::vector()}, + // Same, |lo| and |hi| at end. + {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x20U, + 0x20U, + std::vector()}, + // Mix. Note that targets can overlap. + {ParseHexString("FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF " + "06 00 C0 A0 2C 00 C0 A0 FF FF C0 A0 2B 00 C0 A0 " + "FF 06 00 C0 A0 00 00 C0 A0 FF FF FF FF FF FF FF"), + {0x10U, 0x14U, 0x18U, 0x1CU, 0x21U, 0x25U, 0xAAAAU}, + 0x07U, + 0x25U, + {{0x10U, 0x6U}, {0x14U, 0x2CU}, {0x1CU, 0x2BU}, {0x21, 0x6U}}}, + }; + + for (const auto& test_case : test_cases) { + ConstBufferView image32(test_case.data32.data(), test_case.data32.size()); + Abs32RvaExtractorWin32 extractor(image32, {kBit32, kImageBase}, + test_case.abs32_locations, test_case.lo, + test_case.hi); + + TestAddressTranslator translator(test_case.data32.size(), kRvaBegin); + Abs32ReaderWin32 reader(std::move(extractor), translator); + + // Loop over |expected_ref| to check element-by-element. + base::Optional ref; + for (const auto& expected_ref : test_case.expected_refs) { + ref = reader.GetNext(); + EXPECT_TRUE(ref.has_value()); + EXPECT_EQ(expected_ref, ref.value()); + } + // Check that nothing is left. + ref = reader.GetNext(); + EXPECT_FALSE(ref.has_value()); + } +} + +TEST(Abs32UtilsTest, Win32Read64) { + constexpr uint64_t kImageBase = 0x31415926A0000000U; + constexpr uint32_t kRvaBegin = 0x00C00000U; + // For simplicity, just test mixed case. 
+ std::vector data64 = ParseHexString( + "FF FF FF FF FF FF FF FF 00 00 C0 A0 26 59 41 31 " + "06 00 C0 A0 26 59 41 31 02 00 C0 A0 26 59 41 31 " + "FF FF FF BF 26 59 41 31 FF FF FF FF FF FF FF FF " + "02 00 C0 A0 26 59 41 31 07 00 C0 A0 26 59 41 31"); + std::vector abs32_locations = {0x8U, 0x10U, 0x18U, 0x20U, + 0x28U, 0x30U, 0x38U, 0x40U}; + offset_t lo = 0x10U; + offset_t hi = 0x38U; + std::vector expected_refs = { + {0x10U, 0x06U}, {0x18U, 0x02U}, {0x30U, 0x02U}}; + + ConstBufferView image64(data64.data(), data64.size()); + Abs32RvaExtractorWin32 extractor(image64, {kBit64, kImageBase}, + abs32_locations, lo, hi); + TestAddressTranslator translator(data64.size(), kRvaBegin); + Abs32ReaderWin32 reader(std::move(extractor), translator); + + std::vector refs; + base::Optional ref; + for (ref = reader.GetNext(); ref.has_value(); ref = reader.GetNext()) + refs.push_back(ref.value()); + EXPECT_EQ(expected_refs, refs); +} + +TEST(Abs32UtilsTest, Win32ReadFail) { + // Make |bitness| a state to reduce repetition. + Bitness bitness = kBit32; + + constexpr uint32_t kImageBase = 0xA0000000U; // Shared for 32-bit and 64-bit. + std::vector data(32U, 0xFFU); + ConstBufferView image(data.data(), data.size()); + + auto try_make = [&](std::vector&& abs32_locations, offset_t lo, + offset_t hi) { + Abs32RvaExtractorWin32 extractor(image, {bitness, kImageBase}, + abs32_locations, lo, hi); + extractor.GetNext(); // Dummy call so |extractor| gets used. + }; + + // 32-bit tests. + bitness = kBit32; + try_make({8U, 24U}, 0U, 32U); + EXPECT_DEATH(try_make({4U, 24U}, 32U, 0U), ""); // |lo| > |hi|. + try_make({8U, 24U}, 0U, 12U); + try_make({8U, 24U}, 0U, 28U); + try_make({8U, 24U}, 8U, 32U); + try_make({8U, 24U}, 24U, 32U); + EXPECT_DEATH(try_make({8U, 24U}, 0U, 11U), ""); // |hi| straddles. + EXPECT_DEATH(try_make({8U, 24U}, 26U, 32U), ""); // |lo| straddles. + try_make({8U, 24U}, 12U, 24U); + + // 64-bit tests. + bitness = kBit64; + try_make({6U, 22U}, 0U, 32U); + // |lo| > |hi|. 
+ EXPECT_DEATH(try_make(std::vector(), 32U, 31U), ""); + try_make({6U, 22U}, 0U, 14U); + try_make({6U, 22U}, 0U, 30U); + try_make({6U, 22U}, 6U, 32U); + try_make({6U, 22U}, 22U, 32U); + EXPECT_DEATH(try_make({6U, 22U}, 0U, 29U), ""); // |hi| straddles. + EXPECT_DEATH(try_make({6U, 22U}, 7U, 32U), ""); // |lo| straddles. + try_make({6U, 22U}, 14U, 20U); + try_make({16U}, 16U, 24U); + EXPECT_DEATH(try_make({16U}, 18U, 18U), ""); // |lo|, |hi| straddle. +} + +TEST(Abs32UtilsTest, Win32Write32) { + constexpr uint32_t kImageBase = 0xA0000000U; + constexpr uint32_t kRvaBegin = 0x00C00000U; + std::vector data32(0x30, 0xFFU); + MutableBufferView image32(data32.data(), data32.size()); + AbsoluteAddress addr(kBit32, kImageBase); + TestAddressTranslator translator(data32.size(), kRvaBegin); + Abs32WriterWin32 writer(image32, std::move(addr), translator); + + // Successful writes. + writer.PutNext({0x02U, 0x10U}); + writer.PutNext({0x0BU, 0x21U}); + writer.PutNext({0x16U, 0x10U}); + writer.PutNext({0x2CU, 0x00U}); + + // Invalid data: For simplicity, Abs32WriterWin32 simply ignores bad writes. + // Invalid location. + writer.PutNext({0x2DU, 0x20U}); + writer.PutNext({0x80000000U, 0x20U}); + writer.PutNext({0xFFFFFFFFU, 0x20U}); + // Invalid target. 
+ writer.PutNext({0x1CU, 0x00001111U}); + writer.PutNext({0x10U, 0xFFFFFF00U}); + + std::vector expected_data32 = ParseHexString( + "FF FF 10 00 C0 A0 FF FF FF FF FF 21 00 C0 A0 FF " + "FF FF FF FF FF FF 10 00 C0 A0 FF FF FF FF FF FF " + "FF FF FF FF FF FF FF FF FF FF FF FF 00 00 C0 A0"); + EXPECT_EQ(expected_data32, data32); +} + +TEST(Abs32UtilsTest, Win32Write64) { + constexpr uint64_t kImageBase = 0x31415926A0000000U; + constexpr uint32_t kRvaBegin = 0x00C00000U; + std::vector data64(0x30, 0xFFU); + MutableBufferView image32(data64.data(), data64.size()); + AbsoluteAddress addr(kBit64, kImageBase); + TestAddressTranslator translator(data64.size(), kRvaBegin); + Abs32WriterWin32 writer(image32, std::move(addr), translator); + + // Successful writes. + writer.PutNext({0x02U, 0x10U}); + writer.PutNext({0x0BU, 0x21U}); + writer.PutNext({0x16U, 0x10U}); + writer.PutNext({0x28U, 0x00U}); + + // Invalid data: For simplicity, Abs32WriterWin32 simply ignores bad writes. + // Invalid location. + writer.PutNext({0x29U, 0x20U}); + writer.PutNext({0x80000000U, 0x20U}); + writer.PutNext({0xFFFFFFFFU, 0x20U}); + // Invalid target. + writer.PutNext({0x1CU, 0x00001111U}); + writer.PutNext({0x10U, 0xFFFFFF00U}); + + std::vector expected_data64 = ParseHexString( + "FF FF 10 00 C0 A0 26 59 41 31 FF 21 00 C0 A0 26 " + "59 41 31 FF FF FF 10 00 C0 A0 26 59 41 31 FF FF " + "FF FF FF FF FF FF FF FF 00 00 C0 A0 26 59 41 31"); + EXPECT_EQ(expected_data64, data64); +} + +TEST(Abs32UtilsTest, RemoveOverlappingAbs32Locations) { + // Make |bitness| a state to reduce repetition. 
+ Bitness bitness = kBit32; + + auto run_test = [&bitness](const std::vector& expected_locations, + std::vector&& locations) { + ASSERT_TRUE(std::is_sorted(locations.begin(), locations.end())); + size_t expected_removals = locations.size() - expected_locations.size(); + size_t removals = RemoveOverlappingAbs32Locations(bitness, &locations); + EXPECT_EQ(expected_removals, removals); + EXPECT_EQ(expected_locations, locations); + }; + + // 32-bit tests. + bitness = kBit32; + run_test(std::vector(), std::vector()); + run_test({4U}, {4U}); + run_test({4U, 10U}, {4U, 10U}); + run_test({4U, 8U}, {4U, 8U}); + run_test({4U}, {4U, 7U}); + run_test({4U}, {4U, 4U}); + run_test({4U, 8U}, {4U, 7U, 8U}); + run_test({4U, 10U}, {4U, 7U, 10U}); + run_test({4U, 9U}, {4U, 9U, 10U}); + run_test({3U}, {3U, 5U, 6U}); + run_test({3U, 7U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U}); + run_test({3U, 7U, 11U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U}); + run_test({4U, 8U, 12U}, {4U, 6U, 8U, 10U, 12U}); + run_test({4U, 8U, 12U, 16U}, {4U, 8U, 12U, 16U}); + run_test({4U, 8U, 12U}, {4U, 8U, 9U, 12U}); + run_test({4U}, {4U, 4U, 4U, 4U, 4U, 4U}); + run_test({3U}, {3U, 4U, 4U, 4U, 5U, 5U}); + run_test({3U, 7U}, {3U, 4U, 4U, 4U, 7U, 7U, 8U}); + run_test({10U, 20U, 30U, 40U}, {10U, 20U, 22U, 22U, 30U, 40U}); + run_test({1000000U, 1000004U}, {1000000U, 1000004U}); + run_test({1000000U}, {1000000U, 1000002U}); + + // 64-bit tests. 
+ bitness = kBit64; + run_test(std::vector(), std::vector()); + run_test({4U}, {4U}); + run_test({4U, 20U}, {4U, 20U}); + run_test({4U, 12U}, {4U, 12U}); + run_test({4U}, {4U, 11U}); + run_test({4U}, {4U, 5U}); + run_test({4U}, {4U, 4U}); + run_test({4U, 12U, 20U}, {4U, 12U, 20U}); + run_test({1U, 9U, 17U}, {1U, 9U, 17U}); + run_test({1U, 17U}, {1U, 8U, 17U}); + run_test({1U, 10U}, {1U, 10U, 17U}); + run_test({3U, 11U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U}); + run_test({4U, 12U}, {4U, 6U, 8U, 10U, 12U}); + run_test({4U, 12U}, {4U, 12U, 16U}); + run_test({4U, 12U, 20U, 28U}, {4U, 12U, 20U, 28U}); + run_test({4U}, {4U, 4U, 4U, 4U, 5U, 5U}); + run_test({3U, 11U}, {3U, 4U, 4U, 4U, 11U, 11U, 12U}); + run_test({10U, 20U, 30U, 40U}, {10U, 20U, 22U, 22U, 30U, 40U}); + run_test({1000000U, 1000008U}, {1000000U, 1000008U}); + run_test({1000000U}, {1000000U, 1000004U}); +} + +} // namespace zucchini diff --git a/address_translator.cc b/address_translator.cc new file mode 100644 index 0000000..79e7ba6 --- /dev/null +++ b/address_translator.cc @@ -0,0 +1,254 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/address_translator.h" + +#include +#include + +namespace zucchini { + +/******** AddressTranslator::OffsetToRvaCache ********/ + +AddressTranslator::OffsetToRvaCache::OffsetToRvaCache( + const AddressTranslator& translator) + : translator_(translator) {} + +rva_t AddressTranslator::OffsetToRvaCache::Convert(offset_t offset) const { + if (offset >= translator_.fake_offset_begin_) { + // Rely on |translator_| to handle this special case. 
+ return translator_.OffsetToRva(offset); + } + if (cached_unit_ && cached_unit_->CoversOffset(offset)) + return cached_unit_->OffsetToRvaUnsafe(offset); + const AddressTranslator::Unit* unit = translator_.OffsetToUnit(offset); + if (!unit) + return kInvalidRva; + cached_unit_ = unit; + return unit->OffsetToRvaUnsafe(offset); +} + +/******** AddressTranslator::RvaToOffsetCache ********/ + +AddressTranslator::RvaToOffsetCache::RvaToOffsetCache( + const AddressTranslator& translator) + : translator_(translator) {} + +bool AddressTranslator::RvaToOffsetCache::IsValid(rva_t rva) const { + if (!cached_unit_ || !cached_unit_->CoversRva(rva)) { + const AddressTranslator::Unit* unit = translator_.RvaToUnit(rva); + if (!unit) + return false; + cached_unit_ = unit; + } + return true; +} + +offset_t AddressTranslator::RvaToOffsetCache::Convert(rva_t rva) const { + if (!cached_unit_ || !cached_unit_->CoversRva(rva)) { + const AddressTranslator::Unit* unit = translator_.RvaToUnit(rva); + if (!unit) + return kInvalidOffset; + cached_unit_ = unit; + } + return cached_unit_->RvaToOffsetUnsafe(rva, translator_.fake_offset_begin_); +} + +/******** AddressTranslator ********/ + +AddressTranslator::AddressTranslator() = default; + +AddressTranslator::~AddressTranslator() = default; + +AddressTranslator::Status AddressTranslator::Initialize( + std::vector&& units) { + for (Unit& unit : units) { + // Check for overflows and fail if found. + if (!RangeIsBounded(unit.offset_begin, unit.offset_size, + kOffsetBound) || + !RangeIsBounded(unit.rva_begin, unit.rva_size, kRvaBound)) { + return kErrorOverflow; + } + // If |rva_size < offset_size|: Just shrink |offset_size| to accommodate. + unit.offset_size = std::min(unit.offset_size, unit.rva_size); + // Now |rva_size >= offset_size|. Note that |rva_size > offset_size| is + // allowed; these lead to dangling RVA. + } + + // Remove all empty units. 
+ units.erase(std::remove_if(units.begin(), units.end(), + [](const Unit& unit) { return unit.IsEmpty(); }), + units.end()); + + // Sort |units| by RVA, then uniquefy. + std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) { + return std::tie(a.rva_begin, a.rva_size) < + std::tie(b.rva_begin, b.rva_size); + }); + units.erase(std::unique(units.begin(), units.end()), units.end()); + + // Scan for RVA range overlaps, validate, and merge wherever possible. + if (units.size() > 1) { + // Traverse with two iterators: |slow| stays behind and modifies Units that + // absorb all overlapping (or tangent if suitable) Units; |fast| explores + // new Units as candidates for consistency checks and potential merge into + // |slow|. + auto slow = units.begin(); + + // All |it| with |slow| < |it| < |fast| contain garbage. + for (auto fast = slow + 1; fast != units.end(); ++fast) { + // Comment notation: S = slow offset, F = fast offset, O = overlap offset, + // s = slow RVA, f = fast RVA, o = overlap RVA. + DCHECK_GE(fast->rva_begin, slow->rva_begin); + if (slow->rva_end() < fast->rva_begin) { + // ..ssssss..ffffff..: Disjoint: Can advance |slow|. + *(++slow) = *fast; + continue; + } + + // ..ssssffff..: Tangent: Merge is optional. + // ..sssooofff.. / ..sssooosss..: Overlap: Merge is required. + bool merge_is_optional = slow->rva_end() == fast->rva_begin; + + // Check whether |fast| and |slow| have identical RVA -> offset shift. + // If not, then merge cannot be resolved. Examples: + // ..ssssffff.. -> ..SSSSFFFF..: Good, can merge. + // ..ssssffff.. -> ..SSSS..FFFF..: Non-fatal: don't merge. + // ..ssssffff.. -> ..FFFF..SSSS..: Non-fatal: don't merge. + // ..ssssffff.. -> ..SSOOFF..: Fatal: Ignore for now (handled later). + // ..sssooofff.. -> ..SSSOOOFFF..: Good, can merge. + // ..sssooofff.. -> ..SSSSSOFFFFF..: Fatal. + // ..sssooofff.. -> ..FFOOOOSS..: Fatal. + // ..sssooofff.. -> ..SSSOOOF..: Good, notice |fast| has dangling RVAs. + // ..oooooo.. 
-> ..OOOOOO..: Good, can merge. + if (fast->offset_begin < slow->offset_begin || + fast->offset_begin - slow->offset_begin != + fast->rva_begin - slow->rva_begin) { + if (merge_is_optional) { + *(++slow) = *fast; + continue; + } + return kErrorBadOverlap; + } + + // Check whether dangling RVAs (if they exist) are consistent. Examples: + // ..sssooofff.. -> ..SSSOOOF..: Good, can merge. + // ..sssooosss.. -> ..SSSOOOS..: Good, can merge. + // ..sssooofff.. -> ..SSSOO..: Good, can merge. + // ..sssooofff.. -> ..SSSOFFF..: Fatal. + // ..sssooosss.. -> ..SSSOOFFFF..: Fatal. + // ..oooooo.. -> ..OOO..: Good, can merge. + // Idea of check: Suppose |fast| has dangling RVA, then + // |[fast->rva_begin, fast->rva_begin + fast->offset_size)| -> + // |[fast->offset_begin, **fast->offset_end()**)|, with remaining RVA + // mapping to fake offsets. This means |fast->offset_end()| must be >= + // |slow->offset_end()|, and failure to do so results in error. The + // argument for |slow| having dangling RVA is symmetric. + if ((fast->HasDanglingRva() && fast->offset_end() < slow->offset_end()) || + (slow->HasDanglingRva() && slow->offset_end() < fast->offset_end())) { + if (merge_is_optional) { + *(++slow) = *fast; + continue; + } + return kErrorBadOverlapDanglingRva; + } + + // Merge |fast| into |slow|. + slow->rva_size = + std::max(slow->rva_size, fast->rva_end() - slow->rva_begin); + slow->offset_size = + std::max(slow->offset_size, fast->offset_end() - slow->offset_begin); + } + ++slow; + units.erase(slow, units.end()); + } + + // After resolving RVA overlaps, any offset overlap would imply error.
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) { + return a.offset_begin < b.offset_begin; + }); + + if (units.size() > 1) { + auto previous = units.begin(); + for (auto current = previous + 1; current != units.end(); ++current) { + if (previous->offset_end() > current->offset_begin) + return kErrorBadOverlap; + previous = current; + } + } + + // For fake offset heuristics: Compute exclusive upper bounds for offsets + // and RVAs. + offset_t offset_bound = 0; + rva_t rva_bound = 0; + for (const Unit& unit : units) { + offset_bound = std::max(offset_bound, unit.offset_end()); + rva_bound = std::max(rva_bound, unit.rva_end()); + } + + // Compute pessimistic range and see if it still fits within space of valid + // offsets. This limits image size to one half of |kOffsetBound|, and is a + // main drawback for the current heuristic to convert dangling RVA to fake + // offsets. + if (!RangeIsBounded(offset_bound, rva_bound, kOffsetBound)) + return kErrorFakeOffsetBeginTooLarge; + + // Success. Store results. |units| is currently sorted by offset, so assign. + units_sorted_by_offset_.assign(units.begin(), units.end()); + + // Sort |units| by RVA, and just store it directly. + std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) { + return a.rva_begin < b.rva_begin; + }); + units_sorted_by_rva_ = std::move(units); + + fake_offset_begin_ = offset_bound; + return kSuccess; +} + +rva_t AddressTranslator::OffsetToRva(offset_t offset) const { + if (offset >= fake_offset_begin_) { + // Handle dangling RVA: First shift it to regular RVA space. + rva_t rva = offset - fake_offset_begin_; + // If result is indeed a dangling RVA, return it; else return |kInvalidRva|. + const Unit* unit = RvaToUnit(rva); + return (unit && unit->HasDanglingRva() && unit->CoversDanglingRva(rva)) + ? rva + : kInvalidRva; + } + const Unit* unit = OffsetToUnit(offset); + return unit ?
unit->OffsetToRvaUnsafe(offset) : kInvalidRva; +} + +offset_t AddressTranslator::RvaToOffset(rva_t rva) const { + const Unit* unit = RvaToUnit(rva); + // This also handles dangling RVA. + return unit ? unit->RvaToOffsetUnsafe(rva, fake_offset_begin_) + : kInvalidOffset; +} + +const AddressTranslator::Unit* AddressTranslator::OffsetToUnit( + offset_t offset) const { + // Finds first Unit with |offset_begin| > |offset|, rewind by 1 to find the + // last Unit with |offset_begin| <= |offset| (if it exists). + auto it = std::upper_bound( + units_sorted_by_offset_.begin(), units_sorted_by_offset_.end(), offset, + [](offset_t a, const Unit& b) { return a < b.offset_begin; }); + if (it == units_sorted_by_offset_.begin()) + return nullptr; + --it; + return it->CoversOffset(offset) ? &(*it) : nullptr; +} + +const AddressTranslator::Unit* AddressTranslator::RvaToUnit(rva_t rva) const { + auto it = std::upper_bound( + units_sorted_by_rva_.begin(), units_sorted_by_rva_.end(), rva, + [](rva_t a, const Unit& b) { return a < b.rva_begin; }); + if (it == units_sorted_by_rva_.begin()) + return nullptr; + --it; + return it->CoversRva(rva) ? &(*it) : nullptr; +} + +} // namespace zucchini diff --git a/address_translator.h b/address_translator.h new file mode 100644 index 0000000..821b9ad --- /dev/null +++ b/address_translator.h @@ -0,0 +1,198 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ +#define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ + +#include + +#include +#include + +#include "base/macros.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// There are several ways to reason about addresses in an image: +// - Offset: Position relative to start of image. +// - VA (Virtual Address): Virtual memory address of a loaded image.
This is +// subject to relocation by the OS. +// - RVA (Relative Virtual Address): VA relative to some base address. This is +// the preferred way to specify pointers in an image. +// +// Zucchini is primarily concerned with offsets and RVAs. Executable images like +// PE and ELF are organized into sections. Each section specifies offset and RVA +// ranges as: +// {Offset start, offset size, RVA start, RVA size}. +// This constitutes a basic unit to translate between offsets and RVAs. Note: +// |offset size| < |RVA size| is possible. For example, the .bss section can +// have zero-filled statically-allocated data that have no corresponding bytes +// on image (to save space). This poses a problem for Zucchini, which stores +// addresses as offsets: now we'd have "dangling RVAs" that don't map to +// offsets! Some ways to handle this are: +// 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means +// some reference targets would escape detection and processing. +// 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data +// must not be read on these fake offsets, which are only valid as target +// addresses for reference matching. +// As for |RVA size| < |offset size|, the extra portion just gets ignored. +// +// Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to +// fake offsets by adding a large value. This value can be chosen as an +// exclusive upper bound of all offsets (i.e., image size). This allows them to +// be easily detected and processed as a special-case. +// TODO(huangs): Investigate option (1), now that the refactored code makes +// experimentation easier. +// TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t| +// ranges and create "fake" units to accommodate dangling RVAs. Then +// AddressTranslator can be simplified. + +// Virtual Address relative to some base address (RVA). 
There's a distinction +// between "valid RVA" and "existent RVA": +// - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|. +// - Existent RVA: An RVA that has semantic meaning in an image, and may +// translate to an offset in an image or (if a dangling RVA) a fake offset. +// All existent RVAs are valid RVAs. +using rva_t = uint32_t; +// Divide by 2 to match |kOffsetBound|. +constexpr rva_t kRvaBound = static_cast(-1) / 2; +constexpr rva_t kInvalidRva = static_cast(-1); + +// A utility to translate between offsets and RVAs in an image. +class AddressTranslator { + public: + // A basic unit for address translation, roughly maps to a section, but may + // be processed (e.g., merged) as an optimization. + struct Unit { + offset_t offset_end() const { return offset_begin + offset_size; } + rva_t rva_end() const { return rva_begin + rva_size; } + bool IsEmpty() const { + // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed + // yet, and once it is then it's empty. + // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but + // is not empty. + return rva_size == 0; + } + bool CoversOffset(offset_t offset) const { + return RangeCovers(offset_begin, offset_size, offset); + } + bool CoversRva(rva_t rva) const { + return RangeCovers(rva_begin, rva_size, rva); + } + bool CoversDanglingRva(rva_t rva) const { + return CoversRva(rva) && rva - rva_begin >= offset_size; + } + // Assumes valid |offset| (*cannot* be fake offset). + rva_t OffsetToRvaUnsafe(offset_t offset) const { + return offset - offset_begin + rva_begin; + } + // Assumes valid |rva| (*can* be dangling RVA). + offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const { + rva_t delta = rva - rva_begin; + return delta < offset_size ? 
delta + offset_begin + : fake_offset_begin + rva; + } + bool HasDanglingRva() const { return rva_size > offset_size; } + friend bool operator==(const Unit& a, const Unit& b) { + return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) == + std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size); + } + + offset_t offset_begin; + offset_t offset_size; + rva_t rva_begin; + rva_t rva_size; + }; + + // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit + // found, to reduce the number of OffsetToUnit() calls for clustered queries. + class OffsetToRvaCache { + public: + // Embeds |translator| for use. Now object lifetime is tied to |translator| + // lifetime. + explicit OffsetToRvaCache(const AddressTranslator& translator); + + rva_t Convert(offset_t offset) const; + + private: + const AddressTranslator& translator_; + mutable const AddressTranslator::Unit* cached_unit_ = nullptr; + + DISALLOW_COPY_AND_ASSIGN(OffsetToRvaCache); + }; + + // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit + // found, to reduce the number of RvaToUnit() calls for clustered queries. + class RvaToOffsetCache { + public: + // Embeds |translator| for use. Now object lifetime is tied to |translator| + // lifetime. + explicit RvaToOffsetCache(const AddressTranslator& translator); + + bool IsValid(rva_t rva) const; + offset_t Convert(rva_t rva) const; + + private: + const AddressTranslator& translator_; + mutable const AddressTranslator::Unit* cached_unit_ = nullptr; + + DISALLOW_COPY_AND_ASSIGN(RvaToOffsetCache); + }; + + enum Status { + kSuccess = 0, + kErrorOverflow, + kErrorBadOverlap, + kErrorBadOverlapDanglingRva, + kErrorFakeOffsetBeginTooLarge, + }; + + AddressTranslator(); + ~AddressTranslator(); + + // Consumes |units| to populate data in this class. Performs consistency + // checks and merges overlapping Units. Returns Status to indicate success. 
+ Status Initialize(std::vector&& units); + + // Returns the (possibly dangling) RVA corresponding to |offset|, or + // kInvalidRva if not found. + rva_t OffsetToRva(offset_t offset) const; + + // Returns the (possibly fake) offset corresponding to |rva|, or + // kInvalidOffset if not found (i.e., |rva| is non-existent). + offset_t RvaToOffset(rva_t rva) const; + + // For testing. + offset_t fake_offset_begin() const { return fake_offset_begin_; } + + const std::vector& units_sorted_by_offset() const { + return units_sorted_by_offset_; + } + + const std::vector& units_sorted_by_rva() const { + return units_sorted_by_rva_; + } + + private: + // Helper to find the Unit that contains given |offset| or |rva|. Returns null + // if not found. + const Unit* OffsetToUnit(offset_t offset) const; + const Unit* RvaToUnit(rva_t rva) const; + + // Storage of Units. All offset ranges are non-empty and disjoint. Likewise + // for all RVA ranges. + std::vector units_sorted_by_offset_; + std::vector units_sorted_by_rva_; + + // Conversion factor to translate between dangling RVAs and fake offsets. + offset_t fake_offset_begin_; + + DISALLOW_COPY_AND_ASSIGN(AddressTranslator); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ diff --git a/address_translator_unittest.cc b/address_translator_unittest.cc new file mode 100644 index 0000000..0aeff77 --- /dev/null +++ b/address_translator_unittest.cc @@ -0,0 +1,556 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/address_translator.h" + +#include +#include +#include + +#include "base/format_macros.h" +#include "base/strings/stringprintf.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// Test case structs. The convention of EXPECT() specifies "expected" value +// before "actual". 
However, AddressTranslator interfaces explicitly state "X +// to Y". So it is clearer in test cases to specify "input" before "expect". +struct OffsetToRvaTestCase { + offset_t input; + rva_t expect; +}; + +struct RvaToOffsetTestCase { + rva_t input; + offset_t expect; +}; + +class TestAddressTranslator : public AddressTranslator { + public: + using AddressTranslator::AddressTranslator; + + // Initialize() alternative that parses a visual representation of offset and + // RVA ranges. Illustrative example ("special" means '.' or '!'): + // "..AAA...|....aaaa" => "..AAA..." for offsets, and "....aaaa" for RVAs: + // - "..AAA...": First non-period character is at 2, so |offset_begin| = 2. + // - "..AAA...": There are 3 non-special characters, so |offset_size| = +3. + // - "....aaaa": First non-period character is at 4, so |rva_begin| = 4. + // - "....aaaa": There are 4 non-special characters, so |rva_size| = +4. + // For the special case of length-0 range, '!' can be used. For example, + // "...!...." specifies |begin| = 3 and |size| = +0. + AddressTranslator::Status InitializeWithStrings( + const std::vector& specs) { + std::vector units; + units.reserve(specs.size()); + for (const std::string& s : specs) { + size_t sep = s.find('|'); + CHECK_NE(sep, std::string::npos); + std::string s1 = s.substr(0, sep); + std::string s2 = s.substr(sep + 1); + + auto first_non_blank = [](const std::string& t) { + auto is_blank = [](char ch) { return ch == '.'; }; + return std::find_if_not(t.begin(), t.end(), is_blank) - t.begin(); + }; + auto count_non_special = [](const std::string& t) { + auto is_special = [](char ch) { return ch == '.' 
|| ch == '!'; }; + return t.size() - std::count_if(t.begin(), t.end(), is_special); + }; + units.push_back({static_cast(first_non_blank(s1)), + static_cast(count_non_special(s1)), + static_cast(first_non_blank(s2)), + static_cast(count_non_special(s2))}); + } + return Initialize(std::move(units)); + } +}; + +// Simple test: Initialize TestAddressTranslator using |specs|, and match +// |expected| results re. success or failure. +void SimpleTest(const std::vector& specs, + AddressTranslator::Status expected, + const std::string& case_name) { + TestAddressTranslator translator; + auto result = translator.InitializeWithStrings(specs); + EXPECT_EQ(expected, result) << case_name; +} + +// Test AddressTranslator::Initialize's Unit overlap and error checks over +// multiple test cases, each case consists of a fixed unit (specified as +// string), and a variable string taken from an list. +class TwoUnitOverlapTester { + public: + struct TestCase { + std::string unit_str; + AddressTranslator::Status expected; + }; + + static void RunTest(const std::string& unit_str1, + const std::vector& test_cases) { + for (size_t i = 0; i < test_cases.size(); ++i) { + const auto& test_case = test_cases[i]; + const std::string& unit_str2 = test_case.unit_str; + const std::string str = + base::StringPrintf("Case #%" PRIuS ": %s", i, unit_str2.c_str()); + SimpleTest({unit_str1, unit_str2}, test_case.expected, str); + // Switch order. Expect same results. + SimpleTest({unit_str2, unit_str1}, test_case.expected, str); + } + } +}; + +} // namespace + +TEST(AddressTranslatorTest, Empty) { + using AT = AddressTranslator; + TestAddressTranslator translator; + EXPECT_EQ(AT::kSuccess, + translator.Initialize(std::vector())); + offset_t fake_offset_begin = translator.fake_offset_begin(); + + // Optimized versions. 
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator); + + EXPECT_EQ(kInvalidRva, translator.OffsetToRva(0U)); + EXPECT_EQ(kInvalidRva, translator.OffsetToRva(100U)); + EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(0U)); + EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(100U)); + + EXPECT_EQ(kInvalidOffset, translator.RvaToOffset(0U)); + EXPECT_EQ(kInvalidOffset, translator.RvaToOffset(100U)); + EXPECT_EQ(kInvalidOffset, rva_to_offset.Convert(0U)); + EXPECT_EQ(kInvalidOffset, rva_to_offset.Convert(100U)); + + EXPECT_EQ(kInvalidRva, translator.OffsetToRva(fake_offset_begin)); + EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(fake_offset_begin)); +} + +TEST(AddressTranslatorTest, Single) { + using AT = AddressTranslator; + TestAddressTranslator translator; + // Offsets to RVA: [10, 30) -> [100, 120). + EXPECT_EQ(AT::kSuccess, translator.Initialize({{10U, +20U, 100U, +20U}})); + offset_t fake_offset_begin = translator.fake_offset_begin(); + + // Optimized versions. + AddressTranslator::OffsetToRvaCache offset_to_rva(translator); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator); + EXPECT_EQ(30U, fake_offset_begin); // Test implementation detail. + + // Offsets to RVAs. + OffsetToRvaTestCase test_cases1[] = { + {0U, kInvalidRva}, {9U, kInvalidRva}, {10U, 100U}, + {20U, 110U}, {29U, 119U}, {30U, kInvalidRva}, + }; + for (auto& test_case : test_cases1) { + EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input)); + EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input)); + } + + // RVAs to offsets. 
+ RvaToOffsetTestCase test_cases2[] = { + {0U, kInvalidOffset}, {99U, kInvalidOffset}, {100U, 10U}, + {110U, 20U}, {119U, 29U}, {120U, kInvalidOffset}, + }; + for (auto& test_case : test_cases2) { + EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input)); + EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input)); + } +} + +TEST(AddressTranslatorTest, SingleDanglingRva) { + using AT = AddressTranslator; + TestAddressTranslator translator; + // Offsets to RVA: [10, 30) -> [100, 120 + 7), so has dangling RVAs. + EXPECT_EQ(AT::kSuccess, + translator.Initialize({{10U, +20U, 100U, +20U + 7U}})); + offset_t fake_offset_begin = translator.fake_offset_begin(); + + EXPECT_EQ(30U, fake_offset_begin); // Test implementation detail. + + // Optimized versions. + AddressTranslator::OffsetToRvaCache offset_to_rva(translator); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator); + + // Offsets to RVAs. + OffsetToRvaTestCase test_cases1[] = { + {0U, kInvalidRva}, + {9U, kInvalidRva}, + {10U, 100U}, + {20U, 110U}, + {29U, 119U}, + {30U, kInvalidRva}, + // Fake offsets to dangling RVAs. + {fake_offset_begin + 100U, kInvalidRva}, + {fake_offset_begin + 119U, kInvalidRva}, + {fake_offset_begin + 120U, 120U}, + {fake_offset_begin + 126U, 126U}, + {fake_offset_begin + 127U, kInvalidRva}, + }; + for (auto& test_case : test_cases1) { + EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input)); + EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input)); + } + + // RVAs to offsets. + RvaToOffsetTestCase test_cases2[] = { + {0U, kInvalidOffset}, + {99U, kInvalidOffset}, + {100U, 10U}, + {110U, 20U}, + {119U, 29U}, + // Dangling RVAs to fake offsets. 
+ {120U, fake_offset_begin + 120U}, + {126U, fake_offset_begin + 126U}, + {127U, kInvalidOffset}, + }; + for (auto& test_case : test_cases2) { + EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input)); + EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input)); + } +} + +TEST(AddressTranslatorTest, BasicUsage) { + using AT = AddressTranslator; + TestAddressTranslator translator; + // Offsets covered: [10, 30), [40, 70), [70, 110). + // Map to RVAs: [200, 220 + 5), [300, 330), [100, 140), so has dangling RVAs. + auto result = translator.Initialize({ + {10U, +20U, 200U, +20U + 5U}, // Has dangling RVAs. + {40U, +30U, 300U, +20U}, // Extra offset truncated and ignored. + {50U, +20U, 310U, +20U}, // Overlap with previous: Merged. + {70U, +40U, 100U, +20U}, // Tangent with previous but inconsistent; extra + // offset truncated and ignored. + {90U, +20U, 120U, +20U}, // Tangent with previous and consistent: Merged. + }); + EXPECT_EQ(AT::kSuccess, result); + offset_t fake_offset_begin = translator.fake_offset_begin(); + EXPECT_EQ(110U, fake_offset_begin); // Test implementation detail. + + // Optimized versions. + AddressTranslator::OffsetToRvaCache offset_to_rva(translator); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator); + + // Offsets to RVAs. + OffsetToRvaTestCase test_cases1[] = { + {0U, kInvalidRva}, + {9U, kInvalidRva}, + {10U, 200U}, + {20U, 210U}, + {29U, 219U}, + {30U, kInvalidRva}, + {39U, kInvalidRva}, + {40U, 300U}, + {55U, 315U}, + {69U, 329U}, + {70U, 100U}, + {90U, 120U}, + {109U, 139U}, + {110U, kInvalidRva}, + // Fake offsets to dangling RVAs. + {fake_offset_begin + 220U, 220U}, + {fake_offset_begin + 224U, 224U}, + {fake_offset_begin + 225U, kInvalidRva}, + }; + for (auto& test_case : test_cases1) { + EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input)); + EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input)); + } + + // RVAs to offsets. 
+ RvaToOffsetTestCase test_cases2[] = { + {0U, kInvalidOffset}, + {99U, kInvalidOffset}, + {100U, 70U}, + {120U, 90U}, + {139U, 109U}, + {140U, kInvalidOffset}, + {199U, kInvalidOffset}, + {200U, 10U}, + {210U, 20U}, + {219U, 29U}, + {225U, kInvalidOffset}, + {299U, kInvalidOffset}, + {300U, 40U}, + {315U, 55U}, + {329U, 69U}, + {330U, kInvalidOffset}, + // Dangling RVAs to fake offsets. + {220U, fake_offset_begin + 220U}, + {224U, fake_offset_begin + 224U}, + {225U, kInvalidOffset}, + }; + for (auto& test_case : test_cases2) { + EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input)); + EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input)); + } +} + +TEST(AddressTranslatorTest, Overflow) { + using AT = AddressTranslator; + // Test assumes that offset_t and rva_t to be 32-bit. + static_assert(sizeof(offset_t) == 4 && sizeof(rva_t) == 4, + "Needs to update test."); + { + AddressTranslator translator1; + EXPECT_EQ(AT::kErrorOverflow, + translator1.Initialize({{0, +0xC0000000U, 0, +0xC0000000U}})); + } + { + AddressTranslator translator2; + EXPECT_EQ(AT::kErrorOverflow, + translator2.Initialize({{0, +0, 0, +0xC0000000U}})); + } + { + // Units are okay, but owing to limitations of the heuristic to convert + // dangling RVA to fake offset, AddressTranslator::Initialize() fails. 
+ AddressTranslator translator3; + EXPECT_EQ(AT::kErrorFakeOffsetBeginTooLarge, + translator3.Initialize( + {{32, +0, 32, +0x50000000U}, {0x50000000U, +16, 0, +16}})); + } +} + +// Sanity test for TestAddressTranslator::InitializeWithStrings(); +TEST(AddressTranslatorTest, AddUnitAsString) { + using AT = AddressTranslator; + { + TestAddressTranslator translator1; + EXPECT_EQ(AT::kSuccess, translator1.InitializeWithStrings({"..A..|.aaa."})); + AddressTranslator::Unit unit1 = translator1.units_sorted_by_offset()[0]; + EXPECT_EQ(2U, unit1.offset_begin); + EXPECT_EQ(+1U, unit1.offset_size); + EXPECT_EQ(1U, unit1.rva_begin); + EXPECT_EQ(+3U, unit1.rva_size); + } + { + TestAddressTranslator translator2; + EXPECT_EQ(AT::kSuccess, + translator2.InitializeWithStrings({".....!...|.bbbbbb..."})); + AddressTranslator::Unit unit2 = translator2.units_sorted_by_offset()[0]; + EXPECT_EQ(5U, unit2.offset_begin); + EXPECT_EQ(+0U, unit2.offset_size); + EXPECT_EQ(1U, unit2.rva_begin); + EXPECT_EQ(+6U, unit2.rva_size); + } +} + +// AddressTranslator::Initialize() lists Unit merging examples in comments. The +// format is different from that used by InitializeWithStrings(), but adapting +// them is easy, so we may as well do so. +TEST(AddressTranslatorTest, OverlapFromComment) { + using AT = AddressTranslator; + constexpr auto OK = AT::kSuccess; + struct { + const char* rva_str; // RVA comes first in this case. 
+ const char* offset_str; + AT::Status expected; + } test_cases[] = { + {"..ssssffff..", "..SSSSFFFF..", OK}, + {"..ssssffff..", "..SSSS..FFFF..", OK}, + {"..ssssffff..", "..FFFF..SSSS..", OK}, + {"..ssssffff..", "..SSOOFF..", AT::kErrorBadOverlap}, + {"..sssooofff..", "..SSSOOOFFF..", OK}, + {"..sssooofff..", "..SSSSSOFFFFF..", AT::kErrorBadOverlap}, + {"..sssooofff..", "..FFOOOOSS..", AT::kErrorBadOverlap}, + {"..sssooofff..", "..SSSOOOF..", OK}, + {"..sssooofff..", "..SSSOOOF..", OK}, + {"..sssooosss..", "..SSSOOOS..", OK}, + {"..sssooofff..", "..SSSOO..", OK}, + {"..sssooofff..", "..SSSOFFF..", AT::kErrorBadOverlapDanglingRva}, + {"..sssooosss..", "..SSSOOSSSS..", AT::kErrorBadOverlapDanglingRva}, + {"..oooooo..", "..OOO..", OK}, + }; + + auto to_period = [](std::string s, char ch) { // |s| passed by value. + std::replace(s.begin(), s.end(), ch, '.'); + return s; + }; + + size_t idx = 0; + for (const auto& test_case : test_cases) { + std::string base_str = + std::string(test_case.offset_str) + "|" + test_case.rva_str; + std::string unit_str1 = to_period(to_period(base_str, 'S'), 's'); + std::string unit_str2 = to_period(to_period(base_str, 'F'), 'f'); + SimpleTest({unit_str1, unit_str2}, test_case.expected, + base::StringPrintf("Case #%" PRIuS, idx)); + ++idx; + } +} + +TEST(AddressTranslatorTest, Overlap) { + using AT = AddressTranslator; + constexpr auto OK = AT::kSuccess; + constexpr const char* unit_str1 = "....AAA.......|.....aaa......"; + + std::vector test_cases = { + //....AAA.......|.....aaa...... The first Unit. NOLINT + {"....BBB.......|.....bbb......", OK}, + {"..BBB.........|...bbb........", OK}, + {"......BBB.....|.......bbb....", OK}, + {"..BBBBBBBBB...|...bbb........", OK}, // Extra offset get truncated. 
+ {"......BBBBBBBB|.......bbb....", OK}, + {"....BBB.......|.......bbb....", AT::kErrorBadOverlap}, + {"..BBB.........|.......bbb....", AT::kErrorBadOverlap}, + {".......BBB....|.......bbb....", AT::kErrorBadOverlap}, + //....AAA.......|.....aaa...... The first Unit. NOLINT + {"....BBB.......|..........bbb.", AT::kErrorBadOverlap}, + {"..........BBB.|.......bbb....", AT::kErrorBadOverlap}, + {"......BBB.....|.....bbb......", AT::kErrorBadOverlap}, + {"......BBB.....|..bbb.........", AT::kErrorBadOverlap}, + {"......BBB.....|bbb...........", AT::kErrorBadOverlap}, + {"BBB...........|bbb...........", OK}, // Disjoint. + {"........BBB...|.........bbb..", OK}, // Disjoint. + {"BBB...........|..........bbb.", OK}, // Disjoint, offset elsewhere. + //....AAA.......|.....aaa...... The first Unit. NOLINT + {".BBB..........|..bbb.........", OK}, // Tangent. + {".......BBB....|........bbb...", OK}, // Tangent. + {".BBB..........|........bbb...", OK}, // Tangent, offset elsewhere. + {"BBBBBB........|bbb...........", OK}, // Repeat, with extra offsets. + {"........BBBB..|.........bbb..", OK}, + {"BBBBBB........|..........bbb.", OK}, + {".BBBBBB.......|..bbb.........", OK}, + {".......BBBBB..|........bbb...", OK}, + //....AAA.......|.....aaa...... The first Unit. NOLINT + {".BBB..........|........bbb...", OK}, // Tangent, offset elsewhere. + {"..BBB.........|........bbb...", AT::kErrorBadOverlap}, + {"...BB.........|....bb........", OK}, + {"....BB........|.....bb.......", OK}, + {".......BB.....|........bb....", OK}, + {"...BBBBBB.....|....bbbbbb....", OK}, + {"..BBBBBB......|...bbbbbb.....", OK}, + {"......BBBBBB..|.......bbbbbb.", OK}, + //....AAA.......|.....aaa...... The first Unit. 
NOLINT + {"BBBBBBBBBBBBBB|bbbbbbbbbbbbbb", AT::kErrorBadOverlap}, + {"B.............|b.............", OK}, + {"B.............|.............b", OK}, + {"....B.........|.....b........", OK}, + {"....B.........|......b.......", AT::kErrorBadOverlap}, + {"....B.........|......b.......", AT::kErrorBadOverlap}, + {"....BBB.......|.....bb.......", OK}, + {"....BBBB......|.....bbb......", OK}, + //....AAA.......|.....aaa...... The first Unit. NOLINT + {".........BBBBB|.b............", OK}, + {"....AAA.......|.....!........", OK}, + {"....!.........|.....!........", OK}, // Empty units gets deleted early. + {"....!.........|..........!...", OK}, // Forgiving! + }; + + TwoUnitOverlapTester::RunTest(unit_str1, test_cases); +} + +TEST(AddressTranslatorTest, OverlapOffsetMultiple) { + using AT = AddressTranslator; + // Simple case. Note that RVA ranges don't get merged. + SimpleTest({"A..|a....", // + ".A.|..a..", // + "..A|....a"}, + AT::kSuccess, "Case #0"); + + // Offset range 1 overlaps 2 and 3, but truncation takes place to trim down + // offset ranges, so still successful. + SimpleTest({"..A|a....", // + ".AA|..a..", // + "AAA|....a"}, + AT::kSuccess, "Case #1"); + + // Offset range 2 and 3 overlap, so fail. + SimpleTest({"A..|a....", // + ".A.|..a..", // + ".A.|....a"}, + AT::kErrorBadOverlap, "Case #2"); +} + +TEST(AddressTranslatorTest, OverlapDangling) { + using AT = AddressTranslator; + constexpr auto OK = AT::kSuccess; + // First Unit has dangling offsets at + constexpr const char* unit_str1 = "....AAA.......|.....aaaaaa..."; + + std::vector test_cases = { + //....AAA.......|.....aaaaaa... The first Unit. 
NOLINT + {"....BBB.......|.....bbbbbb...", OK}, + {"....BBB.......|.....bbbbb....", OK}, + {"....BBB.......|.....bbbb.....", OK}, + {"....BBB.......|.....bbb......", OK}, + {".....BBB......|......bbb.....", AT::kErrorBadOverlapDanglingRva}, + {".....BB.......|......bbb.....", OK}, + {"....BBB.......|.....bbbbbbbb.", OK}, + {"..BBBBB.......|...bbbbbbbb...", OK}, + //....AAA.......|.....aaaaaa... The first Unit. NOLINT + {"......!.......|.bbb..........", AT::kErrorBadOverlap}, + {"..BBBBB.......|...bbbbb......", OK}, + {".......BBB....|.bbb..........", OK}, // Just tangent: Can go elsewhere. + {".......BBB....|.bbbb.........", OK}, // Can be another dangling RVA. + {".......!......|.bbbb.........", OK}, // Same with empty. + {"......!.......|.......!......", OK}, // Okay, but gets deleted. + {"......!.......|.......b......", AT::kErrorBadOverlapDanglingRva}, + {"......B.......|.......b......", OK}, + //....AAA.......|.....aaaaaa... The first Unit. NOLINT + {"......BBBB....|.......bbbb...", AT::kErrorBadOverlapDanglingRva}, + {"......BB......|.......bb.....", AT::kErrorBadOverlapDanglingRva}, + {"......BB......|bb............", AT::kErrorBadOverlap}, + }; + + TwoUnitOverlapTester::RunTest(unit_str1, test_cases); +} + +// Tests implementation since algorithm is tricky. +TEST(AddressTranslatorTest, Merge) { + using AT = AddressTranslator; + // Merge a bunch of overlapping Units into one big Unit. + std::vector test_case1 = { + "AAA.......|.aaa......", // Comment to prevent wrap by formatter. + "AA........|.aa.......", // + "..AAA.....|...aaa....", // + "....A.....|.....a....", // + ".....AAA..|......aaa.", // + "........A.|.........a", // + }; + // Try all 6! permutations. 
+ std::sort(test_case1.begin(), test_case1.end()); + do { + TestAddressTranslator translator1; + EXPECT_EQ(AT::kSuccess, translator1.InitializeWithStrings(test_case1)); + EXPECT_EQ(9U, translator1.fake_offset_begin()); + + AT::Unit expected{0U, +9U, 1U, +9U}; + EXPECT_EQ(1U, translator1.units_sorted_by_offset().size()); + EXPECT_EQ(expected, translator1.units_sorted_by_offset()[0]); + EXPECT_EQ(1U, translator1.units_sorted_by_rva().size()); + EXPECT_EQ(expected, translator1.units_sorted_by_rva()[0]); + } while (std::next_permutation(test_case1.begin(), test_case1.end())); + + // Merge RVA-adjacent Units into two Units. + std::vector test_case2 = { + ".....A..|.a......", // First Unit. + "......A.|..a.....", // + "A.......|...a....", // Second Unit: RVA-adjacent to first Unit, but + ".A......|....a...", // offset would become inconsistent, so a new + "..A.....|.....a..", // Unit gets created. + }; + // Try all 5! permutations. + std::sort(test_case2.begin(), test_case2.end()); + do { + TestAddressTranslator translator2; + EXPECT_EQ(AT::kSuccess, translator2.InitializeWithStrings(test_case2)); + EXPECT_EQ(7U, translator2.fake_offset_begin()); + + AT::Unit expected1{0U, +3U, 3U, +3U}; + AT::Unit expected2{5U, +2U, 1U, +2U}; + EXPECT_EQ(2U, translator2.units_sorted_by_offset().size()); + EXPECT_EQ(expected1, translator2.units_sorted_by_offset()[0]); + EXPECT_EQ(expected2, translator2.units_sorted_by_offset()[1]); + EXPECT_EQ(2U, translator2.units_sorted_by_rva().size()); + EXPECT_EQ(expected2, translator2.units_sorted_by_rva()[0]); + EXPECT_EQ(expected1, translator2.units_sorted_by_rva()[1]); + } while (std::next_permutation(test_case2.begin(), test_case2.end())); +} + +} // namespace zucchini diff --git a/algorithm.h b/algorithm.h new file mode 100644 index 0000000..7143a95 --- /dev/null +++ b/algorithm.h @@ -0,0 +1,84 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ALGORITHM_H_ +#define COMPONENTS_ZUCCHINI_ALGORITHM_H_ + +#include + +#include +#include +#include + +#include "base/logging.h" + +// Collection of simple utilities used for low-level computation. + +namespace zucchini { + +// Safely determines whether |[begin, begin + size)| is in |[0, bound)|. Note: +// The special case |[bound, bound)| is not considered to be in |[0, bound)|. +template +bool RangeIsBounded(T begin, T size, size_t bound) { + static_assert(std::is_unsigned::value, "Value type must be unsigned."); + return begin < bound && size <= bound - begin; +} + +// Safely determines whether |value| lies in |[begin, begin + size)|. Works +// properly even if |begin + size| overflows -- although such ranges are +// considered pathological, and should fail validation elsewhere. +template +bool RangeCovers(T begin, T size, T value) { + static_assert(std::is_unsigned::value, "Value type must be unsigned."); + return begin <= value && value - begin < size; +} + +// Returns the integer in inclusive range |[lo, hi]| that's closest to |value|. +// This departs from the usual usage of semi-inclusive ranges, but is useful +// because (1) sentinels can use this, (2) a valid output always exists. It is +// assumed that |lo <= hi|. +template +T InclusiveClamp(T value, T lo, T hi) { + static_assert(std::is_unsigned::value, "Value type must be unsigned."); + DCHECK_LE(lo, hi); + return value <= lo ? lo : (value >= hi ? hi : value); +} + +// Returns the minimum multiple of |m| that's no less than |x|. Assumes |m > 0| +// and |x| is sufficiently small so that no overflow occurs. +template +constexpr T ceil(T x, T m) { + static_assert(std::is_unsigned::value, "Value type must be unsigned."); + return T((x + m - 1) / m) * m; +} + +// Sorts values in |container| and removes duplicates. 
+template +void SortAndUniquify(std::vector* container) { + std::sort(container->begin(), container->end()); + container->erase(std::unique(container->begin(), container->end()), + container->end()); + container->shrink_to_fit(); +} + +// Copies bits at |pos| in |v| to all higher bits, and returns the result as the +// same int type as |v|. +template +constexpr T SignExtend(int pos, T v) { + int kNumBits = sizeof(T) * 8; + int kShift = kNumBits - 1 - pos; + return static_cast::type>(v << kShift) >> kShift; +} + +// Optimized version where |pos| becomes a template parameter. +template +constexpr T SignExtend(T v) { + constexpr int kNumBits = sizeof(T) * 8; + constexpr int kShift = kNumBits - 1 - pos; + return static_cast::type>(v << kShift) >> kShift; +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ALGORITHM_H_ diff --git a/algorithm_unittest.cc b/algorithm_unittest.cc new file mode 100644 index 0000000..2c685db --- /dev/null +++ b/algorithm_unittest.cc @@ -0,0 +1,206 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/algorithm.h" + +#include +#include + +#include "base/logging.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// Casting functions to specify signed 8-bit and 16-bit integer constants. +// For example, signed8(0xFF) == int8_t(-1). +inline int8_t signed8(uint8_t v) { + return *reinterpret_cast(&v); +} + +inline int32_t signed16(uint16_t v) { + return *reinterpret_cast(&v); +} + +} // namespace + +TEST(AlgorithmTest, RangeIsBounded) { + // Basic tests. 
+ EXPECT_TRUE(RangeIsBounded(0U, +0U, 10U)); + EXPECT_TRUE(RangeIsBounded(0U, +10U, 10U)); + EXPECT_TRUE(RangeIsBounded(1U, +9U, 10U)); + EXPECT_FALSE(RangeIsBounded(1U, +10U, 10U)); + EXPECT_TRUE(RangeIsBounded(8U, +1U, 10U)); + EXPECT_TRUE(RangeIsBounded(8U, +2U, 10U)); + EXPECT_TRUE(RangeIsBounded(9U, +0U, 10U)); + EXPECT_FALSE(RangeIsBounded(10U, +0U, 10U)); // ! + EXPECT_FALSE(RangeIsBounded(100U, +0U, 10U)); + EXPECT_FALSE(RangeIsBounded(100U, +1U, 10U)); + + // Test at boundary of overflow. + EXPECT_TRUE(RangeIsBounded(42U, +137U, 255U)); + EXPECT_TRUE(RangeIsBounded(0U, +255U, 255U)); + EXPECT_TRUE(RangeIsBounded(1U, +254U, 255U)); + EXPECT_FALSE(RangeIsBounded(1U, +255U, 255U)); + EXPECT_TRUE(RangeIsBounded(254U, +0U, 255U)); + EXPECT_TRUE(RangeIsBounded(254U, +1U, 255U)); + EXPECT_FALSE(RangeIsBounded(255U, +0U, 255U)); + EXPECT_FALSE(RangeIsBounded(255U, +3U, 255U)); + + // Test with uint32_t. + EXPECT_TRUE(RangeIsBounded(0U, +0x1000U, 0x2000U)); + EXPECT_TRUE(RangeIsBounded(0x0FFFU, +0x1000U, 0x2000U)); + EXPECT_TRUE(RangeIsBounded(0x1000U, +0x1000U, 0x2000U)); + EXPECT_FALSE(RangeIsBounded(0x1000U, +0x1001U, 0x2000U)); + EXPECT_TRUE(RangeIsBounded(0x1FFFU, +1U, 0x2000U)); + EXPECT_FALSE(RangeIsBounded(0x2000U, +0U, 0x2000U)); // ! + EXPECT_FALSE(RangeIsBounded(0x3000U, +0U, 0x2000U)); + EXPECT_FALSE(RangeIsBounded(0x3000U, +1U, 0x2000U)); + EXPECT_TRUE(RangeIsBounded(0U, +0xFFFFFFFEU, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeIsBounded(0U, +0xFFFFFFFFU, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeIsBounded(1U, +0xFFFFFFFEU, 0xFFFFFFFFU)); + EXPECT_FALSE(RangeIsBounded(1U, +0xFFFFFFFFU, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeIsBounded(0x80000000U, +0x7FFFFFFFU, 0xFFFFFFFFU)); + EXPECT_FALSE( + RangeIsBounded(0x80000000U, +0x80000000U, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeIsBounded(0xFFFFFFFEU, +1U, 0xFFFFFFFFU)); + EXPECT_FALSE(RangeIsBounded(0xFFFFFFFFU, +0U, 0xFFFFFFFFU)); // ! 
+ EXPECT_FALSE( + RangeIsBounded(0xFFFFFFFFU, +0xFFFFFFFFU, 0xFFFFFFFFU)); +} + +TEST(AlgorithmTest, RangeCovers) { + // Basic tests. + EXPECT_TRUE(RangeCovers(0U, +10U, 0U)); + EXPECT_TRUE(RangeCovers(0U, +10U, 5U)); + EXPECT_TRUE(RangeCovers(0U, +10U, 9U)); + EXPECT_FALSE(RangeCovers(0U, +10U, 10U)); + EXPECT_FALSE(RangeCovers(0U, +10U, 100U)); + EXPECT_FALSE(RangeCovers(0U, +10U, 255U)); + + EXPECT_FALSE(RangeCovers(42U, +137U, 0U)); + EXPECT_FALSE(RangeCovers(42U, +137U, 41U)); + EXPECT_TRUE(RangeCovers(42U, +137U, 42U)); + EXPECT_TRUE(RangeCovers(42U, +137U, 100U)); + EXPECT_TRUE(RangeCovers(42U, +137U, 178U)); + EXPECT_FALSE(RangeCovers(42U, +137U, 179U)); + EXPECT_FALSE(RangeCovers(42U, +137U, 255U)); + + // 0-size ranges. + EXPECT_FALSE(RangeCovers(42U, +0U, 41U)); + EXPECT_FALSE(RangeCovers(42U, +0U, 42U)); + EXPECT_FALSE(RangeCovers(42U, +0U, 43U)); + + // Test at boundary of overflow. + EXPECT_TRUE(RangeCovers(254U, +1U, 254U)); + EXPECT_FALSE(RangeCovers(254U, +1U, 255U)); + EXPECT_FALSE(RangeCovers(255U, +0U, 255U)); + EXPECT_TRUE(RangeCovers(255U, +1U, 255U)); + EXPECT_FALSE(RangeCovers(255U, +5U, 0U)); + + // Test with uint32_t. 
+ EXPECT_FALSE(RangeCovers(1234567U, +7654321U, 0U)); + EXPECT_FALSE(RangeCovers(1234567U, +7654321U, 1234566U)); + EXPECT_TRUE(RangeCovers(1234567U, +7654321U, 1234567U)); + EXPECT_TRUE(RangeCovers(1234567U, +7654321U, 4444444U)); + EXPECT_TRUE(RangeCovers(1234567U, +7654321U, 8888887U)); + EXPECT_FALSE(RangeCovers(1234567U, +7654321U, 8888888U)); + EXPECT_FALSE(RangeCovers(1234567U, +7654321U, 0x80000000U)); + EXPECT_FALSE(RangeCovers(1234567U, +7654321U, 0xFFFFFFFFU)); + EXPECT_FALSE(RangeCovers(0xFFFFFFFFU, +0, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeCovers(0xFFFFFFFFU, +1, 0xFFFFFFFFU)); + EXPECT_FALSE(RangeCovers(0xFFFFFFFFU, +2, 0)); +} + +TEST(AlgorithmTest, InclusiveClamp) { + EXPECT_EQ(1U, InclusiveClamp(0U, 1U, 9U)); + EXPECT_EQ(1U, InclusiveClamp(1U, 1U, 9U)); + EXPECT_EQ(5U, InclusiveClamp(5U, 1U, 9U)); + EXPECT_EQ(8U, InclusiveClamp(8U, 1U, 9U)); + EXPECT_EQ(9U, InclusiveClamp(9U, 1U, 9U)); + EXPECT_EQ(9U, InclusiveClamp(10U, 1U, 9U)); + EXPECT_EQ(9U, InclusiveClamp(0xFFFFFFFFU, 1U, 9U)); + EXPECT_EQ(42U, InclusiveClamp(0U, 42U, 42U)); + EXPECT_EQ(42U, InclusiveClamp(41U, 42U, 42U)); + EXPECT_EQ(42U, InclusiveClamp(42U, 42U, 42U)); + EXPECT_EQ(42U, InclusiveClamp(43U, 42U, 42U)); + EXPECT_EQ(0U, InclusiveClamp(0U, 0U, 0U)); + EXPECT_EQ(0xFFFFFFFF, + InclusiveClamp(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)); +} + +TEST(AlgorithmTest, Ceil) { + EXPECT_EQ(0U, ceil(0U, 2U)); + EXPECT_EQ(2U, ceil(1U, 2U)); + EXPECT_EQ(2U, ceil(2U, 2U)); + EXPECT_EQ(4U, ceil(3U, 2U)); + EXPECT_EQ(4U, ceil(4U, 2U)); + EXPECT_EQ(11U, ceil(10U, 11U)); + EXPECT_EQ(11U, ceil(11U, 11U)); + EXPECT_EQ(22U, ceil(12U, 11U)); + EXPECT_EQ(22U, ceil(21U, 11U)); + EXPECT_EQ(22U, ceil(22U, 11U)); + EXPECT_EQ(33U, ceil(23U, 11U)); +} + +TEST(AlgorithmTest, SignExtend) { + // 0x6A = 0b0110'1010. 
+ EXPECT_EQ(uint8_t(0x00), (SignExtend(0, 0x6A))); + EXPECT_EQ(signed8(0xFE), (SignExtend(1, signed8(0x6A)))); + EXPECT_EQ(uint8_t(0x02), (SignExtend(2, 0x6A))); + EXPECT_EQ(signed8(0xFA), (SignExtend(3, signed8(0x6A)))); + EXPECT_EQ(uint8_t(0x0A), (SignExtend(4, 0x6A))); + EXPECT_EQ(signed8(0xEA), (SignExtend(5, signed8(0x6A)))); + EXPECT_EQ(uint8_t(0xEA), (SignExtend(6, 0x6A))); + EXPECT_EQ(signed8(0x6A), (SignExtend(7, signed8(0x6A)))); + + EXPECT_EQ(signed16(0xFFFA), (SignExtend(3, 0x6A))); + EXPECT_EQ(uint16_t(0x000A), (SignExtend(4, 0x6A))); + + EXPECT_EQ(int32_t(0xFFFF8000), (SignExtend(15, 0x00008000))); + EXPECT_EQ(uint32_t(0x00008000U), (SignExtend(16, 0x00008000))); + EXPECT_EQ(int32_t(0xFFFFFC00), (SignExtend(10, 0x00000400))); + EXPECT_EQ(uint32_t(0xFFFFFFFFU), (SignExtend(31, 0xFFFFFFFF))); + + EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFE6ALL), + (SignExtend(9, 0x000000000000026ALL))); + EXPECT_EQ(int64_t(0x000000000000016ALL), + (SignExtend(9, 0xFFFFFFFFFFFFFD6ALL))); + EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFE6AULL), + (SignExtend(9, 0x000000000000026AULL))); + EXPECT_EQ(uint64_t(0x000000000000016AULL), + (SignExtend(9, 0xFFFFFFFFFFFFFD6AULL))); +} + +TEST(AlgorithmTest, SignExtendTemplated) { + // 0x6A = 0b0110'1010. 
+ EXPECT_EQ(uint8_t(0x00), (SignExtend<0, uint8_t>(0x6A))); + EXPECT_EQ(signed8(0xFE), (SignExtend<1, int8_t>(signed8(0x6A)))); + EXPECT_EQ(uint8_t(0x02), (SignExtend<2, uint8_t>(0x6A))); + EXPECT_EQ(signed8(0xFA), (SignExtend<3, int8_t>(signed8(0x6A)))); + EXPECT_EQ(uint8_t(0x0A), (SignExtend<4, uint8_t>(0x6A))); + EXPECT_EQ(signed8(0xEA), (SignExtend<5, int8_t>(signed8(0x6A)))); + EXPECT_EQ(uint8_t(0xEA), (SignExtend<6, uint8_t>(0x6A))); + EXPECT_EQ(signed8(0x6A), (SignExtend<7, int8_t>(signed8(0x6A)))); + + EXPECT_EQ(signed16(0xFFFA), (SignExtend<3, int16_t>(0x6A))); + EXPECT_EQ(uint16_t(0x000A), (SignExtend<4, uint16_t>(0x6A))); + + EXPECT_EQ(int32_t(0xFFFF8000), (SignExtend<15, int32_t>(0x00008000))); + EXPECT_EQ(uint32_t(0x00008000U), (SignExtend<16, uint32_t>(0x00008000))); + EXPECT_EQ(int32_t(0xFFFFFC00), (SignExtend<10, int32_t>(0x00000400))); + EXPECT_EQ(uint32_t(0xFFFFFFFFU), (SignExtend<31, uint32_t>(0xFFFFFFFF))); + + EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFE6ALL), + (SignExtend<9, int64_t>(0x000000000000026ALL))); + EXPECT_EQ(int64_t(0x000000000000016ALL), + (SignExtend<9, int64_t>(0xFFFFFFFFFFFFFD6ALL))); + EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFE6AULL), + (SignExtend<9, uint64_t>(0x000000000000026AULL))); + EXPECT_EQ(uint64_t(0x000000000000016AULL), + (SignExtend<9, uint64_t>(0xFFFFFFFFFFFFFD6AULL))); +} + +} // namespace zucchini diff --git a/binary_data_histogram.cc b/binary_data_histogram.cc new file mode 100644 index 0000000..785e8ea --- /dev/null +++ b/binary_data_histogram.cc @@ -0,0 +1,91 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/binary_data_histogram.h" + +#include +#include +#include + +#include "base/format_macros.h" +#include "base/logging.h" +#include "base/strings/stringprintf.h" + +namespace zucchini { + +/******** OutlierDetector ********/ + +OutlierDetector::OutlierDetector() = default; + +OutlierDetector::~OutlierDetector() = default; + +// For BinaryDataHistogram, |sample| is typically in interval [0, 1]. +void OutlierDetector::Add(double sample) { + ++n_; + sum_ += sample; + sum_of_squares_ += sample * sample; +} + +void OutlierDetector::Prepare() { + if (n_ > 0) { + mean_ = sum_ / n_; + standard_deviation_ = ::sqrt((sum_of_squares_ - sum_ * mean_) / + std::max(static_cast(1), n_ - 1)); + } +} + +std::string OutlierDetector::RenderStats() { + return base::StringPrintf("Mean = %.5f, StdDev = %.5f over %" PRIuS + " samples", + mean_, standard_deviation_, n_); +} + +// Constants are chosen for BinaryDataHistogram, where |sample| is typically in +// [0, 1]. +int OutlierDetector::DecideOutlier(double sample) { + // Lower bound to avoid divide-by-zero and penalizing tight clusters. + constexpr double kMinTolerance = 0.1; + // Number of standard deviations away from mean for value to become outlier. + constexpr double kSigmaBound = 1.9; + if (n_ <= 1) + return 0; + double tolerance = std::max(kMinTolerance, standard_deviation_); + double num_sigma = (sample - mean_) / tolerance; + return num_sigma > kSigmaBound ? 1 : num_sigma < -kSigmaBound ? -1 : 0; +} + +/******** BinaryDataHistogram ********/ + +BinaryDataHistogram::BinaryDataHistogram() = default; + +BinaryDataHistogram::~BinaryDataHistogram() = default; + +bool BinaryDataHistogram::Compute(ConstBufferView region) { + DCHECK(!histogram_); + // Binary data with size < 2 are invalid. 
+ if (region.size() < sizeof(uint16_t)) + return false; + DCHECK_LE(region.size(), + static_cast(std::numeric_limits::max())); + + histogram_ = std::make_unique(kNumBins); + size_ = region.size(); + // Number of 2-byte intervals fully contained in |region|. + size_t bound = size_ - sizeof(uint16_t) + 1; + for (size_t i = 0; i < bound; ++i) + ++histogram_[region.read(i)]; + return true; +} + +double BinaryDataHistogram::Distance(const BinaryDataHistogram& other) const { + DCHECK(IsValid() && other.IsValid()); + // Compute Manhattan (L1) distance between respective histograms. + double total_diff = 0; + for (int i = 0; i < kNumBins; ++i) + total_diff += std::abs(histogram_[i] - other.histogram_[i]); + // Normalize by total size, so result lies in [0, 1]. + return total_diff / (size_ + other.size_); +} + +} // namespace zucchini diff --git a/binary_data_histogram.h b/binary_data_histogram.h new file mode 100644 index 0000000..3950ab7 --- /dev/null +++ b/binary_data_histogram.h @@ -0,0 +1,91 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_ +#define COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_ + +#include +#include + +#include +#include + +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" + +namespace zucchini { + +// A class to detect outliers in a list of doubles using Chauvenet's criterion: +// Compute mean and standard deviation of observations, then determine whether +// a query value lies beyond a fixed number of standard deviations (sigmas) from +// the mean. The purpose of this test is to reduce the chance of false-positive +// ensemble matches. +class OutlierDetector { + public: + OutlierDetector(); + ~OutlierDetector(); + + // Incorporates |sample| into mean and standard deviation. 
+ void Add(double sample); + + // Prepares basic statistics for DecideOutlier() calls. Should be called after + // all samples have been added. + void Prepare(); + + // Renders current statistics as strings for logging. + std::string RenderStats(); + + // Heuristically decides whether |sample| is an outlier. Returns 1 if |sample| + // is "too high", 0 if |sample| is "normal", and -1 if |sample| is "too low". + // Must be called after Prepare(). + int DecideOutlier(double sample); + + private: + size_t n_ = 0; + double sum_ = 0; + double sum_of_squares_ = 0; + double mean_ = 0; + double standard_deviation_ = 0; + + DISALLOW_COPY_AND_ASSIGN(OutlierDetector); +}; + +// A class to compute similarity score between binary data. The heuristic here +// preprocesses input data to a size-65536 histogram, counting the frequency of +// consecutive 2-byte sequences. Therefore data with lengths < 2 are considered +// invalid -- but this is okay for Zucchini's use case. +class BinaryDataHistogram { + public: + BinaryDataHistogram(); + ~BinaryDataHistogram(); + + // Attempts to compute the histogram, returns true iff successful. + bool Compute(ConstBufferView region); + + bool IsValid() const { return static_cast(histogram_); } + + // Returns distance to another histogram (heuristics). If two binaries are + // identical then their histogram distance is 0. However, the converse is not + // true in general. For example, "aba" and "bab" are different, but their + // histogram distance is 0 (both histograms are {"ab": 1, "ba": 1}). + double Distance(const BinaryDataHistogram& other) const; + + private: + enum { kNumBins = 1 << (sizeof(uint16_t) * 8) }; + static_assert(kNumBins == 65536, "Incorrect constant computation."); + + // Size, in bytes, of the data over which the histogram was computed. + size_t size_ = 0; + + // 2^16 buckets holding counts of all 2-byte sequences in the data. 
The counts + // are stored as signed values to simplify computing the distance between two + // histograms. + std::unique_ptr histogram_; + + DISALLOW_COPY_AND_ASSIGN(BinaryDataHistogram); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_ diff --git a/binary_data_histogram_unittest.cc b/binary_data_histogram_unittest.cc new file mode 100644 index 0000000..ca71010 --- /dev/null +++ b/binary_data_histogram_unittest.cc @@ -0,0 +1,132 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/binary_data_histogram.h" + +#include + +#include +#include + +#include "components/zucchini/buffer_view.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(OutlierDetectorTest, Basic) { + auto make_detector = [](const std::vector& values) { + auto detector = std::make_unique(); + for (double v : values) + detector->Add(v); + detector->Prepare(); + return detector; + }; + + std::unique_ptr detector; + // No data: Should at least not cause error. + detector = make_detector({}); + EXPECT_EQ(0, detector->DecideOutlier(0.0)); + // Single point: Trivially inert. + detector = make_detector({0.5}); + EXPECT_EQ(0, detector->DecideOutlier(0.1)); + EXPECT_EQ(0, detector->DecideOutlier(0.5)); + EXPECT_EQ(0, detector->DecideOutlier(0.9)); + // Two identical points: StdDev is 0, so falls back to built-in tolerance. + detector = make_detector({0.5, 0.5}); + EXPECT_EQ(-1, detector->DecideOutlier(0.3)); + EXPECT_EQ(0, detector->DecideOutlier(0.499)); + EXPECT_EQ(0, detector->DecideOutlier(0.5)); + EXPECT_EQ(0, detector->DecideOutlier(0.501)); + EXPECT_EQ(1, detector->DecideOutlier(0.7)); + // Two separate points: Outlier test is pretty lax. 
+ detector = make_detector({0.4, 0.6}); + EXPECT_EQ(-1, detector->DecideOutlier(0.2)); + EXPECT_EQ(0, detector->DecideOutlier(0.3)); + EXPECT_EQ(0, detector->DecideOutlier(0.5)); + EXPECT_EQ(0, detector->DecideOutlier(0.7)); + EXPECT_EQ(1, detector->DecideOutlier(0.8)); + // Sharpen distribution by clustering toward norm: Now test is stricter. + detector = make_detector({0.4, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.6}); + EXPECT_EQ(-1, detector->DecideOutlier(0.3)); + EXPECT_EQ(0, detector->DecideOutlier(0.4)); + EXPECT_EQ(0, detector->DecideOutlier(0.5)); + EXPECT_EQ(0, detector->DecideOutlier(0.6)); + EXPECT_EQ(1, detector->DecideOutlier(0.7)); + // Shift numbers around: Mean is 0.3, and data order scrambled. + detector = make_detector({0.28, 0.2, 0.31, 0.4, 0.29, 0.32, 0.27, 0.30}); + EXPECT_EQ(-1, detector->DecideOutlier(0.0)); + EXPECT_EQ(-1, detector->DecideOutlier(0.1)); + EXPECT_EQ(0, detector->DecideOutlier(0.2)); + EXPECT_EQ(0, detector->DecideOutlier(0.3)); + EXPECT_EQ(0, detector->DecideOutlier(0.4)); + EXPECT_EQ(1, detector->DecideOutlier(0.5)); + EXPECT_EQ(1, detector->DecideOutlier(1.0)); + // Typical usage: Potential outlier would be part of original input data! + detector = make_detector({0.3, 0.29, 0.31, 0.0, 0.3, 0.32, 0.3, 0.29, 0.6}); + EXPECT_EQ(-1, detector->DecideOutlier(0.0)); + EXPECT_EQ(0, detector->DecideOutlier(0.28)); + EXPECT_EQ(0, detector->DecideOutlier(0.29)); + EXPECT_EQ(0, detector->DecideOutlier(0.3)); + EXPECT_EQ(0, detector->DecideOutlier(0.31)); + EXPECT_EQ(0, detector->DecideOutlier(0.32)); + EXPECT_EQ(1, detector->DecideOutlier(0.6)); +} + +TEST(BinaryDataHistogramTest, Basic) { + constexpr double kUninitScore = -1; + + constexpr uint8_t kTestData[] = {2, 137, 42, 0, 0, 0, 7, 11, 1, 11, 255}; + const size_t n = sizeof(kTestData); + ConstBufferView region(kTestData, n); + + std::vector prefix_histograms(n + 1); // Short to long. + std::vector suffix_histograms(n + 1); // Long to short. 
+ + for (size_t i = 0; i <= n; ++i) { + ConstBufferView prefix(region.begin(), i); + ConstBufferView suffix(region.begin() + i, n - i); + // If regions are smaller than 2 bytes then it is invalid. Else valid. + EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].Compute(prefix)); + EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].Compute(suffix)); + // IsValid() returns the same results. + EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].IsValid()); + EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].IsValid()); + } + + // Full-prefix = full-suffix = full data. + EXPECT_EQ(0.0, prefix_histograms[n].Distance(suffix_histograms[0])); + EXPECT_EQ(0.0, suffix_histograms[0].Distance(prefix_histograms[n])); + + // Testing heuristics without overreliance on implementation details. + + // Strict prefixes, in increasing size. Compare against full data. + double prev_prefix_score = kUninitScore; + for (size_t i = 2; i < n; ++i) { + double score = prefix_histograms[i].Distance(prefix_histograms[n]); + // Positivity. + EXPECT_GT(score, 0.0); + // Symmetry. + EXPECT_EQ(score, prefix_histograms[n].Distance(prefix_histograms[i])); + // Distance should decrease as prefix gets nearer to full data. + if (prev_prefix_score != kUninitScore) + EXPECT_LT(score, prev_prefix_score); + prev_prefix_score = score; + } + + // Strict suffixes, in decreasing size. Compare against full data. + double prev_suffix_score = -1; + for (size_t i = 1; i <= n - 2; ++i) { + double score = suffix_histograms[i].Distance(suffix_histograms[0]); + // Positivity. + EXPECT_GT(score, 0.0); + // Symmetry. + EXPECT_EQ(score, suffix_histograms[0].Distance(suffix_histograms[i])); + // Distance should increase as suffix gets farther from full data. 
+ if (prev_suffix_score != kUninitScore) + EXPECT_GT(score, prev_suffix_score); + prev_suffix_score = score; + } +} + +} // namespace zucchini diff --git a/buffer_sink.cc b/buffer_sink.cc new file mode 100644 index 0000000..5b89e3a --- /dev/null +++ b/buffer_sink.cc @@ -0,0 +1,11 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/buffer_sink.h" + +namespace zucchini { + +BufferSink::BufferSink(MutableBufferView buffer) : MutableBufferView(buffer) {} + +} // namespace zucchini diff --git a/buffer_sink.h b/buffer_sink.h new file mode 100644 index 0000000..c17f040 --- /dev/null +++ b/buffer_sink.h @@ -0,0 +1,68 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_BUFFER_SINK_H_ +#define COMPONENTS_ZUCCHINI_BUFFER_SINK_H_ + +#include + +#include +#include + +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" + +namespace zucchini { + +// BufferSink acts like an output stream with convenience methods to serialize +// data into a contiguous sequence of raw data. The underlying MutableBufferView +// emulates a cursor to track current write position, and guards against buffer +// overrun. Where applicable, BufferSink should be passed by pointer to maintain +// cursor progress across writes. 
+class BufferSink : public MutableBufferView { + public: + using iterator = MutableBufferView::iterator; + + using MutableBufferView::MutableBufferView; + BufferSink() = default; + explicit BufferSink(MutableBufferView buffer); + BufferSink(const BufferSink&) = default; + BufferSink& operator=(BufferSink&&) = default; + + // If sufficient space is available, writes the binary representation of + // |value| starting at the cursor, while advancing the cursor beyond the + // written region, and returns true. Otherwise returns false. + template + bool PutValue(const T& value) { + DCHECK_NE(begin(), nullptr); + if (Remaining() < sizeof(T)) + return false; + *reinterpret_cast(begin()) = value; + remove_prefix(sizeof(T)); + return true; + } + + // If sufficient space is available, writes the raw bytes [|first|, |last|) + // starting at the cursor, while advancing the cursor beyond the written + // region, and returns true. Otherwise returns false. + template + bool PutRange(It first, It last) { + static_assert(sizeof(typename std::iterator_traits::value_type) == + sizeof(uint8_t), + "value_type should fit in uint8_t"); + DCHECK_NE(begin(), nullptr); + DCHECK(last >= first); + if (Remaining() < size_type(last - first)) + return false; + std::copy(first, last, begin()); + remove_prefix(last - first); + return true; + } + + size_type Remaining() const { return size(); } +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_BUFFER_SINK_H_ diff --git a/buffer_sink_unittest.cc b/buffer_sink_unittest.cc new file mode 100644 index 0000000..33b788e --- /dev/null +++ b/buffer_sink_unittest.cc @@ -0,0 +1,71 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/buffer_sink.h" + +#include +#include + +#include + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +constexpr uint8_t kUninit = 0xFF; + +class BufferSinkTest : public testing::Test { + protected: + BufferSinkTest() + : buffer_(10, kUninit), sink_(buffer_.data(), buffer_.size()) {} + + std::vector buffer_; + BufferSink sink_; +}; + +TEST_F(BufferSinkTest, PutValue) { + EXPECT_EQ(size_t(10), sink_.Remaining()); + + EXPECT_TRUE(sink_.PutValue(uint32_t(0x76543210))); + EXPECT_EQ(size_t(6), sink_.Remaining()); + + EXPECT_TRUE(sink_.PutValue(uint32_t(0xFEDCBA98))); + EXPECT_EQ(size_t(2), sink_.Remaining()); + + EXPECT_FALSE(sink_.PutValue(uint32_t(0x00))); + EXPECT_EQ(size_t(2), sink_.Remaining()); + + EXPECT_TRUE(sink_.PutValue(uint16_t(0x0010))); + EXPECT_EQ(size_t(0), sink_.Remaining()); + + // Assuming little-endian architecture. + EXPECT_EQ(std::vector( + {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE, 0x10, 0x00}), + buffer_); +} + +TEST_F(BufferSinkTest, PutRange) { + std::vector range = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, + 0xDC, 0xFE, 0x10, 0x00, 0x42}; + + EXPECT_EQ(size_t(10), sink_.Remaining()); + EXPECT_FALSE(sink_.PutRange(range.begin(), range.end())); + EXPECT_EQ(size_t(10), sink_.Remaining()); + + EXPECT_TRUE(sink_.PutRange(range.begin(), range.begin() + 8)); + EXPECT_EQ(size_t(2), sink_.Remaining()); + EXPECT_EQ(std::vector({0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, + 0xFE, kUninit, kUninit}), + buffer_); + + EXPECT_FALSE(sink_.PutRange(range.begin(), range.begin() + 4)); + EXPECT_EQ(size_t(2), sink_.Remaining()); + + // range is not written + EXPECT_EQ(std::vector({0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, + 0xFE, kUninit, kUninit}), + buffer_); +} + +} // namespace zucchini diff --git a/buffer_source.cc b/buffer_source.cc new file mode 100644 index 0000000..721588a --- /dev/null +++ b/buffer_source.cc @@ -0,0 +1,105 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/buffer_source.h" + +#include + +#include "components/zucchini/algorithm.h" + +namespace zucchini { + +BufferSource::BufferSource(ConstBufferView buffer) : ConstBufferView(buffer) {} + +BufferSource& BufferSource::Skip(size_type n) { + remove_prefix(std::min(n, Remaining())); + return *this; +} + +bool BufferSource::CheckNextBytes(std::initializer_list bytes) const { + if (Remaining() < bytes.size()) + return false; + return std::mismatch(bytes.begin(), bytes.end(), begin()).first == + bytes.end(); +} + +bool BufferSource::ConsumeBytes(std::initializer_list bytes) { + if (!CheckNextBytes(bytes)) + return false; + remove_prefix(bytes.size()); + return true; +} + +bool BufferSource::GetRegion(size_type count, ConstBufferView* buffer) { + DCHECK_NE(begin(), nullptr); + if (Remaining() < count) + return false; + *buffer = ConstBufferView(begin(), count); + remove_prefix(count); + return true; +} + +// [0aaaaaaa] => 00000000'00000000'00000000'0aaaaaaa +// [1aaaaaaa 0bbbbbbb] => 00000000'00000000'00bbbbbb'baaaaaaa +// [1aaaaaaa 1bbbbbbb 0ccccccc] => 00000000'000ccccc'ccbbbbbb'baaaaaaa +// [1aaaaaaa 1bbbbbbb 1ccccccc 0ddddddd] => 0000dddd'dddccccc'ccbbbbbb'baaaaaaa +// [1aaaaaaa 1bbbbbbb 1ccccccc 1ddddddd 0???eeee] +// => eeeedddd'dddccccc'ccbbbbbb'baaaaaaa +// Note that "???" is discarded. Meanwhile, 1???eeee is invalid. +bool BufferSource::GetUleb128(uint32_t* ret) { + int shift_lim = + static_cast(std::min(kMaxLeb128Size, size())) * 7; + const_iterator cur = cbegin(); + uint32_t value = 0U; + for (int shift = 0; shift < shift_lim; shift += 7, ++cur) { + uint32_t b = *cur; + // When |shift == 28|, |(b & 0x7F) << shift| discards the "???" bits. 
+ value |= static_cast(b & 0x7F) << shift; + if (!(b & 0x80)) { + *ret = value; + seek(cur + 1); + return true; + } + } + return false; +} + +// [0Saaaaaa] => SSSSSSSS'SSSSSSSS'SSSSSSSS'SSaaaaaa +// [1aaaaaaa 0Sbbbbbb] => SSSSSSSS'SSSSSSSS'SSSbbbbb'baaaaaaa +// [1aaaaaaa 1bbbbbbb 0Scccccc] => SSSSSSSS'SSSScccc'ccbbbbbb'baaaaaaa +// [1aaaaaaa 1bbbbbbb 1ccccccc 0Sdddddd] => SSSSSddd'dddccccc'ccbbbbbb'baaaaaaa +// [1aaaaaaa 1bbbbbbb 1ccccccc 1ddddddd 0???Seee] +// => Seeedddd'dddccccc'ccbbbbbb'baaaaaaa +// Note that "???" is discarded. Meanwhile, 1???eeee is invalid. +bool BufferSource::GetSleb128(int32_t* ret) { + int shift_lim = + static_cast(std::min(kMaxLeb128Size, size())) * 7; + const_iterator cur = cbegin(); + int32_t value = 0; + for (int shift = 0; shift < shift_lim; shift += 7, ++cur) { + uint32_t b = *cur; + // When |shift == 28|, |(b & 0x7F) << shift| discards the "???" bits. + value |= static_cast(b & 0x7F) << shift; + if (!(b & 0x80)) { + *ret = (shift == 28) ? value : SignExtend(shift + 6, value); + seek(cur + 1); + return true; + } + } + return false; +} + +bool BufferSource::SkipLeb128() { + int lim = static_cast(std::min(kMaxLeb128Size, size())); + const_iterator cur = cbegin(); + for (int i = 0; i < lim; ++i, ++cur) { + if (!(*cur & 0x80)) { + seek(cur + 1); + return true; + } + } + return false; +} + +} // namespace zucchini diff --git a/buffer_source.h b/buffer_source.h new file mode 100644 index 0000000..d2a05b0 --- /dev/null +++ b/buffer_source.h @@ -0,0 +1,141 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_ +#define COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_ + +#include +#include + +#include +#include + +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" + +namespace zucchini { + +// BufferSource acts like an input stream with convenience methods to parse data +// from a contiguous sequence of raw data. The underlying ConstBufferView +// emulates a cursor to track current read position, and guards against buffer +// overrun. Where applicable, BufferSource should be passed by pointer to +// maintain cursor progress across reads. +class BufferSource : public ConstBufferView { + public: + // LEB128 info: http://dwarfstd.org/doc/dwarf-2.0.0.pdf , Section 7.6. + enum : size_t { kMaxLeb128Size = 5 }; + + static BufferSource FromRange(const_iterator first, const_iterator last) { + return BufferSource(ConstBufferView::FromRange(first, last)); + } + + using ConstBufferView::ConstBufferView; + BufferSource() = default; + explicit BufferSource(ConstBufferView buffer); + BufferSource(const BufferSource&) = default; + BufferSource& operator=(BufferSource&&) = default; + + // Moves the cursor forward by |n| bytes, or to the end if data is exhausted. + // Returns a reference to *this, to allow chaining, e.g.: + // if (!buffer_source.Skip(1024).GetValue(&value)) { + // ... // Handle error. + // } + // Notice that Skip() defers error handling to GetValue(). + BufferSource& Skip(size_type n); + + // Returns true if |value| matches data starting at the cursor when + // reinterpreted as the integral type |T|. + template + bool CheckNextValue(const T& value) const { + static_assert(std::is_integral::value, + "Value type must be an integral type"); + DCHECK_NE(begin(), nullptr); + if (Remaining() < sizeof(T)) + return false; + return value == *reinterpret_cast(begin()); + } + + // Returns true if the next bytes.size() bytes at the cursor match those in + // |bytes|. 
+ bool CheckNextBytes(std::initializer_list bytes) const; + + // Same as CheckNextBytes(), but moves the cursor by bytes.size() if read is + // successful. + bool ConsumeBytes(std::initializer_list bytes); + + // Tries to reinterpret data as type |T|, starting at the cursor and to write + // the result into |value|, while moving the cursor forward by sizeof(T). + // Returns true if sufficient data is available, and false otherwise. + template + bool GetValue(T* value) { + static_assert(std::is_standard_layout::value, + "Value type must be a standard layout type"); + + DCHECK_NE(begin(), nullptr); + if (Remaining() < sizeof(T)) + return false; + *value = *reinterpret_cast(begin()); + remove_prefix(sizeof(T)); + return true; + } + + // Tries to reinterpret data as type |T| at the cursor and to return a + // reinterpreted pointer of type |T| pointing into the underlying data, while + // moving the cursor forward by sizeof(T). Returns nullptr if insufficient + // data is available. + template + const T* GetPointer() { + static_assert(std::is_standard_layout::value, + "Value type must be a standard layout type"); + + DCHECK_NE(begin(), nullptr); + if (Remaining() < sizeof(T)) + return nullptr; + const T* ptr = reinterpret_cast(begin()); + remove_prefix(sizeof(T)); + return ptr; + } + + // Tries to reinterpret data as an array of type |T| with |count| elements, + // starting at the cursor, and to return a reinterpreted pointer of type |T| + // pointing into the underlying data, while advancing the cursor beyond the + // array. Returns nullptr if insufficient data is available. 
+ template + const T* GetArray(size_t count) { + static_assert(std::is_standard_layout::value, + "Value type must be a standard layout type"); + + if (Remaining() / sizeof(T) < count) + return nullptr; + const T* array = reinterpret_cast(begin()); + remove_prefix(count * sizeof(T)); + return array; + } + + // If sufficient data is available, assigns |buffer| to point to a region of + // |size| bytes starting at the cursor, while advancing the cursor beyond the + // region, and returns true. Otherwise returns false. + bool GetRegion(size_type size, ConstBufferView* buffer); + + // Reads an Unsigned Little Endian Base 128 (uleb128) int at |first_|. If + // successful, writes the result to |value|, advances |first_|, and returns + // true. Otherwise returns false. + bool GetUleb128(uint32_t* value); + + // Reads a Signed Little Endian Base 128 (sleb128) int at |first_|. If + // successful, writes the result to |value|, advances |first_|, and returns + // true. Otherwise returns false. + bool GetSleb128(int32_t* value); + + // Reads uleb128 / sleb128 at |first_| but discards the result. If successful, + // advances |first_| and returns true. Otherwise returns false. + bool SkipLeb128(); + + // Returns the number of bytes remaining from cursor until end. + size_type Remaining() const { return size(); } +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_ diff --git a/buffer_source_unittest.cc b/buffer_source_unittest.cc new file mode 100644 index 0000000..e8f00c5 --- /dev/null +++ b/buffer_source_unittest.cc @@ -0,0 +1,347 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/buffer_source.h" + +#include +#include + +#include +#include +#include +#include + +#include "components/zucchini/test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +using vec = std::vector; + +class BufferSourceTest : public testing::Test { + protected: + std::vector bytes_ = ParseHexString("10 32 54 76 98 BA DC FE 10 00"); + + BufferSource source_ = {bytes_.data(), bytes_.size()}; +}; + +TEST_F(BufferSourceTest, Skip) { + EXPECT_EQ(bytes_.size(), source_.Remaining()); + source_.Skip(2); + EXPECT_EQ(bytes_.size() - 2, source_.Remaining()); + source_.Skip(10); // Skipping past end just moves cursor to end. + EXPECT_EQ(size_t(0), source_.Remaining()); +} + +TEST_F(BufferSourceTest, CheckNextBytes) { + EXPECT_TRUE(source_.CheckNextBytes({0x10, 0x32, 0x54, 0x76})); + source_.Skip(4); + EXPECT_TRUE(source_.CheckNextBytes({0x98, 0xBA, 0xDC, 0xFE})); + + // Cursor has not advanced, so check fails. + EXPECT_FALSE(source_.CheckNextBytes({0x10, 0x00})); + + source_.Skip(4); + EXPECT_EQ(size_t(2), source_.Remaining()); + + // Goes beyond end by 2 bytes. + EXPECT_FALSE(source_.CheckNextBytes({0x10, 0x00, 0x00, 0x00})); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, ConsumeBytes) { + EXPECT_FALSE(source_.ConsumeBytes({0x10, 0x00})); + EXPECT_EQ(bytes_.size(), source_.Remaining()); + EXPECT_TRUE(source_.ConsumeBytes({0x10, 0x32, 0x54, 0x76})); + EXPECT_EQ(size_t(6), source_.Remaining()); + EXPECT_TRUE(source_.ConsumeBytes({0x98, 0xBA, 0xDC, 0xFE})); + EXPECT_EQ(size_t(2), source_.Remaining()); + + // Goes beyond end by 2 bytes. 
+ EXPECT_FALSE(source_.ConsumeBytes({0x10, 0x00, 0x00, 0x00})); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, CheckNextValue) { + EXPECT_TRUE(source_.CheckNextValue(uint32_t(0x76543210))); + EXPECT_FALSE(source_.CheckNextValue(uint32_t(0x0))); + EXPECT_TRUE(source_.CheckNextValue(uint64_t(0xFEDCBA9876543210))); + EXPECT_FALSE(source_.CheckNextValue(uint64_t(0x0))); + + source_.Skip(8); + EXPECT_EQ(size_t(2), source_.Remaining()); + + // Goes beyond end by 2 bytes. + EXPECT_FALSE(source_.CheckNextValue(uint32_t(0x1000))); +} + +// Supported by MSVC, g++, and clang++. +// Ensures no gaps in packing. +#pragma pack(push, 1) +struct ValueType { + uint32_t a; + uint16_t b; +}; +#pragma pack(pop) + +TEST_F(BufferSourceTest, GetValueIntegral) { + uint32_t value = 0; + EXPECT_TRUE(source_.GetValue(&value)); + EXPECT_EQ(uint32_t(0x76543210), value); + EXPECT_EQ(size_t(6), source_.Remaining()); + + EXPECT_TRUE(source_.GetValue(&value)); + EXPECT_EQ(uint32_t(0xFEDCBA98), value); + EXPECT_EQ(size_t(2), source_.Remaining()); + + EXPECT_FALSE(source_.GetValue(&value)); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetValueAggregate) { + ValueType value = {}; + EXPECT_TRUE(source_.GetValue(&value)); + EXPECT_EQ(uint32_t(0x76543210), value.a); + EXPECT_EQ(uint32_t(0xBA98), value.b); + EXPECT_EQ(size_t(4), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetRegion) { + ConstBufferView region; + EXPECT_TRUE(source_.GetRegion(0, &region)); + EXPECT_EQ(bytes_.size(), source_.Remaining()); + EXPECT_TRUE(region.empty()); + + EXPECT_TRUE(source_.GetRegion(2, &region)); + EXPECT_EQ(size_t(2), region.size()); + EXPECT_EQ(vec({0x10, 0x32}), vec(region.begin(), region.end())); + EXPECT_EQ(size_t(8), source_.Remaining()); + + EXPECT_FALSE(source_.GetRegion(bytes_.size(), &region)); + EXPECT_EQ(size_t(8), source_.Remaining()); + // |region| is left untouched.
+ EXPECT_EQ(vec({0x10, 0x32}), vec(region.begin(), region.end())); + EXPECT_EQ(size_t(2), region.size()); +} + +TEST_F(BufferSourceTest, GetPointerIntegral) { + const uint32_t* ptr = source_.GetPointer(); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0x76543210), *ptr); + EXPECT_EQ(size_t(6), source_.Remaining()); + + ptr = source_.GetPointer(); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0xFEDCBA98), *ptr); + EXPECT_EQ(size_t(2), source_.Remaining()); + + EXPECT_EQ(nullptr, source_.GetPointer()); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetPointerAggregate) { + const ValueType* ptr = source_.GetPointer(); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0x76543210), ptr->a); + EXPECT_EQ(uint32_t(0xBA98), ptr->b); + EXPECT_EQ(size_t(4), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetArrayIntegral) { + EXPECT_EQ(nullptr, source_.GetArray(3)); + + const uint32_t* ptr = source_.GetArray(2); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0x76543210), ptr[0]); + EXPECT_EQ(uint32_t(0xFEDCBA98), ptr[1]); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetArrayAggregate) { + const ValueType* ptr = source_.GetArray(2); + EXPECT_EQ(nullptr, ptr); + + ptr = source_.GetArray(1); + + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0x76543210), ptr[0].a); + EXPECT_EQ(uint32_t(0xBA98), ptr[0].b); + EXPECT_EQ(size_t(4), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetUleb128) { + using size_type = BufferSource::size_type; + // Result = {success, value, bytes_consumed}. + using Result = std::tuple; + + constexpr uint32_t kUnInit = 0xCCCCCCCC; // Arbitrary value. 
+ constexpr Result kBad{false, kUnInit, 0U}; + + auto run = [kUnInit](const std::string hex_string) -> Result { + std::vector bytes = ParseHexString(hex_string); + BufferSource source(ConstBufferView{bytes.data(), bytes.size()}); + BufferSource::iterator base = source.begin(); + // Initialize |value| to |kUnInit| to ensure no write on failure. + uint32_t value = kUnInit; + bool success = source.GetUleb128(&value); + return {success, value, source.begin() - base}; + }; + + auto good = [](uint32_t value, size_type bytes_consumed) -> Result { + return Result{true, value, bytes_consumed}; + }; + + EXPECT_EQ(good(0x0U, 1U), run("00")); + EXPECT_EQ(good(0x20U, 1U), run("20")); + EXPECT_EQ(good(0x42U, 1U), run("42")); + EXPECT_EQ(good(0x7FU, 1U), run("7F")); + EXPECT_EQ(kBad, run("80")); // Out of data. + EXPECT_EQ(good(0x0U, 2U), run("80 00")); // Redundant code. + EXPECT_EQ(good(0x80U, 2U), run("80 01")); + EXPECT_EQ(good(0x7FU, 2U), run("FF 00")); // Redundant (unsigned). + EXPECT_EQ(good(0x3FFFU, 2U), run("FF 7F")); + EXPECT_EQ(good(0x0U, 1U), run("00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80")); // Out of data. + EXPECT_EQ(kBad, run("F1 88")); // Out of data. + EXPECT_EQ(good(0x0U, 3U), run("80 80 00")); // Redundant code. + EXPECT_EQ(good(0x4000U, 3U), run("80 80 01")); + EXPECT_EQ(good(0x00100000U, 3U), run("80 80 40")); + EXPECT_EQ(good(0x001FFFFFU, 3U), run("FF FF 7F")); + EXPECT_EQ(good(0x0U, 1U), run("00 00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80 80")); // Out of data. + EXPECT_EQ(kBad, run("AB CD EF")); // Out of data. + EXPECT_EQ(good(0x0U, 4U), run("80 80 80 00")); // Redundant code. + EXPECT_EQ(good(0x00100000U, 4U), run("80 80 C0 00")); + EXPECT_EQ(good(0x00200000U, 4U), run("80 80 80 01")); + EXPECT_EQ(good(0x08000000U, 4U), run("80 80 80 40")); + EXPECT_EQ(good(0x001FC07FU, 4U), run("FF 80 FF 00")); + EXPECT_EQ(good(0x0U, 5U), run("80 80 80 80 00")); // Redundant code. 
+ EXPECT_EQ(good(0x10000000U, 5U), run("80 80 80 80 01")); + EXPECT_EQ(good(0x10204081U, 5U), run("81 81 81 81 01")); + EXPECT_EQ(good(0x7FFFFFFFU, 5U), run("FF FF FF FF 07")); + EXPECT_EQ(good(0x80000000U, 5U), run("80 80 80 80 08")); + EXPECT_EQ(good(0xFFFFFFFFU, 5U), run("FF FF FF FF 0F")); + EXPECT_EQ(kBad, run("FF FF FF FF 80")); // Too long / out of data. + EXPECT_EQ(good(0x0FFFFFFFU, 5U), run("FF FF FF FF 10")); // "1" discarded. + EXPECT_EQ(good(0x00000000U, 5U), run("80 80 80 80 20")); // "2" discarded. + EXPECT_EQ(good(0xA54A952AU, 5U), run("AA AA AA AA 7A")); // "7" discarded. + EXPECT_EQ(kBad, run("FF FF FF FF FF 00")); // Too long. +} + +TEST_F(BufferSourceTest, GetSleb128) { + using size_type = BufferSource::size_type; + // Result = {success, value, bytes_consumed}. + using Result = std::tuple; + + constexpr int32_t kUnInit = 0xCCCCCCCC; // Arbitrary value. + constexpr Result kBad{false, kUnInit, 0U}; + + auto run = [kUnInit](const std::string hex_string) -> Result { + std::vector bytes = ParseHexString(hex_string); + BufferSource source(ConstBufferView{bytes.data(), bytes.size()}); + BufferSource::iterator base = source.begin(); + // Initialize |value| to |kUnInit| to ensure no write on failure. + int32_t value = kUnInit; + bool success = source.GetSleb128(&value); + return {success, value, source.begin() - base}; + }; + + auto good = [](int32_t value, size_type bytes_consumed) -> Result { + return Result{true, value, bytes_consumed}; + }; + + EXPECT_EQ(good(0x0, 1U), run("00")); + EXPECT_EQ(good(0x20U, 1U), run("20")); + EXPECT_EQ(good(-0x3E, 1U), run("42")); + EXPECT_EQ(good(-0x1, 1U), run("7F")); + EXPECT_EQ(kBad, run("80")); // Out of data. + EXPECT_EQ(good(0x0, 2U), run("80 00")); // Redundant code. + EXPECT_EQ(good(0x80, 2U), run("80 01")); + EXPECT_EQ(good(0x7F, 2U), run("FF 00")); // Not redundant. + EXPECT_EQ(good(-0x1, 2U), run("FF 7F")); // Redundant code. + EXPECT_EQ(good(0x0, 1U), run("00 80")); // Only reads byte 0.
+ EXPECT_EQ(kBad, run("80 80")); // Out of data. + EXPECT_EQ(kBad, run("F1 88")); // Out of data. + EXPECT_EQ(good(0x0, 3U), run("80 80 00")); // Redundant code. + EXPECT_EQ(good(0x4000, 3U), run("80 80 01")); + EXPECT_EQ(good(-0x100000, 3U), run("80 80 40")); + EXPECT_EQ(good(-0x1, 3U), run("FF FF 7F")); // Redundant code. + EXPECT_EQ(good(0x0, 1U), run("00 00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80 80")); // Out of data. + EXPECT_EQ(kBad, run("AB CD EF")); // Out of data. + EXPECT_EQ(good(0x0, 4U), run("80 80 80 00")); // Redundant code. + EXPECT_EQ(good(0x00100000, 4U), run("80 80 C0 00")); + EXPECT_EQ(good(0x00200000, 4U), run("80 80 80 01")); + EXPECT_EQ(good(-static_cast(0x08000000), 4U), run("80 80 80 40")); + EXPECT_EQ(good(0x001FC07F, 4U), run("FF 80 FF 00")); + EXPECT_EQ(good(0x0, 5U), run("80 80 80 80 00")); // Redundant code. + EXPECT_EQ(good(0x10000000, 5U), run("80 80 80 80 01")); + EXPECT_EQ(good(0x10204081, 5U), run("81 81 81 81 01")); + EXPECT_EQ(good(0x7FFFFFFF, 5U), run("FF FF FF FF 07")); + EXPECT_EQ(good(-static_cast(0x80000000), 5U), run("80 80 80 80 08")); + EXPECT_EQ(good(-0x1, 5U), run("FF FF FF FF 0F")); // Redundant code. + EXPECT_EQ(kBad, run("FF FF FF FF 80")); // Too long / out of data. + EXPECT_EQ(good(0x0FFFFFFF, 5U), run("FF FF FF FF 10")); // "1" discarded. + EXPECT_EQ(good(0x00000000, 5U), run("80 80 80 80 20")); // "2" discarded. + EXPECT_EQ(good(-0x5AB56AD6, 5U), run("AA AA AA AA 7A")); // "7" discarded. + EXPECT_EQ(kBad, run("FF FF FF FF FF 00")); // Too long. +} + +TEST_F(BufferSourceTest, SkipLeb128) { + using size_type = BufferSource::size_type; + // Result = {success, bytes_consumed}.
+ using Result = std::tuple; + + constexpr Result kBad{false, 0U}; + + auto run = [](const std::string hex_string) -> Result { + std::vector bytes = ParseHexString(hex_string); + BufferSource source(ConstBufferView{bytes.data(), bytes.size()}); + BufferSource::iterator base = source.begin(); + bool success = source.SkipLeb128(); + return {success, source.begin() - base}; + }; + + auto good = [](size_type bytes_consumed) -> Result { + return Result{true, bytes_consumed}; + }; + + EXPECT_EQ(good(1U), run("00")); + EXPECT_EQ(good(1U), run("20")); + EXPECT_EQ(good(1U), run("42")); + EXPECT_EQ(good(1U), run("7F")); + EXPECT_EQ(kBad, run("80")); // Out of data. + EXPECT_EQ(good(2U), run("80 00")); // Redundant code. + EXPECT_EQ(good(2U), run("80 01")); + EXPECT_EQ(good(2U), run("FF 00")); // Redundant (unsigned). + EXPECT_EQ(good(2U), run("FF 7F")); + EXPECT_EQ(good(1U), run("00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80")); // Out of data. + EXPECT_EQ(kBad, run("F1 88")); // Out of data. + EXPECT_EQ(good(3U), run("80 80 00")); // Redundant code. + EXPECT_EQ(good(3U), run("80 80 01")); + EXPECT_EQ(good(3U), run("80 80 40")); + EXPECT_EQ(good(3U), run("FF FF 7F")); + EXPECT_EQ(good(1U), run("00 00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80 80")); // Out of data. + EXPECT_EQ(kBad, run("AB CD EF")); // Out of data. + EXPECT_EQ(good(4U), run("80 80 80 00")); // Redundant code. + EXPECT_EQ(good(4U), run("80 80 C0 00")); + EXPECT_EQ(good(4U), run("80 80 80 01")); + EXPECT_EQ(good(4U), run("80 80 80 40")); + EXPECT_EQ(good(4U), run("FF 80 FF 00")); + EXPECT_EQ(good(5U), run("80 80 80 80 00")); // Redundant code. + EXPECT_EQ(good(5U), run("80 80 80 80 01")); + EXPECT_EQ(good(5U), run("81 81 81 81 01")); + EXPECT_EQ(good(5U), run("FF FF FF FF 07")); + EXPECT_EQ(good(5U), run("80 80 80 80 08")); + EXPECT_EQ(good(5U), run("FF FF FF FF 0F")); + EXPECT_EQ(kBad, run("FF FF FF FF 80")); // Too long / out of data. 
+ EXPECT_EQ(good(5U), run("FF FF FF FF 10")); // "1" discarded. + EXPECT_EQ(good(5U), run("80 80 80 80 20")); // "2" discarded. + EXPECT_EQ(good(5U), run("AA AA AA AA 7A")); // "7" discarded. + EXPECT_EQ(kBad, run("FF FF FF FF FF 00")); // Too long. +} + +} // namespace zucchini diff --git a/buffer_view.h b/buffer_view.h new file mode 100644 index 0000000..a7dfd17 --- /dev/null +++ b/buffer_view.h @@ -0,0 +1,201 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_ +#define COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_ + +#include +#include + +#include +#include + +#include "base/logging.h" +#include "components/zucchini/algorithm.h" + +namespace zucchini { + +// Describes a region within a buffer, with starting offset and size. +struct BufferRegion { + // The region data are stored as |offset| and |size|, but often it is useful + // to represent it as an interval [lo(), hi()) = [offset, offset + size). + size_t lo() const { return offset; } + size_t hi() const { return offset + size; } + + // Returns whether the Region fits in |[0, container_size)|. Special case: + // a size-0 region starting at |container_size| does not fit. + bool FitsIn(size_t container_size) const { + return offset < container_size && container_size - offset >= size; + } + + // Returns |v| clipped to the inclusive range |[lo(), hi()]|. + size_t InclusiveClamp(size_t v) const { + return zucchini::InclusiveClamp(v, lo(), hi()); + } + friend bool operator==(const BufferRegion& a, const BufferRegion& b) { + return a.offset == b.offset && a.size == b.size; + } + friend bool operator!=(const BufferRegion& a, const BufferRegion& b) { + return !(a == b); + } + + // Region data use size_t to match BufferViewBase::size_type, to make it + // convenient to index into buffer view. 
+ size_t offset; + size_t size; +}; + +namespace internal { + +// TODO(huangs): Rename to BasicBufferView. +// BufferViewBase should not be used directly; it is an implementation used for +// both BufferView and MutableBufferView. +template +class BufferViewBase { + public: + using value_type = T; + using reference = T&; + using pointer = T*; + using iterator = T*; + using const_iterator = typename std::add_const::type*; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + static BufferViewBase FromRange(iterator first, iterator last) { + DCHECK_GE(last, first); + BufferViewBase ret; + ret.first_ = first; + ret.last_ = last; + return ret; + } + + BufferViewBase() = default; + + BufferViewBase(iterator first, size_type size) + : first_(first), last_(first_ + size) { + DCHECK_GE(last_, first_); + } + + template + BufferViewBase(const BufferViewBase& that) + : first_(that.begin()), last_(that.end()) {} + + template + BufferViewBase(BufferViewBase&& that) + : first_(that.begin()), last_(that.end()) {} + + BufferViewBase(const BufferViewBase&) = default; + BufferViewBase& operator=(const BufferViewBase&) = default; + + // Iterators + + iterator begin() const { return first_; } + iterator end() const { return last_; } + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + // Capacity + + bool empty() const { return first_ == last_; } + size_type size() const { return last_ - first_; } + + // Returns whether the buffer is large enough to cover |region|. + bool covers(const BufferRegion& region) const { + return region.FitsIn(size()); + } + + // Element access + + // Returns the raw value at specified location |pos|. + // If |pos| is not within the range of the buffer, the process is terminated. + reference operator[](size_type pos) const { + CHECK_LT(pos, size()); + return first_[pos]; + } + + // Returns a sub-buffer described by |region|. 
+ BufferViewBase operator[](BufferRegion region) const { + DCHECK_LE(region.offset, size()); + DCHECK_LE(region.size, size() - region.offset); + return {begin() + region.offset, region.size}; + } + + template + const U& read(size_type pos) const { + CHECK_LE(pos + sizeof(U), size()); + return *reinterpret_cast(begin() + pos); + } + + template + void write(size_type pos, const U& value) { + CHECK_LE(pos + sizeof(U), size()); + *reinterpret_cast(begin() + pos) = value; + } + + template + bool can_access(size_type pos) const { + return pos < size() && size() - pos >= sizeof(U); + } + + // Returns a BufferRegion describing the full view, with offset = 0. If the + // BufferViewBase is derived from another, this does *not* return the + // original region used for its definition (hence "local"). + BufferRegion local_region() const { return BufferRegion{0, size()}; } + + bool equals(BufferViewBase other) const { + return size() == other.size() && std::equal(begin(), end(), other.begin()); + } + + // Modifiers + + void shrink(size_type new_size) { + DCHECK_LE(first_ + new_size, last_); + last_ = first_ + new_size; + } + + // Moves the start of the view forward by n bytes. + void remove_prefix(size_type n) { + DCHECK_LE(n, size()); + first_ += n; + } + + // Moves the start of the view to |it|, which is in range [begin(), end()]. + void seek(iterator it) { + DCHECK_GE(it, begin()); + DCHECK_LE(it, end()); + first_ = it; + } + + // Given |origin| that contains |*this|, minimally increase |first_| (possibly + // by 0) so that |first_ <= last_|, and |first_ - origin.first_| is a multiple + // of |alignment|. On success, updates |first_| and returns true. Otherwise + // returns false.
+ bool AlignOn(BufferViewBase origin, size_type alignment) { + DCHECK_GT(alignment, 0U); + DCHECK_LE(origin.first_, first_); + DCHECK_GE(origin.last_, last_); + size_type aligned_size = + ceil(static_cast(first_ - origin.first_), alignment); + if (aligned_size > static_cast(last_ - origin.first_)) + return false; + first_ = origin.first_ + aligned_size; + return true; + } + + private: + iterator first_ = nullptr; + iterator last_ = nullptr; +}; + +} // namespace internal + +// Classes to encapsulate a contiguous sequence of raw data, without owning the +// encapsulated memory regions. These are intended to be used as value types. + +using ConstBufferView = internal::BufferViewBase; +using MutableBufferView = internal::BufferViewBase; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_ diff --git a/buffer_view_unittest.cc b/buffer_view_unittest.cc new file mode 100644 index 0000000..cfb3d9b --- /dev/null +++ b/buffer_view_unittest.cc @@ -0,0 +1,242 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/buffer_view.h" + +#include +#include + +#include +#include +#include + +#include "base/test/gtest_util.h" +#include "components/zucchini/test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +class BufferViewTest : public testing::Test { + protected: + // Some tests might modify this. + std::vector bytes_ = ParseHexString("10 32 54 76 98 BA DC FE 10 00"); +}; + +TEST_F(BufferViewTest, Size) { + for (size_t len = 0; len <= bytes_.size(); ++len) { + EXPECT_EQ(len, ConstBufferView(bytes_.data(), len).size()); + EXPECT_EQ(len, MutableBufferView(bytes_.data(), len).size()); + } +} + +TEST_F(BufferViewTest, Empty) { + // Empty view. 
+ EXPECT_TRUE(ConstBufferView(bytes_.data(), 0).empty()); + EXPECT_TRUE(MutableBufferView(bytes_.data(), 0).empty()); + + for (size_t len = 1; len <= bytes_.size(); ++len) { + EXPECT_FALSE(ConstBufferView(bytes_.data(), len).empty()); + EXPECT_FALSE(MutableBufferView(bytes_.data(), len).empty()); + } +} + +TEST_F(BufferViewTest, FromRange) { + constexpr size_t kSize = 10; + uint8_t raw_data[kSize] = {0x10, 0x32, 0x54, 0x76, 0x98, + 0xBA, 0xDC, 0xFE, 0x10, 0x00}; + ConstBufferView buffer = + ConstBufferView::FromRange(std::begin(raw_data), std::end(raw_data)); + EXPECT_EQ(bytes_.size(), buffer.size()); + EXPECT_EQ(std::begin(raw_data), buffer.begin()); + + MutableBufferView mutable_buffer = + MutableBufferView::FromRange(std::begin(raw_data), std::end(raw_data)); + EXPECT_EQ(bytes_.size(), mutable_buffer.size()); + EXPECT_EQ(std::begin(raw_data), mutable_buffer.begin()); + + EXPECT_DCHECK_DEATH( + ConstBufferView::FromRange(std::end(raw_data), std::begin(raw_data))); + + EXPECT_DCHECK_DEATH(MutableBufferView::FromRange(std::begin(raw_data) + 1, + std::begin(raw_data))); +} + +TEST_F(BufferViewTest, Subscript) { + ConstBufferView view(bytes_.data(), bytes_.size()); + + EXPECT_EQ(0x10, view[0]); + static_assert(!std::is_assignable::value, + "BufferView values should not be mutable."); + + MutableBufferView mutable_view(bytes_.data(), bytes_.size()); + + EXPECT_EQ(bytes_.data(), &mutable_view[0]); + mutable_view[0] = 42; + EXPECT_EQ(42, mutable_view[0]); +} + +TEST_F(BufferViewTest, SubRegion) { + ConstBufferView view(bytes_.data(), bytes_.size()); + + ConstBufferView sub_view = view[{2, 4}]; + EXPECT_EQ(view.begin() + 2, sub_view.begin()); + EXPECT_EQ(size_t(4), sub_view.size()); +} + +TEST_F(BufferViewTest, Shrink) { + ConstBufferView buffer(bytes_.data(), bytes_.size()); + + buffer.shrink(bytes_.size()); + EXPECT_EQ(bytes_.size(), buffer.size()); + buffer.shrink(2); + EXPECT_EQ(size_t(2), buffer.size()); + EXPECT_DCHECK_DEATH(buffer.shrink(bytes_.size())); +} + 
+TEST_F(BufferViewTest, Read) { + ConstBufferView buffer(bytes_.data(), bytes_.size()); + + EXPECT_EQ(0x10U, buffer.read(0)); + EXPECT_EQ(0x54U, buffer.read(2)); + + EXPECT_EQ(0x3210U, buffer.read(0)); + EXPECT_EQ(0x7654U, buffer.read(2)); + + EXPECT_EQ(0x76543210U, buffer.read(0)); + EXPECT_EQ(0xBA987654U, buffer.read(2)); + + EXPECT_EQ(0xFEDCBA9876543210ULL, buffer.read(0)); + + EXPECT_EQ(0x00, buffer.read(9)); + EXPECT_DEATH(buffer.read(10), ""); + + EXPECT_EQ(0x0010FEDCU, buffer.read(6)); + EXPECT_DEATH(buffer.read(7), ""); +} + +TEST_F(BufferViewTest, Write) { + MutableBufferView buffer(bytes_.data(), bytes_.size()); + + buffer.write(0, 0x01234567); + buffer.write(4, 0x89ABCDEF); + EXPECT_EQ(ParseHexString("67 45 23 01 EF CD AB 89 10 00"), + std::vector(buffer.begin(), buffer.end())); + + buffer.write(9, 0xFF); + EXPECT_DEATH(buffer.write(10, 0xFF), ""); + + buffer.write(6, 0xFFFFFFFF); + EXPECT_DEATH(buffer.write(7, 0xFFFFFFFF), ""); +} + +TEST_F(BufferViewTest, CanAccess) { + MutableBufferView buffer(bytes_.data(), bytes_.size()); + EXPECT_TRUE(buffer.can_access(0)); + EXPECT_TRUE(buffer.can_access(6)); + EXPECT_FALSE(buffer.can_access(7)); + EXPECT_FALSE(buffer.can_access(10)); + EXPECT_FALSE(buffer.can_access(0xFFFFFFFFU)); + + EXPECT_TRUE(buffer.can_access(0)); + EXPECT_TRUE(buffer.can_access(7)); + EXPECT_TRUE(buffer.can_access(9)); + EXPECT_FALSE(buffer.can_access(10)); + EXPECT_FALSE(buffer.can_access(0xFFFFFFFF)); +} + +TEST_F(BufferViewTest, LocalRegion) { + ConstBufferView view(bytes_.data(), bytes_.size()); + + BufferRegion region = view.local_region(); + EXPECT_EQ(0U, region.offset); + EXPECT_EQ(bytes_.size(), region.size); +} + +TEST_F(BufferViewTest, Covers) { + EXPECT_FALSE(ConstBufferView().covers({0, 0})); + EXPECT_FALSE(ConstBufferView().covers({0, 1})); + + ConstBufferView view(bytes_.data(), bytes_.size()); + + EXPECT_TRUE(view.covers({0, 0})); + EXPECT_TRUE(view.covers({0, 1})); + EXPECT_TRUE(view.covers({0, bytes_.size()})); + 
EXPECT_FALSE(view.covers({0, bytes_.size() + 1})); + EXPECT_FALSE(view.covers({1, bytes_.size()})); + + EXPECT_TRUE(view.covers({bytes_.size() - 1, 0})); + EXPECT_TRUE(view.covers({bytes_.size() - 1, 1})); + EXPECT_FALSE(view.covers({bytes_.size() - 1, 2})); + EXPECT_FALSE(view.covers({bytes_.size(), 0})); + EXPECT_FALSE(view.covers({bytes_.size(), 1})); + + EXPECT_FALSE(view.covers({1, size_t(-1)})); + EXPECT_FALSE(view.covers({size_t(-1), 1})); + EXPECT_FALSE(view.covers({size_t(-1), size_t(-1)})); +} + +TEST_F(BufferViewTest, Equals) { + // Almost identical to |bytes_|, except at 2 places: v v + std::vector bytes2 = ParseHexString("10 32 54 76 98 AB CD FE 10 00"); + ConstBufferView view1(bytes_.data(), bytes_.size()); + ConstBufferView view2(&bytes2[0], bytes2.size()); + + EXPECT_TRUE(view1.equals(view1)); + EXPECT_TRUE(view2.equals(view2)); + EXPECT_FALSE(view1.equals(view2)); + EXPECT_FALSE(view2.equals(view1)); + + EXPECT_TRUE((view1[{0, 0}]).equals(view2[{0, 0}])); + EXPECT_TRUE((view1[{0, 0}]).equals(view2[{5, 0}])); + EXPECT_TRUE((view1[{0, 5}]).equals(view2[{0, 5}])); + EXPECT_FALSE((view1[{0, 6}]).equals(view2[{0, 6}])); + EXPECT_FALSE((view1[{0, 7}]).equals(view1[{0, 6}])); + EXPECT_TRUE((view1[{5, 3}]).equals(view1[{5, 3}])); + EXPECT_FALSE((view1[{5, 1}]).equals(view1[{5, 3}])); + EXPECT_TRUE((view2[{0, 1}]).equals(view2[{8, 1}])); + EXPECT_FALSE((view2[{1, 1}]).equals(view2[{8, 1}])); +} + +TEST_F(BufferViewTest, AlignOn) { + using size_type = ConstBufferView::size_type; + ConstBufferView image(bytes_.data(), bytes_.size()); + ConstBufferView view = image; + ASSERT_EQ(10U, view.size()); + + auto get_pos = [&image, &view]() -> size_type { + EXPECT_TRUE(view.begin() >= image.begin()); // Iterator compare. + return static_cast(view.begin() - image.begin()); + }; + + EXPECT_EQ(0U, get_pos()); + view.remove_prefix(1U); + EXPECT_EQ(1U, get_pos()); + view.remove_prefix(4U); + EXPECT_EQ(5U, get_pos()); + + // Align. 
+ EXPECT_TRUE(view.AlignOn(image, 1U)); // Trivial case. + EXPECT_EQ(5U, get_pos()); + + EXPECT_TRUE(view.AlignOn(image, 2U)); + EXPECT_EQ(6U, get_pos()); + EXPECT_TRUE(view.AlignOn(image, 2U)); + EXPECT_EQ(6U, get_pos()); + + EXPECT_TRUE(view.AlignOn(image, 4U)); + EXPECT_EQ(8U, get_pos()); + EXPECT_TRUE(view.AlignOn(image, 2U)); + EXPECT_EQ(8U, get_pos()); + + view.remove_prefix(1U); + EXPECT_EQ(9U, get_pos()); + + // Pos is at 9, align to 4 would yield 12, but size is 10, so this fails. + EXPECT_FALSE(view.AlignOn(image, 4U)); + EXPECT_EQ(9U, get_pos()); + EXPECT_TRUE(view.AlignOn(image, 2U)); + EXPECT_EQ(10U, get_pos()); +} + +} // namespace zucchini diff --git a/crc32.cc b/crc32.cc new file mode 100644 index 0000000..8a40296 --- /dev/null +++ b/crc32.cc @@ -0,0 +1,43 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/crc32.h" + +#include + +#include "base/logging.h" + +namespace zucchini { + +namespace { + +std::array MakeCrc32Table() { + constexpr uint32_t kCrc32Poly = 0xEDB88320; + + std::array crc32Table; + for (uint32_t i = 0; i < 256; ++i) { + uint32_t r = i; + for (int j = 0; j < 8; ++j) + r = (r >> 1) ^ (kCrc32Poly & ~((r & 1) - 1)); + crc32Table[i] = r; + } + return crc32Table; +} + +} // namespace + +// Minimalistic CRC-32 implementation for Zucchini usage. Adapted from LZMA SDK +// (found at third_party/lzma_sdk/7zCrc.c), which is public domain.
+uint32_t CalculateCrc32(const uint8_t* first, const uint8_t* last) { + DCHECK_GE(last, first); + + static const std::array kCrc32Table = MakeCrc32Table(); + + uint32_t ret = 0xFFFFFFFF; + for (; first != last; ++first) + ret = kCrc32Table[(ret ^ *first) & 0xFF] ^ (ret >> 8); + return ret ^ 0xFFFFFFFF; +} + +} // namespace zucchini diff --git a/crc32.h b/crc32.h new file mode 100644 index 0000000..c729f5b --- /dev/null +++ b/crc32.h @@ -0,0 +1,17 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_CRC32_H_ +#define COMPONENTS_ZUCCHINI_CRC32_H_ + +#include + +namespace zucchini { + +// Calculates CRC-32 of the given range [|first|, |last|). +uint32_t CalculateCrc32(const uint8_t* first, const uint8_t* last); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_CRC32_H_ diff --git a/crc32_unittest.cc b/crc32_unittest.cc new file mode 100644 index 0000000..5ec85a8 --- /dev/null +++ b/crc32_unittest.cc @@ -0,0 +1,47 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/crc32.h" + +#include + +#include + +#include "base/test/gtest_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +constexpr uint8_t bytes[] = {0x10, 0x32, 0x54, 0x76, 0x98, + 0xBA, 0xDC, 0xFE, 0x10, 0x00}; + +TEST(Crc32Test, All) { + // Results can be verified with any CRC-32 calculator found online. + + // Empty region. + EXPECT_EQ(0x00000000U, CalculateCrc32(std::begin(bytes), std::begin(bytes))); + + // Single byte. + EXPECT_EQ(0xCFB5FFE9U, + CalculateCrc32(std::begin(bytes), std::begin(bytes) + 1)); + + // Same byte (0x10) appearing at different location. + EXPECT_EQ(0xCFB5FFE9U, + CalculateCrc32(std::begin(bytes) + 8, std::begin(bytes) + 9)); + + // Single byte of 0. 
+ EXPECT_EQ(0xD202EF8DU, + CalculateCrc32(std::begin(bytes) + 9, std::end(bytes))); + + // Whole region. + EXPECT_EQ(0xA86FD7D6U, CalculateCrc32(std::begin(bytes), std::end(bytes))); + + // Whole region excluding 0 at end. + EXPECT_EQ(0x0762F38BU, + CalculateCrc32(std::begin(bytes), std::begin(bytes) + 9)); + + EXPECT_DCHECK_DEATH(CalculateCrc32(std::begin(bytes) + 1, std::begin(bytes))); +} + +} // namespace zucchini diff --git a/disassembler.cc b/disassembler.cc new file mode 100644 index 0000000..18527a7 --- /dev/null +++ b/disassembler.cc @@ -0,0 +1,36 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler.h" + +#include "base/logging.h" + +namespace zucchini { + +std::unique_ptr ReferenceGroup::GetReader( + offset_t lower, + offset_t upper, + Disassembler* disasm) const { + DCHECK_LE(lower, upper); + DCHECK_LE(upper, disasm->size()); + return (disasm->*reader_factory_)(lower, upper); +} + +std::unique_ptr ReferenceGroup::GetReader( + Disassembler* disasm) const { + return (disasm->*reader_factory_)(0, static_cast(disasm->size())); +} + +std::unique_ptr ReferenceGroup::GetWriter( + MutableBufferView image, + Disassembler* disasm) const { + DCHECK_EQ(image.begin(), disasm->GetImage().begin()); + DCHECK_EQ(image.size(), disasm->size()); + return (disasm->*writer_factory_)(image); +} + +Disassembler::Disassembler() = default; +Disassembler::~Disassembler() = default; + +} // namespace zucchini diff --git a/disassembler.h b/disassembler.h new file mode 100644 index 0000000..8d41eaa --- /dev/null +++ b/disassembler.h @@ -0,0 +1,133 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ + +#include + +#include +#include +#include + +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +class Disassembler; + +// A ReferenceGroup is associated with a specific |type| and has convenience +// methods to obtain readers and writers for that type. A ReferenceGroup does +// not store references; it is a lightweight class that communicates with the +// disassembler to operate on them. +class ReferenceGroup { + public: + // Member function pointer used to obtain a ReferenceReader. + using ReaderFactory = std::unique_ptr ( + Disassembler::*)(offset_t lower, offset_t upper); + + // Member function pointer used to obtain a ReferenceWriter. + using WriterFactory = std::unique_ptr (Disassembler::*)( + MutableBufferView image); + + ReferenceGroup() = default; + + // RefinedReaderFactory and RefinedWriterFactory don't have to be + // identical to ReaderFactory and WriterFactory, but they must be + // convertible. As a result, they can be pointers to member functions of a + // derived Disassembler. + template + ReferenceGroup(ReferenceTypeTraits traits, + RefinedReaderFactory reader_factory, + RefinedWriterFactory writer_factory) + : traits_(traits), + reader_factory_(static_cast(reader_factory)), + writer_factory_(static_cast(writer_factory)) {} + + // Returns a reader for all references in the binary. + // Invalidates any other writer or reader previously obtained for |disasm|. + std::unique_ptr GetReader(Disassembler* disasm) const; + + // Returns a reader for references whose bytes are entirely contained in + // |[lower, upper)|. + // Invalidates any other writer or reader previously obtained for |disasm|.
+ std::unique_ptr GetReader(offset_t lower, + offset_t upper, + Disassembler* disasm) const; + + // Returns a writer for references in |image|, assuming that |image| was the + // same one initially parsed by |disasm|. + // Invalidates any other writer or reader previously obtained for |disasm|. + std::unique_ptr GetWriter(MutableBufferView image, + Disassembler* disasm) const; + + // Returns traits describing the reference type. + const ReferenceTypeTraits& traits() const { return traits_; } + + // Shorthand for traits().width. + offset_t width() const { return traits().width; } + + // Shorthand for traits().type_tag. + TypeTag type_tag() const { return traits().type_tag; } + + // Shorthand for traits().pool_tag. + PoolTag pool_tag() const { return traits().pool_tag; } + + private: + ReferenceTypeTraits traits_; + ReaderFactory reader_factory_ = nullptr; + WriterFactory writer_factory_ = nullptr; +}; + +// A Disassembler is used to encapsulate architecture specific operations, to: +// - Describe types of references found in the architecture using traits. +// - Extract references contained in an image file. +// - Correct target for some references. +class Disassembler { + public: + // Attempts to parse |image| and create an architecture-specific Disassembler, + // as determined by DIS, which is inherited from Disassembler. Returns an + // instance of DIS if successful, and null otherwise. + template + static std::unique_ptr Make(ConstBufferView image) { + auto disasm = std::make_unique(); + if (!disasm->Parse(image)) + return nullptr; + return disasm; + } + + virtual ~Disassembler(); + + // Returns the type of executable handled by the Disassembler. + virtual ExecutableType GetExeType() const = 0; + + // Returns a more detailed description of the executable type. + virtual std::string GetExeTypeString() const = 0; + + // Creates and returns a vector that contains all groups of references. + // Groups must be aggregated by pool.
+ virtual std::vector MakeReferenceGroups() const = 0; + + ConstBufferView GetImage() const { return image_; } + size_t size() const { return image_.size(); } + + protected: + Disassembler(); + + // Parses |image| and initializes internal states. Returns true on success. + // This must be called once and before any other operation. + virtual bool Parse(ConstBufferView image) = 0; + + // Raw image data. After Parse(), a Disassembler should shrink this to contain + // only the portion containing the executable file it recognizes. + ConstBufferView image_; + + DISALLOW_COPY_AND_ASSIGN(Disassembler); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ diff --git a/disassembler_no_op.cc b/disassembler_no_op.cc new file mode 100644 index 0000000..69d0eb8 --- /dev/null +++ b/disassembler_no_op.cc @@ -0,0 +1,28 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_no_op.h" + +namespace zucchini { + +DisassemblerNoOp::DisassemblerNoOp() = default; +DisassemblerNoOp::~DisassemblerNoOp() = default; + +ExecutableType DisassemblerNoOp::GetExeType() const { + return kExeTypeNoOp; +} + +std::string DisassemblerNoOp::GetExeTypeString() const { + return "(Unknown)"; +} + +std::vector DisassemblerNoOp::MakeReferenceGroups() const { + return std::vector(); +} + +bool DisassemblerNoOp::Parse(ConstBufferView image) { + return true; +} + +} // namespace zucchini diff --git a/disassembler_no_op.h b/disassembler_no_op.h new file mode 100644 index 0000000..280e8df --- /dev/null +++ b/disassembler_no_op.h @@ -0,0 +1,40 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_ + +#include +#include +#include + +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// This disassembler works on any file and does not look for reference. +class DisassemblerNoOp : public Disassembler { + public: + DisassemblerNoOp(); + ~DisassemblerNoOp() override; + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector MakeReferenceGroups() const override; + + private: + friend Disassembler; + + bool Parse(ConstBufferView image) override; + + DISALLOW_COPY_AND_ASSIGN(DisassemblerNoOp); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_ diff --git a/disassembler_win32.cc b/disassembler_win32.cc new file mode 100644 index 0000000..5bdc503 --- /dev/null +++ b/disassembler_win32.cc @@ -0,0 +1,392 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_win32.h" + +#include + +#include + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/abs32_utils.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/buffer_source.h" +#include "components/zucchini/rel32_finder.h" +#include "components/zucchini/rel32_utils.h" +#include "components/zucchini/reloc_utils.h" + +namespace zucchini { + +namespace { + +// Decides whether |image| points to a Win32 PE file. If this is a possibility, +// assigns |source| to enable further parsing, and returns true. Otherwise +// leaves |source| at an undefined state and returns false. 
+template +bool ReadWin32Header(ConstBufferView image, BufferSource* source) { + *source = BufferSource(image); + + // Check "MZ" magic of DOS header. + if (!source->CheckNextBytes({'M', 'Z'})) + return false; + + const auto* dos_header = source->GetPointer(); + if (!dos_header || (dos_header->e_lfanew & 7) != 0) + return false; + + // Offset to PE header is in DOS header. + *source = std::move(BufferSource(image).Skip(dos_header->e_lfanew)); + // Check 'PE\0\0' magic from PE header. + if (!source->ConsumeBytes({'P', 'E', 0, 0})) + return false; + + return true; +} + +template +const pe::ImageDataDirectory* ReadDataDirectory( + const typename Traits::ImageOptionalHeader* optional_header, + size_t index) { + if (index >= optional_header->number_of_rva_and_sizes) + return nullptr; + return &optional_header->data_directory[index]; +} + +// Decides whether |section| (assumed value) is a section that contains code. +template +bool IsWin32CodeSection(const pe::ImageSectionHeader& section) { + return (section.characteristics & kCodeCharacteristics) == + kCodeCharacteristics; +} + +} // namespace + +/******** Win32X86Traits ********/ + +// static +constexpr Bitness Win32X86Traits::kBitness; +constexpr ExecutableType Win32X86Traits::kExeType; +const char Win32X86Traits::kExeTypeString[] = "Windows PE x86"; + +/******** Win32X64Traits ********/ + +// static +constexpr Bitness Win32X64Traits::kBitness; +constexpr ExecutableType Win32X64Traits::kExeType; +const char Win32X64Traits::kExeTypeString[] = "Windows PE x64"; + +/******** DisassemblerWin32 ********/ + +// static. 
+template +bool DisassemblerWin32::QuickDetect(ConstBufferView image) { + BufferSource source; + return ReadWin32Header(image, &source); +} + +template +DisassemblerWin32::DisassemblerWin32() = default; + +template +DisassemblerWin32::~DisassemblerWin32() = default; + +template +ExecutableType DisassemblerWin32::GetExeType() const { + return Traits::kExeType; +} + +template +std::string DisassemblerWin32::GetExeTypeString() const { + return Traits::kExeTypeString; +} + +template +std::vector DisassemblerWin32::MakeReferenceGroups() + const { + return { + {ReferenceTypeTraits{2, TypeTag(kReloc), PoolTag(kReloc)}, + &DisassemblerWin32::MakeReadRelocs, &DisassemblerWin32::MakeWriteRelocs}, + {ReferenceTypeTraits{Traits::kVAWidth, TypeTag(kAbs32), PoolTag(kAbs32)}, + &DisassemblerWin32::MakeReadAbs32, &DisassemblerWin32::MakeWriteAbs32}, + {ReferenceTypeTraits{4, TypeTag(kRel32), PoolTag(kRel32)}, + &DisassemblerWin32::MakeReadRel32, &DisassemblerWin32::MakeWriteRel32}, + }; +} + +template +std::unique_ptr DisassemblerWin32::MakeReadRelocs( + offset_t lo, + offset_t hi) { + ParseAndStoreRelocBlocks(); + + RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_, + reloc_block_offsets_, lo, hi); + CHECK_GE(image_.size(), Traits::kVAWidth); + offset_t offset_bound = + base::checked_cast(image_.size() - Traits::kVAWidth + 1); + return std::make_unique(std::move(reloc_rva_reader), + Traits::kRelocType, offset_bound, + translator_); +} + +template +std::unique_ptr DisassemblerWin32::MakeReadAbs32( + offset_t lo, + offset_t hi) { + ParseAndStoreAbs32(); + Abs32RvaExtractorWin32 abs_rva_extractor( + image_, {Traits::kBitness, image_base_}, abs32_locations_, lo, hi); + return std::make_unique(std::move(abs_rva_extractor), + translator_); +} + +template +std::unique_ptr DisassemblerWin32::MakeReadRel32( + offset_t lo, + offset_t hi) { + ParseAndStoreRel32(); + return std::make_unique(image_, lo, hi, &rel32_locations_, + translator_); +} + +template +std::unique_ptr 
DisassemblerWin32::MakeWriteRelocs( + MutableBufferView image) { + ParseAndStoreRelocBlocks(); + return std::make_unique(Traits::kRelocType, image, + reloc_region_, reloc_block_offsets_, + translator_); +} + +template +std::unique_ptr DisassemblerWin32::MakeWriteAbs32( + MutableBufferView image) { + return std::make_unique( + image, AbsoluteAddress(Traits::kBitness, image_base_), translator_); +} + +template +std::unique_ptr DisassemblerWin32::MakeWriteRel32( + MutableBufferView image) { + return std::make_unique(image, translator_); +} + +template +bool DisassemblerWin32::Parse(ConstBufferView image) { + image_ = image; + return ParseHeader(); +} + +template +bool DisassemblerWin32::ParseHeader() { + BufferSource source; + + if (!ReadWin32Header(image_, &source)) + return false; + + auto* coff_header = source.GetPointer(); + if (!coff_header || + coff_header->size_of_optional_header < + offsetof(typename Traits::ImageOptionalHeader, data_directory)) { + return false; + } + + auto* optional_header = + source.GetPointer(); + if (!optional_header || optional_header->magic != Traits::kMagic) + return false; + + const size_t kDataDirBase = + offsetof(typename Traits::ImageOptionalHeader, data_directory); + size_t size_of_optional_header = coff_header->size_of_optional_header; + if (size_of_optional_header < kDataDirBase) + return false; + + const size_t data_dir_bound = + (size_of_optional_header - kDataDirBase) / sizeof(pe::ImageDataDirectory); + if (optional_header->number_of_rva_and_sizes > data_dir_bound) + return false; + + base_relocation_table_ = ReadDataDirectory( + optional_header, pe::kIndexOfBaseRelocationTable); + if (!base_relocation_table_) + return false; + + image_base_ = optional_header->image_base; + + // |optional_header->size_of_image| is the size of the image when loaded into + // memory, and not the actual size on disk. 
+ rva_t rva_bound = optional_header->size_of_image; + if (rva_bound >= kRvaBound) + return false; + + // An exclusive upper bound of all offsets used in the image. This gets + // updated as sections get visited. + offset_t offset_bound = + base::checked_cast(source.begin() - image_.begin()); + + // Extract |sections_|. + size_t sections_count = coff_header->number_of_sections; + auto* sections_array = + source.GetArray(sections_count); + if (!sections_array) + return false; + sections_.assign(sections_array, sections_array + sections_count); + + // Prepare |units| for offset-RVA translation. + std::vector units; + units.reserve(sections_count); + + // Visit each section, validate, and add address translation data to |units|. + bool has_text_section = false; + decltype(pe::ImageSectionHeader::virtual_address) prev_virtual_address = 0; + for (size_t i = 0; i < sections_count; ++i) { + const pe::ImageSectionHeader& section = sections_[i]; + // Apply strict checks on section bounds. + if (!image_.covers( + {section.file_offset_of_raw_data, section.size_of_raw_data})) { + return false; + } + if (!RangeIsBounded(section.virtual_address, section.virtual_size, + rva_bound)) { + return false; + } + + // PE sections should be sorted by RVAs. For robustness, we don't rely on + // this, so even if unsorted we don't care. Output warning though. + if (prev_virtual_address > section.virtual_address) + LOG(WARNING) << "RVA anomaly found for Section " << i; + prev_virtual_address = section.virtual_address; + + // Add |section| data for offset-RVA translation. 
+ units.push_back({section.file_offset_of_raw_data, section.size_of_raw_data, + section.virtual_address, section.virtual_size}); + + offset_t end_offset = + section.file_offset_of_raw_data + section.size_of_raw_data; + offset_bound = std::max(end_offset, offset_bound); + if (IsWin32CodeSection(section)) + has_text_section = true; + } + + if (offset_bound > image_.size()) + return false; + if (!has_text_section) + return false; + + // Initialize |translator_| for offset-RVA translations. Any inconsistency + // (e.g., 2 offsets correspond to the same RVA) would invalidate the PE file. + if (translator_.Initialize(std::move(units)) != AddressTranslator::kSuccess) + return false; + + // Resize |image_| to include only contents claimed by sections. Note that + // this may miss digital signatures at end of PE files, but for patching this + // is of minor concern. + image_.shrink(offset_bound); + + return true; +} + +template +bool DisassemblerWin32::ParseAndStoreRelocBlocks() { + if (has_parsed_relocs_) + return true; + has_parsed_relocs_ = true; + DCHECK(reloc_block_offsets_.empty()); + + offset_t relocs_offset = + translator_.RvaToOffset(base_relocation_table_->virtual_address); + size_t relocs_size = base_relocation_table_->size; + reloc_region_ = {relocs_offset, relocs_size}; + // Reject bogus relocs. Note that empty relocs are allowed! + if (!image_.covers(reloc_region_)) + return false; + + // Precompute offsets of all reloc blocks. + return RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_, + &reloc_block_offsets_); +} + +// TODO(huangs): Print warning if too few abs32 references are found. +// Empirically, file size / # relocs is < 100, so take 200 as the +// threshold for warning. 
+template +bool DisassemblerWin32::ParseAndStoreAbs32() { + if (has_parsed_abs32_) + return true; + has_parsed_abs32_ = true; + + ParseAndStoreRelocBlocks(); + + std::unique_ptr relocs = MakeReadRelocs(0, offset_t(size())); + for (auto ref = relocs->GetNext(); ref.has_value(); ref = relocs->GetNext()) + abs32_locations_.push_back(ref->target); + + abs32_locations_.shrink_to_fit(); + std::sort(abs32_locations_.begin(), abs32_locations_.end()); + + // Abs32 reference bodies must not overlap. If found, simply remove them. + size_t num_removed = + RemoveOverlappingAbs32Locations(Traits::kBitness, &abs32_locations_); + LOG_IF(WARNING, num_removed) << "Found and removed " << num_removed + << " abs32 locations with overlapping bodies."; + return true; +} + +template +bool DisassemblerWin32::ParseAndStoreRel32() { + if (has_parsed_rel32_) + return true; + has_parsed_rel32_ = true; + + ParseAndStoreAbs32(); + + AddressTranslator::OffsetToRvaCache location_offset_to_rva(translator_); + AddressTranslator::RvaToOffsetCache target_rva_checker(translator_); + + for (const pe::ImageSectionHeader& section : sections_) { + if (!IsWin32CodeSection(section)) + continue; + + rva_t start_rva = section.virtual_address; + rva_t end_rva = start_rva + section.virtual_size; + + ConstBufferView region = + image_[{section.file_offset_of_raw_data, section.size_of_raw_data}]; + Abs32GapFinder gap_finder(image_, region, abs32_locations_, + Traits::kVAWidth); + typename Traits::RelFinder finder(image_); + // Iterate over gaps between abs32 references, to avoid collision. + for (auto gap = gap_finder.GetNext(); gap.has_value(); + gap = gap_finder.GetNext()) { + finder.Reset(gap.value()); + // Iterate over heuristically detected rel32 references, validate, and add + // to |rel32_locations_|. 
+ for (auto rel32 = finder.GetNext(); rel32.has_value(); + rel32 = finder.GetNext()) { + offset_t rel32_offset = offset_t(rel32->location - image_.begin()); + rva_t rel32_rva = location_offset_to_rva.Convert(rel32_offset); + rva_t target_rva = rel32_rva + 4 + image_.read(rel32_offset); + if (target_rva_checker.IsValid(target_rva) && + (rel32->can_point_outside_section || + (start_rva <= target_rva && target_rva < end_rva))) { + finder.Accept(); + rel32_locations_.push_back(rel32_offset); + } + } + } + } + rel32_locations_.shrink_to_fit(); + // |sections_| entries are usually sorted by offset, but there's no guarantee. + // So sort explicitly, to be sure. + std::sort(rel32_locations_.begin(), rel32_locations_.end()); + return true; +} + +// Explicit instantiation for supported classes. +template class DisassemblerWin32; +template class DisassemblerWin32; + +} // namespace zucchini diff --git a/disassembler_win32.h b/disassembler_win32.h new file mode 100644 index 0000000..8e410ee --- /dev/null +++ b/disassembler_win32.h @@ -0,0 +1,129 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_ + +#include +#include + +#include +#include +#include +#include + +#include "base/macros.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/type_win_pe.h" + +namespace zucchini { + +class Rel32FinderX86; +class Rel32FinderX64; + +struct Win32X86Traits { + static constexpr Bitness kBitness = kBit32; + static constexpr ExecutableType kExeType = kExeTypeWin32X86; + enum : uint16_t { kMagic = 0x10B }; + enum : uint16_t { kRelocType = 3 }; + enum : offset_t { kVAWidth = 4 }; + static const char kExeTypeString[]; + + using ImageOptionalHeader = pe::ImageOptionalHeader; + using RelFinder = Rel32FinderX86; + using Address = uint32_t; +}; + +struct Win32X64Traits { + static constexpr Bitness kBitness = kBit64; + static constexpr ExecutableType kExeType = kExeTypeWin32X64; + enum : uint16_t { kMagic = 0x20B }; + enum : uint16_t { kRelocType = 10 }; + enum : offset_t { kVAWidth = 8 }; + static const char kExeTypeString[]; + + using ImageOptionalHeader = pe::ImageOptionalHeader64; + using RelFinder = Rel32FinderX64; + using Address = uint64_t; +}; + +template +class DisassemblerWin32 : public Disassembler { + public: + enum ReferenceType : uint8_t { kReloc, kAbs32, kRel32, kTypeCount }; + + // Applies quick checks to determine whether |image| *may* point to the start + // of an executable. Returns true iff the check passes. + static bool QuickDetect(ConstBufferView image); + + DisassemblerWin32(); + ~DisassemblerWin32() override; + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector MakeReferenceGroups() const override; + + // Functions that return reader / writer for references. 
+ std::unique_ptr MakeReadRelocs(offset_t lo, offset_t hi); + std::unique_ptr MakeReadAbs32(offset_t lo, offset_t hi); + std::unique_ptr MakeReadRel32(offset_t lo, offset_t hi); + std::unique_ptr MakeWriteRelocs(MutableBufferView image); + std::unique_ptr MakeWriteAbs32(MutableBufferView image); + std::unique_ptr MakeWriteRel32(MutableBufferView image); + + private: + friend Disassembler; + + // Disassembler: + bool Parse(ConstBufferView image) override; + + // Parses the file header. Returns true iff successful. + bool ParseHeader(); + + // Parsers to extract references. These are lazily called, and return whether + // parsing was successful (failures are non-fatal). + bool ParseAndStoreRelocBlocks(); + bool ParseAndStoreAbs32(); + bool ParseAndStoreRel32(); + + // In-memory copy of sections. + std::vector sections_; + + // Image base address to translate between RVA and VA. + typename Traits::Address image_base_ = 0; + + // Pointer to data Directory entry of the relocation table. + const pe::ImageDataDirectory* base_relocation_table_ = nullptr; + + // Translator between offsets and RVAs. + AddressTranslator translator_; + + // Reference storage. + BufferRegion reloc_region_; + std::vector reloc_block_offsets_; + offset_t reloc_end_ = 0; + std::vector abs32_locations_; + std::vector rel32_locations_; + + // Initialization states of reference storage, used for lazy initialization. + // TODO(huangs): Investigate whether lazy initialization is useful for memory + // reduction. This is a carryover from Courgette. To be sure we should run + // experiment after Zucchini is able to do ensemble patching. 
+ bool has_parsed_relocs_ = false; + bool has_parsed_abs32_ = false; + bool has_parsed_rel32_ = false; + + DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32); +}; + +using DisassemblerWin32X86 = DisassemblerWin32; +using DisassemblerWin32X64 = DisassemblerWin32; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_ diff --git a/element_detection.cc b/element_detection.cc new file mode 100644 index 0000000..d6bba5f --- /dev/null +++ b/element_detection.cc @@ -0,0 +1,84 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/element_detection.h" + +#include + +#include "base/logging.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/disassembler_no_op.h" +#include "components/zucchini/disassembler_win32.h" + +namespace zucchini { + +namespace { + +// Impose a minimal program size to eliminate pathological cases. 
+constexpr size_t kMinProgramSize = 16; + +} // namespace + +/******** Utility Functions ********/ + +std::unique_ptr MakeDisassemblerWithoutFallback( + ConstBufferView image) { + if (DisassemblerWin32X86::QuickDetect(image)) { + auto disasm = Disassembler::Make(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } + + if (DisassemblerWin32X64::QuickDetect(image)) { + auto disasm = Disassembler::Make(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } + + return nullptr; +} + +std::unique_ptr MakeDisassemblerOfType(ConstBufferView image, + ExecutableType exe_type) { + switch (exe_type) { + case kExeTypeWin32X86: + return Disassembler::Make(image); + case kExeTypeWin32X64: + return Disassembler::Make(image); + case kExeTypeNoOp: + return Disassembler::Make(image); + default: + return nullptr; + } +} + +base::Optional DetectElementFromDisassembler(ConstBufferView image) { + std::unique_ptr disasm = MakeDisassemblerWithoutFallback(image); + if (disasm) + return Element({0, disasm->size()}, disasm->GetExeType()); + return base::nullopt; +} + +/******** ProgramScanner ********/ + +ElementFinder::ElementFinder(ConstBufferView image, ElementDetector&& detector) + : image_(image), detector_(std::move(detector)) {} + +ElementFinder::~ElementFinder() = default; + +base::Optional ElementFinder::GetNext() { + for (; pos_ < image_.size(); ++pos_) { + ConstBufferView test_image = + ConstBufferView::FromRange(image_.begin() + pos_, image_.end()); + base::Optional element = detector_.Run(test_image); + if (element) { + element->offset += pos_; + pos_ = element->EndOffset(); + return element; + } + } + return base::nullopt; +} + +} // namespace zucchini diff --git a/element_detection.h b/element_detection.h new file mode 100644 index 0000000..f90c033 --- /dev/null +++ b/element_detection.h @@ -0,0 +1,60 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_ +#define COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_ + +#include + +#include + +#include "base/callback.h" +#include "base/macros.h" +#include "base/optional.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +class Disassembler; + +// Attempts to detect an executable located at start of |image|. If found, +// returns the corresponding disassembler. Otherwise returns null. +std::unique_ptr MakeDisassemblerWithoutFallback( + ConstBufferView image); + +// Attempts to create a disassembler corresponding to |exe_type| and initialize +// it with |image|, On failure, returns null. +std::unique_ptr MakeDisassemblerOfType(ConstBufferView image, + ExecutableType exe_type); + +// Attempts to detect an element associated with |image| and returns it, or +// returns nullopt if no element is detected. +using ElementDetector = + base::RepeatingCallback(ConstBufferView image)>; + +// Implementation of ElementDetector using disassemblers. +base::Optional DetectElementFromDisassembler(ConstBufferView image); + +// A class to scan through an image and iteratively detect elements. +class ElementFinder { + public: + ElementFinder(ConstBufferView image, ElementDetector&& detector); + ~ElementFinder(); + + // Scans for the next executable using |detector|. Returns the next element + // found, or nullopt if no more element can be found. 
+ base::Optional GetNext(); + + private: + ConstBufferView image_; + ElementDetector detector_; + offset_t pos_ = 0; + + DISALLOW_COPY_AND_ASSIGN(ElementFinder); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_ diff --git a/element_detection_unittest.cc b/element_detection_unittest.cc new file mode 100644 index 0000000..2200c0b --- /dev/null +++ b/element_detection_unittest.cc @@ -0,0 +1,78 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/element_detection.h" + +#include + +#include "base/bind.h" +#include "components/zucchini/buffer_view.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using ElementVector = std::vector; + +} // namespace + +TEST(ElementDetectionTest, ElementFinderEmpty) { + std::vector buffer(10, 0); + ElementFinder finder( + ConstBufferView(buffer.data(), buffer.size()), + base::BindRepeating([](ConstBufferView image) -> base::Optional { + return base::nullopt; + })); + EXPECT_EQ(base::nullopt, finder.GetNext()); +} + +ElementVector TestElementFinder(std::vector buffer) { + ConstBufferView image(buffer.data(), buffer.size()); + + ElementFinder finder( + image, + base::BindRepeating( + [](ConstBufferView image, + ConstBufferView region) -> base::Optional { + EXPECT_GE(region.begin(), image.begin()); + EXPECT_LE(region.end(), image.end()); + EXPECT_GE(region.size(), 0U); + + if (region[0] != 0) { + offset_t length = 1; + while (length < region.size() && region[length] == region[0]) + ++length; + return Element{{0, length}, + static_cast(region[0])}; + } + return base::nullopt; + }, + image)); + std::vector elements; + for (auto element = finder.GetNext(); element; element = finder.GetNext()) { + elements.push_back(*element); + } + return elements; +} + +TEST(ElementDetectionTest, ElementFinder) { + 
EXPECT_EQ(ElementVector(), TestElementFinder({})); + EXPECT_EQ(ElementVector(), TestElementFinder({0, 0})); + EXPECT_EQ(ElementVector({{{0, 2}, kExeTypeWin32X86}}), + TestElementFinder({1, 1})); + EXPECT_EQ( + ElementVector({{{0, 2}, kExeTypeWin32X86}, {{2, 2}, kExeTypeWin32X64}}), + TestElementFinder({1, 1, 2, 2})); + EXPECT_EQ(ElementVector({{{1, 2}, kExeTypeWin32X86}}), + TestElementFinder({0, 1, 1, 0})); + EXPECT_EQ( + ElementVector({{{1, 2}, kExeTypeWin32X86}, {{3, 3}, kExeTypeWin32X64}}), + TestElementFinder({0, 1, 1, 2, 2, 2})); + EXPECT_EQ( + ElementVector({{{1, 2}, kExeTypeWin32X86}, {{4, 3}, kExeTypeWin32X64}}), + TestElementFinder({0, 1, 1, 0, 2, 2, 2})); +} + +} // namespace zucchini diff --git a/encoded_view.cc b/encoded_view.cc new file mode 100644 index 0000000..5b55b51 --- /dev/null +++ b/encoded_view.cc @@ -0,0 +1,77 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/encoded_view.h" + +#include +#include + +#include "base/logging.h" + +namespace zucchini { + +EncodedView::EncodedView(const ImageIndex& image_index) + : image_index_(image_index), pool_infos_(image_index.PoolCount()) {} +EncodedView::~EncodedView() = default; + +EncodedView::value_type EncodedView::Projection(offset_t location) const { + DCHECK_LT(location, image_index_.size()); + + // Find out what lies at |location|. + TypeTag type = image_index_.LookupType(location); + + // |location| points into raw data. + if (type == kNoTypeTag) { + // The projection is the identity function on raw content. + return image_index_.GetRawValue(location); + } + + // |location| points into a Reference. 
+ const ReferenceSet& ref_set = image_index_.refs(type); + IndirectReference ref = ref_set.at(location); + DCHECK_GE(location, ref.location); + DCHECK_LT(location, ref.location + ref_set.width()); + + // |location| is not the first byte of the reference. + if (location != ref.location) { + // Trailing bytes of a reference are all projected to the same value. + return kReferencePaddingProjection; + } + + PoolTag pool_tag = ref_set.pool_tag(); + + // Targets with an associated Label will use its Label index in projection. + DCHECK_EQ(image_index_.pool(pool_tag).size(), + pool_infos_[pool_tag.value()].labels.size()); + uint32_t label = pool_infos_[pool_tag.value()].labels[ref.target_key]; + + // Projection is done on (|target|, |type|), shifted by + // kBaseReferenceProjection to avoid collisions with raw content. + value_type projection = label; + projection *= image_index_.TypeCount(); + projection += type.value(); + return projection + kBaseReferenceProjection; +} + +size_t EncodedView::Cardinality() const { + size_t max_width = 0; + for (const auto& pool_info : pool_infos_) + max_width = std::max(max_width, pool_info.bound); + return max_width * image_index_.TypeCount() + kBaseReferenceProjection; +} + +void EncodedView::SetLabels(PoolTag pool, + std::vector&& labels, + size_t bound) { + DCHECK_EQ(labels.size(), image_index_.pool(pool).size()); + DCHECK(labels.empty() || *max_element(labels.begin(), labels.end()) < bound); + pool_infos_[pool.value()].labels = std::move(labels); + pool_infos_[pool.value()].bound = bound; +} + +EncodedView::PoolInfo::PoolInfo() = default; +EncodedView::PoolInfo::PoolInfo(PoolInfo&&) = default; +EncodedView::PoolInfo::~PoolInfo() = default; + +} // namespace zucchini diff --git a/encoded_view.h b/encoded_view.h new file mode 100644 index 0000000..7ecf59e --- /dev/null +++ b/encoded_view.h @@ -0,0 +1,182 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_ +#define COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_ + +#include +#include + +#include +#include + +#include "base/macros.h" +#include "components/zucchini/image_index.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// Zucchini-gen performs semantics-aware matching: +// - Same-typed reference target in "old" and "new" can be associated. +// Associated targets are assigned an identifier called "label" (and for +// unassociated targets, label = 0). +// - EncodedView maps each offset in "old" and "new" images to a "projected +// value", which can be: +// - Raw byte value (0-255) for non-references. +// - Reference "projected value" (> 256) that depends on target {type, label} +// at each reference's location (byte 0). +// - Reference padding value (256) at the body of each reference (bytes 1+). +// - The projected values for "old" and "new" are used to build the equivalence +// map. + +constexpr size_t kReferencePaddingProjection = 256; +constexpr size_t kBaseReferenceProjection = 257; + +// A Range (providing begin and end iterators) that adapts ImageIndex to make +// image data appear as an Encoded Image, that is encoded data under a higher +// level of abstraction than raw bytes. In particular: +// - First byte of each reference become a projection of its type and label. +// - Subsequent bytes of each reference becomes |kReferencePaddingProjection|. +// - Non-reference raw bytes remain as raw bytes. +class EncodedView { + public: + // RandomAccessIterator whose values are the results of Projection(). 
+ class Iterator { + public: + using iterator_category = std::random_access_iterator_tag; + using value_type = size_t; + using difference_type = ptrdiff_t; + using reference = size_t; + using pointer = size_t*; + + Iterator(const EncodedView& encoded_view, difference_type pos) + : encoded_view_(encoded_view), pos_(pos) {} + + value_type operator*() const { + return encoded_view_.Projection(static_cast(pos_)); + } + + value_type operator[](difference_type n) const { + return encoded_view_.Projection(static_cast(pos_ + n)); + } + + Iterator& operator++() { + ++pos_; + return *this; + } + + Iterator operator++(int) { + Iterator tmp = *this; + ++pos_; + return tmp; + } + + Iterator& operator--() { + --pos_; + return *this; + } + + Iterator operator--(int) { + Iterator tmp = *this; + --pos_; + return tmp; + } + + Iterator& operator+=(difference_type n) { + pos_ += n; + return *this; + } + + Iterator& operator-=(difference_type n) { + pos_ -= n; + return *this; + } + + friend bool operator==(Iterator a, Iterator b) { return a.pos_ == b.pos_; } + + friend bool operator!=(Iterator a, Iterator b) { return !(a == b); } + + friend bool operator<(Iterator a, Iterator b) { return a.pos_ < b.pos_; } + + friend bool operator>(Iterator a, Iterator b) { return b < a; } + + friend bool operator<=(Iterator a, Iterator b) { return !(b < a); } + + friend bool operator>=(Iterator a, Iterator b) { return !(a < b); } + + friend difference_type operator-(Iterator a, Iterator b) { + return a.pos_ - b.pos_; + } + + friend Iterator operator+(Iterator it, difference_type n) { + it += n; + return it; + } + + friend Iterator operator-(Iterator it, difference_type n) { + it -= n; + return it; + } + + private: + const EncodedView& encoded_view_; + difference_type pos_; + }; + + using value_type = size_t; + using size_type = offset_t; + using difference_type = ptrdiff_t; + using const_iterator = Iterator; + + // |image_index| is the annotated image being adapted, and is required to + // remain 
valid for the lifetime of the object. + explicit EncodedView(const ImageIndex& image_index); + ~EncodedView(); + + // Projects |location| to a scalar value that describes the content at a + // higher level of abstraction. + value_type Projection(offset_t location) const; + + bool IsToken(offset_t location) const { + return image_index_.IsToken(location); + } + + // Returns the cardinality of the projection, i.e., the upper bound on + // values returned by Projection(). + value_type Cardinality() const; + + // Associates |labels| to targets for a given |pool|, replacing previous + // association. Values in |labels| must be smaller than |bound|. + void SetLabels(PoolTag pool, std::vector&& labels, size_t bound); + const ImageIndex& image_index() const { return image_index_; } + + // Range functions. + size_type size() const { return size_type(image_index_.size()); } + const_iterator begin() const { + return const_iterator{*this, difference_type(0)}; + } + const_iterator end() const { + return const_iterator{*this, difference_type(size())}; + } + + private: + struct PoolInfo { + PoolInfo(); + PoolInfo(PoolInfo&&); + ~PoolInfo(); + + // |labels| translates IndirectReference target_key to label. + std::vector labels; + size_t bound = 0; + }; + + const ImageIndex& image_index_; + std::vector pool_infos_; + + DISALLOW_COPY_AND_ASSIGN(EncodedView); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_ diff --git a/encoded_view_unittest.cc b/encoded_view_unittest.cc new file mode 100644 index 0000000..96d9dc4 --- /dev/null +++ b/encoded_view_unittest.cc @@ -0,0 +1,202 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/encoded_view.h" + +#include +#include +#include + +#include "components/zucchini/image_index.h" +#include "components/zucchini/test_disassembler.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +constexpr size_t PADDING = kReferencePaddingProjection; + +template +void TestInputIterator(It1 first_expected, + It1 last_expected, + It2 first_input, + It2 last_input) { + while (first_expected != last_expected && first_input != last_input) { + EXPECT_EQ(*first_expected, *first_input); + ++first_expected; + ++first_input; + } + EXPECT_EQ(last_input, first_input); + EXPECT_EQ(last_expected, first_expected); +} + +template +void TestForwardIterator(It1 first_expected, + It1 last_expected, + It2 first_input, + It2 last_input) { + TestInputIterator(first_expected, last_expected, first_input, last_input); + + while (first_expected != last_expected && first_input != last_input) { + EXPECT_EQ(*(first_expected++), *(first_input++)); + } + EXPECT_EQ(last_input, first_input); + EXPECT_EQ(last_expected, first_expected); +} + +template +void TestBidirectionalIterator(It1 first_expected, + It1 last_expected, + It2 first_input, + It2 last_input) { + TestForwardIterator(first_expected, last_expected, first_input, last_input); + + while (first_expected != last_expected && first_input != last_input) { + EXPECT_EQ(*(--last_expected), *(--last_input)); + } + EXPECT_EQ(last_input, first_input); + EXPECT_EQ(last_expected, first_expected); +} + +template +void TestRandomAccessIterator(It1 first_expected, + It1 last_expected, + It2 first_input, + It2 last_input) { + TestBidirectionalIterator(first_expected, last_expected, first_input, + last_input); + + using difference_type = typename std::iterator_traits::difference_type; + + difference_type expected_size = last_expected - first_expected; + difference_type input_size = last_input - first_input; + EXPECT_EQ(expected_size, input_size); + + for (difference_type i = 0; i 
< expected_size; ++i) { + EXPECT_EQ(*(first_expected + i), *(first_input + i)); + EXPECT_EQ(first_expected[i], first_input[i]); + + EXPECT_EQ(0 < i, first_input < first_input + i); + EXPECT_EQ(0 > i, first_input > first_input + i); + EXPECT_EQ(0 <= i, first_input <= first_input + i); + EXPECT_EQ(0 >= i, first_input >= first_input + i); + + EXPECT_EQ(expected_size < i, last_input < first_input + i); + EXPECT_EQ(expected_size > i, last_input > first_input + i); + EXPECT_EQ(expected_size <= i, last_input <= first_input + i); + EXPECT_EQ(expected_size >= i, last_input >= first_input + i); + + It2 input = first_input; + input += i; + EXPECT_EQ(*input, first_expected[i]); + input -= i; + EXPECT_EQ(first_input, input); + input += i; + + EXPECT_EQ(0 < i, first_input < input); + EXPECT_EQ(0 > i, first_input > input); + EXPECT_EQ(0 <= i, first_input <= input); + EXPECT_EQ(0 >= i, first_input >= input); + + EXPECT_EQ(expected_size < i, last_input < input); + EXPECT_EQ(expected_size > i, last_input > input); + EXPECT_EQ(expected_size <= i, last_input <= input); + EXPECT_EQ(expected_size >= i, last_input >= input); + } +} + +} // namespace + +class EncodedViewTest : public testing::Test { + protected: + EncodedViewTest() + : buffer_(20), + image_index_(ConstBufferView(buffer_.data(), buffer_.size())) { + std::iota(buffer_.begin(), buffer_.end(), 0); + TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, + {{1, 0}, {8, 1}, {10, 2}}, + {4, TypeTag(1), PoolTag(0)}, {{3, 3}}, + {3, TypeTag(2), PoolTag(1)}, {{12, 4}, {17, 5}}); + image_index_.Initialize(&disasm); + } + + void CheckView(std::vector expected, + const EncodedView& encoded_view) const { + for (offset_t i = 0; i < encoded_view.size(); ++i) { + EXPECT_EQ(expected[i], encoded_view.Projection(i)) << i; + } + TestRandomAccessIterator(expected.begin(), expected.end(), + encoded_view.begin(), encoded_view.end()); + } + + std::vector buffer_; + ImageIndex image_index_; +}; + +TEST_F(EncodedViewTest, Unlabeled) { + EncodedView 
encoded_view(image_index_); + + encoded_view.SetLabels(PoolTag(0), {0, 0, 0, 0}, 1); + encoded_view.SetLabels(PoolTag(1), {0, 0}, 1); + + std::vector expected = { + 0, // raw + kBaseReferenceProjection + 0 + 0 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 1 + 0 * 3, // ref 1 + PADDING, + PADDING, + PADDING, + 7, // raw + kBaseReferenceProjection + 0 + 0 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 0 + 0 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 2 + 0 * 3, // ref 2 + PADDING, + PADDING, + 15, // raw + 16, + kBaseReferenceProjection + 2 + 0 * 3, // ref 2 + PADDING, + PADDING, + }; + EXPECT_EQ(kBaseReferenceProjection + 3 * 1, encoded_view.Cardinality()); + CheckView(expected, encoded_view); +} + +TEST_F(EncodedViewTest, Labeled) { + EncodedView encoded_view(image_index_); + + encoded_view.SetLabels(PoolTag(0), {0, 2, 1, 2}, 3); + encoded_view.SetLabels(PoolTag(1), {0, 0}, 1); + + std::vector expected = { + 0, // raw + kBaseReferenceProjection + 0 + 0 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 1 + 2 * 3, // ref 1 + PADDING, + PADDING, + PADDING, + 7, // raw + kBaseReferenceProjection + 0 + 2 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 0 + 1 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 2 + 0 * 3, // ref 2 + PADDING, + PADDING, + 15, // raw + 16, + kBaseReferenceProjection + 2 + 0 * 3, // ref 2 + PADDING, + PADDING, + }; + EXPECT_EQ(kBaseReferenceProjection + 3 * 3, encoded_view.Cardinality()); + CheckView(expected, encoded_view); +} + +} // namespace zucchini diff --git a/ensemble_matcher.cc b/ensemble_matcher.cc new file mode 100644 index 0000000..eebbae9 --- /dev/null +++ b/ensemble_matcher.cc @@ -0,0 +1,24 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/ensemble_matcher.h" + +#include + +#include "base/logging.h" +#include "base/strings/stringprintf.h" + +namespace zucchini { + +/******** EnsembleMatcher ********/ + +EnsembleMatcher::EnsembleMatcher() = default; + +EnsembleMatcher::~EnsembleMatcher() = default; + +void EnsembleMatcher::Trim() { + // TODO(huangs): Add MultiDex handling logic when we add DEX support. +} + +} // namespace zucchini diff --git a/ensemble_matcher.h b/ensemble_matcher.h new file mode 100644 index 0000000..bc89883 --- /dev/null +++ b/ensemble_matcher.h @@ -0,0 +1,62 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_ +#define COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_ + +#include + +#include + +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// A base class for ensemble matching strategies, which identify Elements in +// "new" and "old" archives, and match each "new" Element to an "old" Element. +// Matched pairs can then be passed to Disassembler for architecture-specific +// patching. Notes: +// - A matched Element pair must have the same ExecutableType. +// - Special case: Exact matches are ignored, since they can be patched directly +// without architecture-specific patching. +// - Multiple "new" Elements may match a common "old" Element. +// - A "new" Element may have no match. This can happen when no viable match +// exists, or when an exact match is skipped. +class EnsembleMatcher { + public: + EnsembleMatcher(); + virtual ~EnsembleMatcher(); + + // Interface to main matching feature. Returns whether match was successful. + // This should be called at most once per instance. 
+ virtual bool RunMatch(ConstBufferView old_image, + ConstBufferView new_image) = 0; + + // Accessors to RunMatch() results. + const std::vector& matches() const { return matches_; } + + size_t num_identical() const { return num_identical_; } + + protected: + // Post-processes |matches_| to remove potentially unfavorable entries. + void Trim(); + + // Storage of matched elements: A list of matched pairs, where the list of + // "new" elements have increasing offsets and don't overlap. May be empty. + std::vector matches_; + + // Number of identical matches found in match candidates. These should be + // excluded from |matches_|. + size_t num_identical_ = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(EnsembleMatcher); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_ diff --git a/equivalence_map.cc b/equivalence_map.cc new file mode 100644 index 0000000..b3181ab --- /dev/null +++ b/equivalence_map.cc @@ -0,0 +1,482 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/equivalence_map.h" + +#include +#include + +#include "base/logging.h" +#include "components/zucchini/encoded_view.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/suffix_array.h" + +namespace zucchini { + +/******** Utility Functions ********/ + +double GetTokenSimilarity( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + offset_t src, + offset_t dst) { + DCHECK(old_image_index.IsToken(src)); + DCHECK(new_image_index.IsToken(dst)); + + TypeTag old_type = old_image_index.LookupType(src); + TypeTag new_type = new_image_index.LookupType(dst); + if (old_type != new_type) + return kMismatchFatal; + + // Raw comparison. 
+ if (!old_image_index.IsReference(src) && !new_image_index.IsReference(dst)) { + return old_image_index.GetRawValue(src) == new_image_index.GetRawValue(dst) + ? 1.0 + : -1.5; + } + + const ReferenceSet& old_ref_set = old_image_index.refs(old_type); + const ReferenceSet& new_ref_set = new_image_index.refs(new_type); + IndirectReference old_reference = old_ref_set.at(src); + IndirectReference new_reference = new_ref_set.at(dst); + PoolTag pool_tag = old_ref_set.pool_tag(); + + double affinity = targets_affinities[pool_tag.value()].AffinityBetween( + old_reference.target_key, new_reference.target_key); + + // Both targets are not associated, which implies a weak match. + if (affinity == 0.0) + return 0.5 * old_ref_set.width(); + + // At least one target is associated, so values are compared. + return affinity > 0.0 ? old_ref_set.width() : -2.0; +} + +double GetEquivalenceSimilarity( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + const Equivalence& equivalence) { + double similarity = 0.0; + for (offset_t k = 0; k < equivalence.length; ++k) { + // Non-tokens are joined with the nearest previous token: skip until we + // cover the unit. 
+ if (!new_image_index.IsToken(equivalence.dst_offset + k)) + continue; + + similarity += GetTokenSimilarity( + old_image_index, new_image_index, targets_affinities, + equivalence.src_offset + k, equivalence.dst_offset + k); + if (similarity == kMismatchFatal) + return kMismatchFatal; + } + return similarity; +} + +EquivalenceCandidate ExtendEquivalenceForward( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + const EquivalenceCandidate& candidate, + double min_similarity) { + Equivalence equivalence = candidate.eq; + offset_t best_k = equivalence.length; + double current_similarity = candidate.similarity; + double best_similarity = current_similarity; + double current_penalty = min_similarity; + for (offset_t k = best_k; + equivalence.src_offset + k < old_image_index.size() && + equivalence.dst_offset + k < new_image_index.size(); + ++k) { + // Mismatch in type, |candidate| cannot be extended further. + if (old_image_index.LookupType(equivalence.src_offset + k) != + new_image_index.LookupType(equivalence.dst_offset + k)) { + break; + } + + if (!new_image_index.IsToken(equivalence.dst_offset + k)) { + // Non-tokens are joined with the nearest previous token: skip until we + // cover the unit, and extend |best_k| if applicable. 
+ if (best_k == k) + best_k = k + 1; + continue; + } + + double similarity = GetTokenSimilarity( + old_image_index, new_image_index, targets_affinities, + equivalence.src_offset + k, equivalence.dst_offset + k); + current_similarity += similarity; + current_penalty = std::max(0.0, current_penalty) - similarity; + + if (current_similarity < 0.0 || current_penalty >= min_similarity) + break; + if (current_similarity >= best_similarity) { + best_similarity = current_similarity; + best_k = k + 1; + } + } + equivalence.length = best_k; + return {equivalence, best_similarity}; +} + +EquivalenceCandidate ExtendEquivalenceBackward( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + const EquivalenceCandidate& candidate, + double min_similarity) { + Equivalence equivalence = candidate.eq; + offset_t best_k = 0; + double current_similarity = candidate.similarity; + double best_similarity = current_similarity; + double current_penalty = 0.0; + for (offset_t k = 1; + k <= equivalence.dst_offset && k <= equivalence.src_offset; ++k) { + // Mismatch in type, |candidate| cannot be extended further. + if (old_image_index.LookupType(equivalence.src_offset - k) != + new_image_index.LookupType(equivalence.dst_offset - k)) { + break; + } + + // Non-tokens are joined with the nearest previous token: skip until we + // reach the next token. + if (!new_image_index.IsToken(equivalence.dst_offset - k)) + continue; + + DCHECK_EQ(old_image_index.LookupType(equivalence.src_offset - k), + new_image_index.LookupType(equivalence.dst_offset - + k)); // Sanity check. 
+ double similarity = GetTokenSimilarity( + old_image_index, new_image_index, targets_affinities, + equivalence.src_offset - k, equivalence.dst_offset - k); + + current_similarity += similarity; + current_penalty = std::max(0.0, current_penalty) - similarity; + + if (current_similarity < 0.0 || current_penalty >= min_similarity) + break; + if (current_similarity >= best_similarity) { + best_similarity = current_similarity; + best_k = k; + } + } + + equivalence.dst_offset -= best_k; + equivalence.src_offset -= best_k; + equivalence.length += best_k; + return {equivalence, best_similarity}; +} + +EquivalenceCandidate VisitEquivalenceSeed( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + offset_t src, + offset_t dst, + double min_similarity) { + EquivalenceCandidate candidate{{src, dst, 0}, 0.0}; // Empty. + if (!old_image_index.IsToken(src)) + return candidate; + candidate = + ExtendEquivalenceForward(old_image_index, new_image_index, + targets_affinities, candidate, min_similarity); + if (candidate.similarity < min_similarity) + return candidate; // Not worth exploring any more. 
+ return ExtendEquivalenceBackward(old_image_index, new_image_index, + targets_affinities, candidate, + min_similarity); +} + +/******** OffsetMapper ********/ + +OffsetMapper::OffsetMapper(std::vector&& equivalences) + : equivalences_(std::move(equivalences)) { + DCHECK(std::is_sorted(equivalences_.begin(), equivalences_.end(), + [](const Equivalence& a, const Equivalence& b) { + return a.src_offset < b.src_offset; + })); +} + +OffsetMapper::OffsetMapper(EquivalenceSource&& equivalence_source) { + for (auto e = equivalence_source.GetNext(); e.has_value(); + e = equivalence_source.GetNext()) { + equivalences_.push_back(*e); + } + PruneEquivalencesAndSortBySource(&equivalences_); +} + +OffsetMapper::OffsetMapper(const EquivalenceMap& equivalence_map) + : equivalences_(equivalence_map.size()) { + std::transform(equivalence_map.begin(), equivalence_map.end(), + equivalences_.begin(), + [](const EquivalenceCandidate& c) { return c.eq; }); + PruneEquivalencesAndSortBySource(&equivalences_); +} + +OffsetMapper::~OffsetMapper() = default; + +offset_t OffsetMapper::ForwardProject(offset_t offset) const { + auto pos = std::upper_bound( + equivalences_.begin(), equivalences_.end(), offset, + [](offset_t a, const Equivalence& b) { return a < b.src_offset; }); + if (pos != equivalences_.begin()) { + if (pos == equivalences_.end() || offset < pos[-1].src_end() || + offset - pos[-1].src_end() < pos->src_offset - offset) { + --pos; + } + } + return offset - pos->src_offset + pos->dst_offset; +} + +void OffsetMapper::ForwardProjectAll(std::vector* offsets) const { + DCHECK(std::is_sorted(offsets->begin(), offsets->end())); + auto current = equivalences_.begin(); + for (auto& src : *offsets) { + while (current != end() && current->src_end() <= src) { + ++current; + } + + if (current != end() && current->src_offset <= src) { + src = src - current->src_offset + current->dst_offset; + } else { + src = kInvalidOffset; + } + } + offsets->erase(std::remove(offsets->begin(), 
offsets->end(), kInvalidOffset), + offsets->end()); + offsets->shrink_to_fit(); +} + +void OffsetMapper::PruneEquivalencesAndSortBySource( + std::vector* equivalences) { + std::sort(equivalences->begin(), equivalences->end(), + [](const Equivalence& a, const Equivalence& b) { + return a.src_offset < b.src_offset; + }); + + for (auto current = equivalences->begin(); current != equivalences->end(); + ++current) { + // A "reaper" is an equivalence after |current| that overlaps with it, but + // is longer, and so truncates |current|. For example: + // ****** <= |current| + // ** + // **** + // **** + // ********** <= |next| as reaper. + // If a reaper is found (as |next|), every equivalence strictly between + // |current| and |next| would be truncated to 0 and discarded. Handling this + // case is important to avoid O(n^2) behavior. + bool next_is_reaper = false; + + // Look ahead to resolve overlaps, until a better candidate is found. + auto next = current + 1; + for (; next != equivalences->end(); ++next) { + DCHECK_GE(next->src_offset, current->src_offset); + if (next->src_offset >= current->src_end()) + break; // No more overlap. + + if (current->length < next->length) { + // |next| is better: So it is a reaper that shrinks |current|. + offset_t delta = current->src_end() - next->src_offset; + current->length -= delta; + next_is_reaper = true; + break; + } + } + + if (next_is_reaper) { + // Discard all equivalences strictly between |current| and |next|. + for (auto reduced = current + 1; reduced != next; ++reduced) + reduced->length = 0; + current = next - 1; + } else { + // Shrink all equivalences that overlap with |current|. These are all + // worse than |current| since no reaper is found. 
+ for (auto reduced = current + 1; reduced != next; ++reduced) { + offset_t delta = current->src_end() - reduced->src_offset; + reduced->length -= std::min(reduced->length, delta); + reduced->src_offset += delta; + reduced->dst_offset += delta; + DCHECK_EQ(reduced->src_offset, current->src_end()); + } + } + } + + // Discard all equivalences with length == 0. + equivalences->erase(std::remove_if(equivalences->begin(), equivalences->end(), + [](const Equivalence& equivalence) { + return equivalence.length == 0; + }), + equivalences->end()); +} + +/******** EquivalenceMap ********/ + +EquivalenceMap::EquivalenceMap() = default; + +EquivalenceMap::EquivalenceMap(std::vector&& equivalences) + : candidates_(std::move(equivalences)) { + SortByDestination(); +} + +EquivalenceMap::EquivalenceMap(EquivalenceMap&&) = default; + +EquivalenceMap::~EquivalenceMap() = default; + +void EquivalenceMap::Build( + const std::vector& old_sa, + const EncodedView& old_view, + const EncodedView& new_view, + const std::vector& targets_affinities, + double min_similarity) { + DCHECK_EQ(old_sa.size(), old_view.size()); + + CreateCandidates(old_sa, old_view, new_view, targets_affinities, + min_similarity); + SortByDestination(); + Prune(old_view, new_view, targets_affinities, min_similarity); + + offset_t coverage = 0; + offset_t current_offset = 0; + for (auto candidate : candidates_) { + DCHECK_GE(candidate.eq.dst_offset, current_offset); + coverage += candidate.eq.length; + current_offset = candidate.eq.dst_end(); + } + LOG(INFO) << "Equivalence Count: " << size(); + LOG(INFO) << "Coverage / Extra / Total: " << coverage << " / " + << new_view.size() - coverage << " / " << new_view.size(); +} + +void EquivalenceMap::CreateCandidates( + const std::vector& old_sa, + const EncodedView& old_view, + const EncodedView& new_view, + const std::vector& targets_affinities, + double min_similarity) { + candidates_.clear(); + + // This is an heuristic to find 'good' equivalences on encoded views. 
+ // Equivalences are found in ascending order of |new_image|. + offset_t dst_offset = 0; + + while (dst_offset < new_view.size()) { + if (!new_view.IsToken(dst_offset)) { + ++dst_offset; + continue; + } + auto match = + SuffixLowerBound(old_sa, old_view.begin(), + new_view.begin() + dst_offset, new_view.end()); + + offset_t next_dst_offset = dst_offset + 1; + // TODO(huangs): Clean up. + double best_similarity = min_similarity; + EquivalenceCandidate best_candidate = {{0, 0, 0}, 0.0}; + for (auto it = match; it != old_sa.end(); ++it) { + EquivalenceCandidate candidate = VisitEquivalenceSeed( + old_view.image_index(), new_view.image_index(), targets_affinities, + static_cast(*it), dst_offset, min_similarity); + if (candidate.similarity > best_similarity) { + best_candidate = candidate; + best_similarity = candidate.similarity; + next_dst_offset = candidate.eq.dst_end(); + } else { + break; + } + } + for (auto it = match; it != old_sa.begin(); --it) { + EquivalenceCandidate candidate = VisitEquivalenceSeed( + old_view.image_index(), new_view.image_index(), targets_affinities, + static_cast(it[-1]), dst_offset, min_similarity); + if (candidate.similarity > best_similarity) { + best_candidate = candidate; + best_similarity = candidate.similarity; + next_dst_offset = candidate.eq.dst_end(); + } else { + break; + } + } + if (best_candidate.similarity >= min_similarity) { + candidates_.push_back(best_candidate); + } + + dst_offset = next_dst_offset; + } +} + +void EquivalenceMap::SortByDestination() { + std::sort(candidates_.begin(), candidates_.end(), + [](const EquivalenceCandidate& a, const EquivalenceCandidate& b) { + return a.eq.dst_offset < b.eq.dst_offset; + }); +} + +void EquivalenceMap::Prune( + const EncodedView& old_view, + const EncodedView& new_view, + const std::vector& target_affinities, + double min_similarity) { + // TODO(etiennep): unify with + // OffsetMapper::PruneEquivalencesAndSortBySource(). 
+ for (auto current = candidates_.begin(); current != candidates_.end(); + ++current) { + if (current->similarity < min_similarity) + continue; // This candidate will be discarded anyway. + + bool next_is_reaper = false; + + // Look ahead to resolve overlaps, until a better candidate is found. + auto next = current + 1; + for (; next != candidates_.end(); ++next) { + DCHECK_GE(next->eq.dst_offset, current->eq.dst_offset); + if (next->eq.dst_offset >= current->eq.dst_offset + current->eq.length) + break; // No more overlap. + + if (current->similarity < next->similarity) { + // |next| is better: So it is a reaper that shrinks |current|. + offset_t delta = current->eq.dst_end() - next->eq.dst_offset; + current->eq.length -= delta; + current->similarity = GetEquivalenceSimilarity( + old_view.image_index(), new_view.image_index(), target_affinities, + current->eq); + + next_is_reaper = true; + break; + } + } + + if (next_is_reaper) { + // Discard all equivalences strictly between |current| and |next|. + for (auto reduced = current + 1; reduced != next; ++reduced) { + reduced->eq.length = 0; + reduced->similarity = 0; + } + current = next - 1; + } else { + // Shrinks all overlapping candidates following and worse than |current|. + for (auto reduced = current + 1; reduced != next; ++reduced) { + offset_t delta = current->eq.dst_end() - reduced->eq.dst_offset; + reduced->eq.length -= std::min(reduced->eq.length, delta); + reduced->eq.src_offset += delta; + reduced->eq.dst_offset += delta; + reduced->similarity = GetEquivalenceSimilarity( + old_view.image_index(), new_view.image_index(), target_affinities, + reduced->eq); + DCHECK_EQ(reduced->eq.dst_offset, current->eq.dst_end()); + } + } + } + + // Discard all candidates with similarity smaller than |min_similarity|. 
+ candidates_.erase( + std::remove_if(candidates_.begin(), candidates_.end(), + [min_similarity](const EquivalenceCandidate& candidate) { + return candidate.similarity < min_similarity; + }), + candidates_.end()); +} + +} // namespace zucchini diff --git a/equivalence_map.h b/equivalence_map.h new file mode 100644 index 0000000..91b215c --- /dev/null +++ b/equivalence_map.h @@ -0,0 +1,183 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ +#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ + +#include + +#include +#include + +#include "components/zucchini/image_index.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/targets_affinity.h" + +namespace zucchini { + +constexpr double kMismatchFatal = -std::numeric_limits::infinity(); + +class EncodedView; +class EquivalenceSource; + +// Returns similarity score between a token (raw byte or first byte of a +// reference) in |old_image_index| at |src| and a token in |new_image_index| +// at |dst|. |targets_affinities| describes affinities for each target pool and +// is used to evaluate similarity between references, hence its size must be +// equal to the number of pools in both |old_image_index| and |new_image_index|. +// Both |src| and |dst| must refer to tokens in |old_image_index| and +// |new_image_index|. +double GetTokenSimilarity( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + offset_t src, + offset_t dst); + +// Returns a similarity score between content in |old_image_index| and +// |new_image_index| at regions described by |equivalence|, using +// |targets_affinities| to evaluate similarity between references. 
+double GetEquivalenceSimilarity( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + const Equivalence& equivalence); + +// Extends |equivalence| forward and returns the result. This is related to +// VisitEquivalenceSeed(). +EquivalenceCandidate ExtendEquivalenceForward( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + const EquivalenceCandidate& equivalence, + double min_similarity); + +// Extends |equivalence| backward and returns the result. This is related to +// VisitEquivalenceSeed(). +EquivalenceCandidate ExtendEquivalenceBackward( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + const EquivalenceCandidate& equivalence, + double min_similarity); + +// Creates an equivalence, starting with |src| and |dst| as offset hint, and +// extends it both forward and backward, trying to maximise similarity between +// |old_image_index| and |new_image_index|, and returns the result. +// |targets_affinities| is used to evaluate similarity between references. +// |min_similarity| describes the minimum acceptable similarity score and is +// used as threshold to discard bad equivalences. +EquivalenceCandidate VisitEquivalenceSeed( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector& targets_affinities, + offset_t src, + offset_t dst, + double min_similarity); + +// Container of pruned equivalences used to map offsets from |old_image| to +// offsets in |new_image|. Equivalences are pruned by cropping smaller +// equivalences to avoid overlaps, to make the equivalence map (for covered +// bytes in |old_image| and |new_image|) one-to-one. +class OffsetMapper { + public: + using const_iterator = std::vector::const_iterator; + + // Constructors for various data sources. 
+ // - From a list of |equivalences|, already sorted (by |src_offset|) and + // pruned, useful for tests. + explicit OffsetMapper(std::vector&& equivalences); + // - From a generator, useful for Zucchini-apply. + explicit OffsetMapper(EquivalenceSource&& equivalence_source); + // - From an EquivalenceMap that needs to be processed, useful for + // Zucchini-gen. + explicit OffsetMapper(const EquivalenceMap& equivalence_map); + ~OffsetMapper(); + + size_t size() const { return equivalences_.size(); } + const_iterator begin() const { return equivalences_.begin(); } + const_iterator end() const { return equivalences_.end(); } + + // Returns an offset in |new_image| corresponding to |offset| in |old_image|. + // If |offset| is not part of an equivalence, the equivalence nearest to + // |offset| is used as if it contained |offset|. This assumes |equivalences_| + // is not empty. + offset_t ForwardProject(offset_t offset) const; + + // Given sorted |offsets|, applies a projection in-place of all offsets that + // are part of a pruned equivalence from |old_image| to |new_image|. Other + // offsets are removed from |offsets|. + void ForwardProjectAll(std::vector* offsets) const; + + // Accessor for testing. + const std::vector equivalences() const { return equivalences_; } + + // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a + // source location that was covered by some Equivalence would become covered + // by exactly one Equivalence. Moreover, for the offset, the equivalence + // corresponds to the largest (pre-pruning) covering Equivalence, and in case + // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may + // change in size since empty Equivalences are removed. 
+ static void PruneEquivalencesAndSortBySource( + std::vector* equivalences); + + private: + std::vector equivalences_; +}; + +// Container of equivalences between |old_image_index| and |new_image_index|, +// sorted by |Equivalence::dst_offset|, only used during patch generation. +class EquivalenceMap { + public: + using const_iterator = std::vector::const_iterator; + + EquivalenceMap(); + // Initializes the object with |candidates|. + explicit EquivalenceMap(std::vector&& candidates); + EquivalenceMap(EquivalenceMap&&); + EquivalenceMap(const EquivalenceMap&) = delete; + ~EquivalenceMap(); + + // Finds relevant equivalences between |old_view| and |new_view|, using + // suffix array |old_sa| computed from |old_view| and using + // |targets_affinities| to evaluate similarity between references. This + // function is not symmetric. Equivalences might overlap in |old_view|, but + // not in |new_view|. It tries to maximize accumulated similarity within each + // equivalence, while maximizing |new_view| coverage. The minimum similarity + // of an equivalence is given by |min_similarity|. + void Build(const std::vector& old_sa, + const EncodedView& old_view, + const EncodedView& new_view, + const std::vector& targets_affinities, + double min_similarity); + + size_t size() const { return candidates_.size(); } + const_iterator begin() const { return candidates_.begin(); } + const_iterator end() const { return candidates_.end(); } + + private: + // Discovers equivalence candidates between |old_view| and |new_view| and + // stores them in the object. Note that resulting candidates are not sorted + // and might be overlapping in new image. + void CreateCandidates(const std::vector& old_sa, + const EncodedView& old_view, + const EncodedView& new_view, + const std::vector& targets_affinities, + double min_similarity); + // Sorts candidates by their offset in new image. 
+ void SortByDestination(); + // Visits |candidates_| (sorted by |dst_offset|) and remove all destination + // overlaps. Candidates with low similarity scores are more likely to be + // shrunken. Unfit candidates may be removed. + void Prune(const EncodedView& old_view, + const EncodedView& new_view, + const std::vector& targets_affinities, + double min_similarity); + + std::vector candidates_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ diff --git a/equivalence_map_unittest.cc b/equivalence_map_unittest.cc new file mode 100644 index 0000000..ce8ffe1 --- /dev/null +++ b/equivalence_map_unittest.cc @@ -0,0 +1,446 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/equivalence_map.h" + +#include +#include +#include + +#include "components/zucchini/encoded_view.h" +#include "components/zucchini/image_index.h" +#include "components/zucchini/suffix_array.h" +#include "components/zucchini/targets_affinity.h" +#include "components/zucchini/test_disassembler.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using OffsetVector = std::vector; + +// Make all references 2 bytes long. +constexpr offset_t kReferenceSize = 2; + +// Creates and initialize an ImageIndex from |a| and with 2 types of references. +// The result is populated with |refs0| and |refs1|. |a| is expected to be a +// string literal valid for the lifetime of the object. 
+ImageIndex MakeImageIndexForTesting(const char* a, + std::vector&& refs0, + std::vector&& refs1) { + TestDisassembler disasm( + {kReferenceSize, TypeTag(0), PoolTag(0)}, std::move(refs0), + {kReferenceSize, TypeTag(1), PoolTag(0)}, std::move(refs1), + {kReferenceSize, TypeTag(2), PoolTag(1)}, {}); + + ImageIndex image_index( + ConstBufferView(reinterpret_cast(a), std::strlen(a))); + + EXPECT_TRUE(image_index.Initialize(&disasm)); + return image_index; +} + +std::vector MakeTargetsAffinitiesForTesting( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const EquivalenceMap& equivalence_map) { + std::vector target_affinities(old_image_index.PoolCount()); + for (const auto& old_pool_tag_and_targets : old_image_index.target_pools()) { + PoolTag pool_tag = old_pool_tag_and_targets.first; + target_affinities[pool_tag.value()].InferFromSimilarities( + equivalence_map, old_pool_tag_and_targets.second.targets(), + new_image_index.pool(pool_tag).targets()); + } + return target_affinities; +} + +} // namespace + +TEST(EquivalenceMapTest, GetTokenSimilarity) { + ImageIndex old_index = MakeImageIndexForTesting( + "ab1122334455", {{2, 0}, {4, 1}, {6, 2}, {8, 2}}, {{10, 3}}); + // Note: {4, 1} -> {6, 3} and {6, 2} -> {4, 1}, then result is sorted. + ImageIndex new_index = MakeImageIndexForTesting( + "a11b33224455", {{1, 0}, {4, 1}, {6, 3}, {8, 1}}, {{10, 2}}); + std::vector affinities = MakeTargetsAffinitiesForTesting( + old_index, new_index, + EquivalenceMap({{{0, 0, 1}, 1.0}, {{1, 3, 1}, 1.0}})); + + // Raw match. + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 0)); + // Raw mismatch. + EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 1)); + EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 1, 0)); + + // Type mismatch. 
+ EXPECT_EQ(kMismatchFatal, + GetTokenSimilarity(old_index, new_index, affinities, 0, 1)); + EXPECT_EQ(kMismatchFatal, + GetTokenSimilarity(old_index, new_index, affinities, 2, 0)); + EXPECT_EQ(kMismatchFatal, + GetTokenSimilarity(old_index, new_index, affinities, 2, 10)); + EXPECT_EQ(kMismatchFatal, + GetTokenSimilarity(old_index, new_index, affinities, 10, 1)); + + // Reference strong match. + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 1)); + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 4, 6)); + + // Reference weak match. + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 4)); + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 8)); + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 8, 4)); + + // Weak match is not greater than strong match. + EXPECT_LE(GetTokenSimilarity(old_index, new_index, affinities, 6, 4), + GetTokenSimilarity(old_index, new_index, affinities, 2, 1)); + + // Reference mismatch. + EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 4)); + EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 6)); +} + +TEST(EquivalenceMapTest, GetEquivalenceSimilarity) { + ImageIndex image_index = + MakeImageIndexForTesting("abcdef1122", {{6, 0}}, {{8, 1}}); + std::vector affinities = + MakeTargetsAffinitiesForTesting(image_index, image_index, {}); + + // Sanity check. These are no-op with length-0 equivalences. + EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {0, 0, 0})); + EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {0, 3, 0})); + EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {3, 0, 0})); + + // Now examine larger equivalences. 
+ EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {0, 0, 3})); + EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {0, 3, 3})); + EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {3, 0, 3})); + + EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {6, 6, 4})); +} + +TEST(EquivalenceMapTest, ExtendEquivalenceForward) { + auto test_extend_forward = + [](const ImageIndex old_index, const ImageIndex new_index, + const EquivalenceCandidate& equivalence, double base_similarity) { + return ExtendEquivalenceForward( + old_index, new_index, + MakeTargetsAffinitiesForTesting(old_index, new_index, {}), + equivalence, base_similarity) + .eq; + }; + + EXPECT_EQ(Equivalence({0, 0, 0}), + test_extend_forward(MakeImageIndexForTesting("", {}, {}), + MakeImageIndexForTesting("", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 0}), + test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("zzzz", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 6}), + test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({2, 2, 4}), + test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), + {{2, 2, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 6}), + test_extend_forward(MakeImageIndexForTesting("bananaxx", {}, {}), + MakeImageIndexForTesting("bananayy", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({0, 0, 8}), + test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({0, 0, 6}), + test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + MakeImageIndexForTesting("banana22", {}, {{6, 
0}}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({0, 0, 17}), + test_extend_forward(MakeImageIndexForTesting("bananaxxpineapple", {}, {}), + MakeImageIndexForTesting("bananayypineapple", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({3, 0, 19}), + test_extend_forward( + MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), + MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), + {{3, 0, 0}, 0.0}, 8.0)); +} + +TEST(EquivalenceMapTest, ExtendEquivalenceBackward) { + auto test_extend_backward = + [](const ImageIndex old_index, const ImageIndex new_index, + const EquivalenceCandidate& equivalence, double base_similarity) { + return ExtendEquivalenceBackward( + old_index, new_index, + MakeTargetsAffinitiesForTesting(old_index, new_index, {}), + equivalence, base_similarity) + .eq; + }; + + EXPECT_EQ(Equivalence({0, 0, 0}), + test_extend_backward(MakeImageIndexForTesting("", {}, {}), + MakeImageIndexForTesting("", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({6, 4, 0}), + test_extend_backward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("zzzz", {}, {}), + {{6, 4, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 6}), + test_extend_backward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), + {{6, 6, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({2, 2, 6}), + test_extend_backward(MakeImageIndexForTesting("xxbanana", {}, {}), + MakeImageIndexForTesting("yybanana", {}, {}), + {{8, 8, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({0, 0, 8}), + test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}), + MakeImageIndexForTesting("11banana", {{0, 0}}, {}), + {{8, 8, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({2, 2, 6}), + test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}), + MakeImageIndexForTesting("22banana", {}, {{0, 0}}), + {{8, 8, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 17}), + 
test_extend_backward( + MakeImageIndexForTesting("bananaxxpineapple", {}, {}), + MakeImageIndexForTesting("bananayypineapple", {}, {}), + {{8, 8, 9}, 9.0}, 8.0)); + + EXPECT_EQ( + Equivalence({3, 0, 19}), + test_extend_backward( + MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), + MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), + {{22, 19, 0}, 0.0}, 8.0)); +} + +TEST(EquivalenceMapTest, PruneEquivalencesAndSortBySource) { + auto PruneEquivalencesAndSortBySourceTest = + [](std::vector&& equivalences) { + OffsetMapper::PruneEquivalencesAndSortBySource(&equivalences); + return equivalences; + }; + + EXPECT_EQ(std::vector(), + PruneEquivalencesAndSortBySourceTest({})); + EXPECT_EQ(std::vector({{0, 10, 1}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 1}})); + EXPECT_EQ(std::vector(), + PruneEquivalencesAndSortBySourceTest({{0, 10, 0}})); + EXPECT_EQ(std::vector({{0, 10, 1}, {1, 11, 1}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 1}, {1, 11, 1}})); + EXPECT_EQ(std::vector({{0, 10, 2}, {2, 13, 1}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 2}})); + EXPECT_EQ(std::vector({{0, 10, 2}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 1}})); + EXPECT_EQ(std::vector({{0, 10, 2}, {2, 14, 1}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 13, 2}})); + EXPECT_EQ(std::vector({{0, 10, 1}, {1, 12, 3}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 3}})); + EXPECT_EQ(std::vector({{0, 10, 3}, {3, 16, 2}}), + PruneEquivalencesAndSortBySourceTest( + {{0, 10, 3}, {1, 13, 3}, {3, 16, 2}})); // Pruning is greedy + + // Consider following pattern that may cause O(n^2) behavior if not handled + // properly. + // *************** + // ********** + // ******** + // ****** + // **** + // ** + // *************** + // This test case makes sure the function does not stall on a large instance + // of this pattern. 
+ EXPECT_EQ(std::vector({{0, 10, +300000}, {300000, 30, +300000}}), + PruneEquivalencesAndSortBySourceTest([] { + std::vector equivalenses; + equivalenses.push_back({0, 10, +300000}); + for (offset_t i = 0; i < 100000; ++i) + equivalenses.push_back({200000 + i, 20, +200000 - 2 * i}); + equivalenses.push_back({300000, 30, +300000}); + return equivalenses; + }())); +} + +TEST(EquivalenceMapTest, ForwardProject) { + auto ForwardProjectAllTest = [](const OffsetMapper& offset_mapper, + std::initializer_list offsets) { + OffsetVector offsets_vec(offsets); + offset_mapper.ForwardProjectAll(&offsets_vec); + return offsets_vec; + }; + + OffsetMapper offset_mapper1({{0, 10, 2}, {2, 13, 1}, {4, 16, 2}}); + EXPECT_EQ(OffsetVector({10}), ForwardProjectAllTest(offset_mapper1, {0})); + EXPECT_EQ(OffsetVector({13}), ForwardProjectAllTest(offset_mapper1, {2})); + EXPECT_EQ(OffsetVector({}), ForwardProjectAllTest(offset_mapper1, {3})); + EXPECT_EQ(OffsetVector({10, 13}), + ForwardProjectAllTest(offset_mapper1, {0, 2})); + EXPECT_EQ(OffsetVector({11, 13, 17}), + ForwardProjectAllTest(offset_mapper1, {1, 2, 5})); + EXPECT_EQ(OffsetVector({11, 17}), + ForwardProjectAllTest(offset_mapper1, {1, 3, 5})); + EXPECT_EQ(OffsetVector({10, 11, 13, 16, 17}), + ForwardProjectAllTest(offset_mapper1, {0, 1, 2, 3, 4, 5, 6})); + + OffsetMapper offset_mapper2({{0, 10, 2}, {13, 2, 1}, {16, 4, 2}}); + EXPECT_EQ(OffsetVector({2}), ForwardProjectAllTest(offset_mapper2, {13})); + EXPECT_EQ(OffsetVector({10, 2}), + ForwardProjectAllTest(offset_mapper2, {0, 13})); + EXPECT_EQ(OffsetVector({11, 2, 5}), + ForwardProjectAllTest(offset_mapper2, {1, 13, 17})); + EXPECT_EQ(OffsetVector({11, 5}), + ForwardProjectAllTest(offset_mapper2, {1, 14, 17})); + EXPECT_EQ(OffsetVector({10, 11, 2, 4, 5}), + ForwardProjectAllTest(offset_mapper2, {0, 1, 13, 14, 16, 17, 18})); +} + +TEST(EquivalenceMapTest, ProjectOffset) { + OffsetMapper offset_mapper1({{0, 10, 2}, {2, 13, 1}, {4, 16, 2}}); + EXPECT_EQ(10U, 
offset_mapper1.ForwardProject(0)); + EXPECT_EQ(11U, offset_mapper1.ForwardProject(1)); + EXPECT_EQ(13U, offset_mapper1.ForwardProject(2)); + EXPECT_EQ(14U, offset_mapper1.ForwardProject(3)); // Previous equivalence. + EXPECT_EQ(16U, offset_mapper1.ForwardProject(4)); + EXPECT_EQ(17U, offset_mapper1.ForwardProject(5)); + EXPECT_EQ(18U, offset_mapper1.ForwardProject(6)); // Previous equivalence. + + OffsetMapper offset_mapper2({{0, 10, 2}, {13, 2, 1}, {16, 4, 2}}); + EXPECT_EQ(10U, offset_mapper2.ForwardProject(0)); + EXPECT_EQ(11U, offset_mapper2.ForwardProject(1)); + EXPECT_EQ(2U, offset_mapper2.ForwardProject(13)); + EXPECT_EQ(3U, offset_mapper2.ForwardProject(14)); // Previous equivalence. + EXPECT_EQ(4U, offset_mapper2.ForwardProject(16)); + EXPECT_EQ(5U, offset_mapper2.ForwardProject(17)); + EXPECT_EQ(6U, offset_mapper2.ForwardProject(18)); // Previous equivalence. +} + +TEST(EquivalenceMapTest, Build) { + auto test_build_equivalence = [](const ImageIndex old_index, + const ImageIndex new_index, + double minimum_similarity) { + auto affinities = MakeTargetsAffinitiesForTesting(old_index, new_index, {}); + + EncodedView old_view(old_index); + EncodedView new_view(new_index); + + for (const auto& old_pool_tag_and_targets : old_index.target_pools()) { + PoolTag pool_tag = old_pool_tag_and_targets.first; + std::vector old_labels; + std::vector new_labels; + size_t label_bound = affinities[pool_tag.value()].AssignLabels( + 1.0, &old_labels, &new_labels); + old_view.SetLabels(pool_tag, std::move(old_labels), label_bound); + new_view.SetLabels(pool_tag, std::move(new_labels), label_bound); + } + + std::vector old_sa = + MakeSuffixArray(old_view, old_view.Cardinality()); + + EquivalenceMap equivalence_map; + equivalence_map.Build(old_sa, old_view, new_view, affinities, + minimum_similarity); + + offset_t current_dst_offset = 0; + offset_t coverage = 0; + for (const auto& candidate : equivalence_map) { + EXPECT_GE(candidate.eq.dst_offset, current_dst_offset); + 
EXPECT_GT(candidate.eq.length, offset_t(0)); + EXPECT_LE(candidate.eq.src_offset + candidate.eq.length, + old_index.size()); + EXPECT_LE(candidate.eq.dst_offset + candidate.eq.length, + new_index.size()); + EXPECT_GE(candidate.similarity, minimum_similarity); + current_dst_offset = candidate.eq.dst_offset; + coverage += candidate.eq.length; + } + return coverage; + }; + + EXPECT_EQ(0U, + test_build_equivalence(MakeImageIndexForTesting("", {}, {}), + MakeImageIndexForTesting("", {}, {}), 4.0)); + + EXPECT_EQ(0U, test_build_equivalence( + MakeImageIndexForTesting("", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), 4.0)); + + EXPECT_EQ(0U, + test_build_equivalence(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("", {}, {}), 4.0)); + + EXPECT_EQ(0U, test_build_equivalence( + MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("zzzz", {}, {}), 4.0)); + + EXPECT_EQ(6U, test_build_equivalence( + MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), 4.0)); + + EXPECT_EQ(6U, test_build_equivalence( + MakeImageIndexForTesting("bananaxx", {}, {}), + MakeImageIndexForTesting("bananayy", {}, {}), 4.0)); + + EXPECT_EQ(8U, test_build_equivalence( + MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + MakeImageIndexForTesting("banana11", {{6, 0}}, {}), 4.0)); + + EXPECT_EQ(6U, test_build_equivalence( + MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + MakeImageIndexForTesting("banana22", {}, {{6, 0}}), 4.0)); + + EXPECT_EQ( + 15U, + test_build_equivalence( + MakeImageIndexForTesting("banana11pineapple", {{6, 0}}, {}), + MakeImageIndexForTesting("banana22pineapple", {}, {{6, 0}}), 4.0)); + + EXPECT_EQ( + 15U, + test_build_equivalence( + MakeImageIndexForTesting("bananaxxxxxxxxpineapple", {}, {}), + MakeImageIndexForTesting("bananayyyyyyyypineapple", {}, {}), 4.0)); + + EXPECT_EQ( + 19U, + test_build_equivalence( + MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), + 
MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), + 4.0)); +} + +} // namespace zucchini diff --git a/heuristic_ensemble_matcher.cc b/heuristic_ensemble_matcher.cc new file mode 100644 index 0000000..aead5dc --- /dev/null +++ b/heuristic_ensemble_matcher.cc @@ -0,0 +1,369 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/heuristic_ensemble_matcher.h" + +#include +#include +#include +#include +#include + +#include "base/bind.h" +#include "base/numerics/safe_conversions.h" +#include "base/strings/stringprintf.h" +#include "components/zucchini/binary_data_histogram.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +namespace { + +/******** Helper Functions ********/ + +// Uses |detector| to find embedded executables inside |image|, and returns the +// result on success, or base::nullopt on failure, which occurs if too many (> +// |kElementLimit|) elements are found. +base::Optional> FindEmbeddedElements( + ConstBufferView image, + const std::string& name, + ElementDetector&& detector) { + // Maximum number of Elements in a file. This is enforced because our matching + // algorithm is O(n^2), which suffices for regular archive files that should + // have up to 10's of executable files. An archive containing 100's of + // executables is likely pathological, and is rejected to prevent exploits. 
+ static constexpr size_t kElementLimit = 256; + std::vector elements; + ElementFinder element_finder(image, std::move(detector)); + for (auto element = element_finder.GetNext(); + element.has_value() && elements.size() <= kElementLimit; + element = element_finder.GetNext()) { + elements.push_back(*element); + } + if (elements.size() >= kElementLimit) { + LOG(WARNING) << name << ": Found too many elements."; + return base::nullopt; + } + LOG(INFO) << name << ": Found " << elements.size() << " elements."; + return elements; +} + +// Determines whether a proposed comparison between Elements should be rejected +// early, to decrease the likelihood of creating false-positive matches, which +// may be costly for patching. Our heuristic simply prohibits big difference in +// size (relative and absolute) between matched elements. +bool UnsafeDifference(const Element& old_element, const Element& new_element) { + static constexpr double kMaxBloat = 2.0; + static constexpr size_t kMinWorrysomeDifference = 2 << 20; // 2MB + size_t lo_size = std::min(old_element.size, new_element.size); + size_t hi_size = std::max(old_element.size, new_element.size); + if (hi_size - lo_size < kMinWorrysomeDifference) + return false; + if (hi_size < lo_size * kMaxBloat) + return false; + return true; +} + +std::ostream& operator<<(std::ostream& stream, const Element& elt) { + stream << "(" << elt.exe_type << ", " << AsHex<8, size_t>(elt.offset) << " +" + << AsHex<8, size_t>(elt.size) << ")"; + return stream; +} + +/******** MatchingInfoOut ********/ + +// A class to output detailed information during ensemble matching. Extracting +// the functionality to a separate class decouples formatting and printing logic +// from matching logic. The base class consists of stubs. 
+class MatchingInfoOut { + protected: + MatchingInfoOut() = default; + + public: + virtual ~MatchingInfoOut() = default; + virtual void InitSizes(size_t old_size, size_t new_size) {} + virtual void DeclareTypeMismatch(int iold, int inew) {} + virtual void DeclareUnsafeDistance(int iold, int inew) {} + virtual void DeclareCandidate(int iold, int inew) {} + virtual void DeclareMatch(int iold, + int inew, + double dist, + bool is_identical) {} + virtual void DeclareOutlier(int iold, int inew) {} + + virtual void OutputCompare(const Element& old_element, + const Element& new_element, + double dist) {} + + virtual void OutputMatch(const Element& best_old_element, + const Element& new_element, + bool is_identical, + double best_dist) {} + + virtual void OutputScores(const std::string& stats) {} + + virtual void OutputTextGrid() {} + + private: + DISALLOW_COPY_AND_ASSIGN(MatchingInfoOut); +}; + +/******** MatchingInfoTerse ********/ + +// A terse MatchingInfoOut that prints only basic information, using LOG(). +class MatchingInfoOutTerse : public MatchingInfoOut { + public: + MatchingInfoOutTerse() = default; + ~MatchingInfoOutTerse() override = default; + + void OutputScores(const std::string& stats) override { + LOG(INFO) << "Best dists: " << stats; + } + + private: + DISALLOW_COPY_AND_ASSIGN(MatchingInfoOutTerse); +}; + +/******** MatchingInfoOutVerbose ********/ + +// A verbose MatchingInfoOut that prints detailed information using |out_|, +// including comparison pairs, scores, and a text grid representation of +// pairwise matching results. +class MatchingInfoOutVerbose : public MatchingInfoOut { + public: + explicit MatchingInfoOutVerbose(std::ostream& out) : out_(out) {} + ~MatchingInfoOutVerbose() override = default; + + // Outputs sizes and initializes |text_grid_|. 
+ void InitSizes(size_t old_size, size_t new_size) override { + out_ << "Comparing old (" << old_size << " elements) and new (" << new_size + << " elements)" << std::endl; + text_grid_.assign(new_size, std::string(old_size, '-')); + best_dist_.assign(new_size, -1.0); + } + + // Functions to update match status in text grid representation. + + void DeclareTypeMismatch(int iold, int inew) override { + text_grid_[inew][iold] = 'T'; + } + void DeclareUnsafeDistance(int iold, int inew) override { + text_grid_[inew][iold] = 'U'; + } + void DeclareCandidate(int iold, int inew) override { + text_grid_[inew][iold] = 'C'; // Provisional. + } + void DeclareMatch(int iold, + int inew, + double dist, + bool is_identical) override { + text_grid_[inew][iold] = is_identical ? 'I' : 'M'; + best_dist_[inew] = dist; + } + void DeclareOutlier(int iold, int inew) override { + text_grid_[inew][iold] = 'O'; + } + + // Functions to print detailed information. + + void OutputCompare(const Element& old_element, + const Element& new_element, + double dist) override { + out_ << "Compare old" << old_element << " to new" << new_element << " --> " + << base::StringPrintf("%.5f", dist) << std::endl; + } + + void OutputMatch(const Element& best_old_element, + const Element& new_element, + bool is_identical, + double best_dist) override { + if (is_identical) { + out_ << "Skipped old" << best_old_element << " - identical to new" + << new_element; + } else { + out_ << "Matched old" << best_old_element << " to new" << new_element + << " --> " << base::StringPrintf("%.5f", best_dist); + } + out_ << std::endl; + } + + void OutputScores(const std::string& stats) override { + out_ << "Best dists: " << stats << std::endl; + } + + void OutputTextGrid() override { + int new_size = static_cast(text_grid_.size()); + for (int inew = 0; inew < new_size; ++inew) { + const std::string& line = text_grid_[inew]; + out_ << " "; + for (char ch : line) { + char prefix = (ch == 'I' || ch == 'M') ? 
'(' : ' '; + char suffix = (ch == 'I' || ch == 'M') ? ')' : ' '; + out_ << prefix << ch << suffix; + } + if (best_dist_[inew] >= 0) + out_ << " " << base::StringPrintf("%.5f", best_dist_[inew]); + out_ << std::endl; + } + if (!text_grid_.empty()) { + out_ << " Legend: I = identical, M = matched, T = type mismatch, " + "U = unsafe distance, C = candidate, O = outlier, - = skipped." + << std::endl; + } + } + + private: + std::ostream& out_; + + // Text grid representation of matches. Rows correspond to "old" and columns + // correspond to "new". + std::vector text_grid_; + + // For each "new" element, distance of best match. -1 denotes no match. + std::vector best_dist_; + + private: + DISALLOW_COPY_AND_ASSIGN(MatchingInfoOutVerbose); +}; + +} // namespace + +/******** HeuristicEnsembleMatcher ********/ + +HeuristicEnsembleMatcher::HeuristicEnsembleMatcher(std::ostream* out) + : out_(out) {} + +HeuristicEnsembleMatcher::~HeuristicEnsembleMatcher() = default; + +bool HeuristicEnsembleMatcher::RunMatch(ConstBufferView old_image, + ConstBufferView new_image) { + DCHECK(matches_.empty()); + LOG(INFO) << "Start matching."; + + // Find all elements in "old" and "new". 
+ base::Optional> old_elements = + FindEmbeddedElements(old_image, "Old file", + base::BindRepeating(DetectElementFromDisassembler)); + if (!old_elements.has_value()) + return false; + base::Optional> new_elements = + FindEmbeddedElements(new_image, "New file", + base::BindRepeating(DetectElementFromDisassembler)); + if (!new_elements.has_value()) + return false; + + std::unique_ptr info_out; + if (out_) + info_out = std::make_unique(*out_); + else + info_out = std::make_unique(); + + const int num_new_elements = base::checked_cast(new_elements->size()); + const int num_old_elements = base::checked_cast(old_elements->size()); + info_out->InitSizes(num_old_elements, num_new_elements); + + // For each "new" element, match it with the "old" element that's nearest to + // it, with distance determined by BinaryDataHistogram. The resulting + // "old"-"new" pairs are stored into |results|. Possibilities: + // - Type mismatch: No match. + // - UnsafeDifference() heuristics fail: No match. + // - Identical match: Skip "new" since this is a trivial case. + // - Non-identical match: Match "new" with "old" with min distance. + // - No match: Skip "new". + struct Results { + int iold; + int inew; + double dist; + }; + std::vector results; + + // Precompute histograms for "old" since they get reused. + std::vector old_his(num_old_elements); + for (int iold = 0; iold < num_old_elements; ++iold) { + ConstBufferView sub_image(old_image[(*old_elements)[iold]]); + old_his[iold].Compute(sub_image); + // ProgramDetector should have imposed minimal size limit to |sub_image|. + // Therefore resulting histogram are expected to be valid. 
+ CHECK(old_his[iold].IsValid()); + } + + const int kUninitIold = num_old_elements; + for (int inew = 0; inew < num_new_elements; ++inew) { + const Element& cur_new_element = (*new_elements)[inew]; + ConstBufferView cur_new_sub_image(new_image[cur_new_element.region()]); + BinaryDataHistogram new_his; + new_his.Compute(cur_new_sub_image); + CHECK(new_his.IsValid()); + + double best_dist = HUGE_VAL; + int best_iold = kUninitIold; + bool is_identical = false; + + for (int iold = 0; iold < num_old_elements; ++iold) { + const Element& cur_old_element = (*old_elements)[iold]; + if (cur_old_element.exe_type != cur_new_element.exe_type) { + info_out->DeclareTypeMismatch(iold, inew); + continue; + } + if (UnsafeDifference(cur_old_element, cur_new_element)) { + info_out->DeclareUnsafeDistance(iold, inew); + continue; + } + double dist = old_his[iold].Distance(new_his); + info_out->DeclareCandidate(iold, inew); + info_out->OutputCompare(cur_old_element, cur_new_element, dist); + if (best_dist > dist) { // Tie resolution: First-one, first-serve. + best_iold = iold; + best_dist = dist; + if (best_dist == 0) { + ConstBufferView sub_image(old_image[cur_old_element.region()]); + if (sub_image.equals(cur_new_sub_image)) { + is_identical = true; + break; + } + } + } + } + + if (best_iold != kUninitIold) { + const Element& best_old_element = (*old_elements)[best_iold]; + info_out->DeclareMatch(best_iold, inew, best_dist, is_identical); + if (is_identical) // Skip "new" if identical match is found. + ++num_identical_; + else + results.push_back({best_iold, inew, best_dist}); + info_out->OutputMatch(best_old_element, cur_new_element, is_identical, + best_dist); + } + } + + // Populate |matches_| from |result|. To reduce that chance of false-positive + // matches, statistics on dists are computed. If a match's |dist| is an + // outlier then it is rejected. 
+ if (results.size() > 0) { + OutlierDetector detector; + for (const auto& result : results) { + if (result.dist > 0) + detector.Add(result.dist); + } + detector.Prepare(); + info_out->OutputScores(detector.RenderStats()); + for (const Results& result : results) { + if (detector.DecideOutlier(result.dist) > 0) { + info_out->DeclareOutlier(result.iold, result.inew); + } else { + matches_.push_back( + {(*old_elements)[result.iold], (*new_elements)[result.inew]}); + } + } + info_out->OutputTextGrid(); + } + + Trim(); + return true; +} + +} // namespace zucchini diff --git a/heuristic_ensemble_matcher.h b/heuristic_ensemble_matcher.h new file mode 100644 index 0000000..1adb998 --- /dev/null +++ b/heuristic_ensemble_matcher.h @@ -0,0 +1,39 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_ +#define COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_ + +#include + +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/ensemble_matcher.h" + +namespace zucchini { + +// An ensemble matcher that: +// - Detects embedded elements in "old" and "new" archive files. +// - Applies heuristics to create matched pairs. +// It is desired to have matched pairs that: +// - Have "reasonable" size difference (see UnsafeDifference() in the .cc file). +// - Have "minimal distance" among other potential matched pairs. +class HeuristicEnsembleMatcher : public EnsembleMatcher { + public: + explicit HeuristicEnsembleMatcher(std::ostream* out); + ~HeuristicEnsembleMatcher() override; + + // EnsembleMatcher: + bool RunMatch(ConstBufferView old_image, ConstBufferView new_image) override; + + private: + // Optional stream to print detailed information during matching. 
+ std::ostream* out_ = nullptr; + + DISALLOW_COPY_AND_ASSIGN(HeuristicEnsembleMatcher); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_ diff --git a/image_index.cc b/image_index.cc new file mode 100644 index 0000000..6c7a28b --- /dev/null +++ b/image_index.cc @@ -0,0 +1,78 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/image_index.h" + +#include +#include + +#include "components/zucchini/algorithm.h" +#include "components/zucchini/disassembler.h" + +namespace zucchini { + +ImageIndex::ImageIndex(ConstBufferView image) + : image_(image), type_tags_(image.size(), kNoTypeTag) {} + +ImageIndex::ImageIndex(ImageIndex&&) = default; + +ImageIndex::~ImageIndex() = default; + +bool ImageIndex::Initialize(Disassembler* disasm) { + std::vector ref_groups = disasm->MakeReferenceGroups(); + for (const auto& group : ref_groups) { + // Build pool-to-type mapping. + DCHECK_NE(kNoPoolTag, group.pool_tag()); + TargetPool& target_pool = target_pools_[group.pool_tag()]; + target_pool.AddType(group.type_tag()); + target_pool.InsertTargets(std::move(*group.GetReader(disasm))); + } + for (const auto& group : ref_groups) { + // Find and store all references for each type, returns false on finding + // any overlap, to signal error. + if (!InsertReferences(group.traits(), + std::move(*group.GetReader(disasm)))) { + return false; + } + } + return true; +} + +bool ImageIndex::IsToken(offset_t location) const { + TypeTag type = LookupType(location); + + // |location| points into raw data. + if (type == kNoTypeTag) + return true; + + // |location| points into a Reference. + IndirectReference reference = refs(type).at(location); + // Only the first byte of a reference is a token. 
+ return location == reference.location; +} + +bool ImageIndex::InsertReferences(const ReferenceTypeTraits& traits, + ReferenceReader&& ref_reader) { + // Store ReferenceSet for current type (of |group|). + DCHECK_NE(kNoTypeTag, traits.type_tag); + auto result = reference_sets_.emplace( + traits.type_tag, ReferenceSet(traits, pool(traits.pool_tag))); + DCHECK(result.second); + + result.first->second.InitReferences(std::move(ref_reader)); + for (auto ref : reference_sets_.at(traits.type_tag)) { + DCHECK(RangeIsBounded(ref.location, traits.width, size())); + auto cur_type_tag = type_tags_.begin() + ref.location; + + // Check for overlap with existing reference. If found, then invalidate. + if (std::any_of(cur_type_tag, cur_type_tag + traits.width, + [](TypeTag type) { return type != kNoTypeTag; })) { + return false; + } + std::fill(cur_type_tag, cur_type_tag + traits.width, traits.type_tag); + } + return true; +} + +} // namespace zucchini diff --git a/image_index.h b/image_index.h new file mode 100644 index 0000000..4f07015 --- /dev/null +++ b/image_index.h @@ -0,0 +1,116 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_ +#define COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_ + +#include +#include + +#include +#include + +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/reference_set.h" +#include "components/zucchini/target_pool.h" + +namespace zucchini { + +class Disassembler; + +// A class that holds annotations of an image, allowing quick access to its raw +// and reference content. The memory overhead of storing all references is +// relatively high, so this is only used during patch generation. 
+class ImageIndex { + public: + explicit ImageIndex(ConstBufferView image); + ImageIndex(const ImageIndex&) = delete; + ImageIndex(ImageIndex&&); + ~ImageIndex(); + + // Inserts all references read from |disasm|. This should be called exactly + // once. If overlap between any two references of any type is encountered, + // returns false and leaves the object in an invalid state. Otherwise, + // returns true. + // TODO(huangs): Refactor ReaderFactory and WriterFactory so + // |const Disassembler&| can be used here. + bool Initialize(Disassembler* disasm); + + // Returns the array size needed to accommodate all reference type values. + size_t TypeCount() const { + if (reference_sets_.empty()) + return 0U; + return reference_sets_.rbegin()->first.value() + 1; + } + + // Returns the array size needed to accommodate all pool values. + size_t PoolCount() const { + if (target_pools_.empty()) + return 0U; + return target_pools_.rbegin()->first.value() + 1; + } + + // Returns true if |image_[location]| is either: + // - A raw value. + // - The first byte of a reference. + bool IsToken(offset_t location) const; + + // Returns true if |image_[location]| is part of a reference. + bool IsReference(offset_t location) const { + return LookupType(location) != kNoTypeTag; + } + + // Returns the type tag of the reference covering |location|, or kNoTypeTag if + // |location| is not part of a reference. + TypeTag LookupType(offset_t location) const { + DCHECK_LT(location, size()); + return type_tags_[location]; + } + + // Returns the raw value at |location|. 
+ uint8_t GetRawValue(offset_t location) const { + DCHECK_LT(location, size()); + return image_[location]; + } + + const std::map& target_pools() const { + return target_pools_; + } + const std::map& reference_sets() const { + return reference_sets_; + } + + const TargetPool& pool(PoolTag pool_tag) const { + return target_pools_.at(pool_tag); + } + const ReferenceSet& refs(TypeTag type_tag) const { + return reference_sets_.at(type_tag); + } + + // Returns the size of the image. + size_t size() const { return image_.size(); } + + private: + // Inserts to |*this| index, all references described by |traits| read from + // |ref_reader|, which gets consumed. This should be called exactly once for + // each reference type. If overlap between any two references of any type is + // encountered, returns false and leaves the object in an invalid state. + // Otherwise, returns true. + bool InsertReferences(const ReferenceTypeTraits& traits, + ReferenceReader&& ref_reader); + + const ConstBufferView image_; + + // Used for random access lookup of reference type, for each byte in |image_|. + std::vector type_tags_; + + std::map target_pools_; + std::map reference_sets_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_ diff --git a/image_index_unittest.cc b/image_index_unittest.cc new file mode 100644 index 0000000..cf6f8a7 --- /dev/null +++ b/image_index_unittest.cc @@ -0,0 +1,131 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/image_index.h" + +#include + +#include +#include + +#include "base/test/gtest_util.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/test_disassembler.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +class ImageIndexTest : public testing::Test { + protected: + ImageIndexTest() + : buffer_(20), + image_index_(ConstBufferView(buffer_.data(), buffer_.size())) { + std::iota(buffer_.begin(), buffer_.end(), 0); + } + + void InitializeWithDefaultTestData() { + TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, + {{1, 0}, {8, 1}, {10, 2}}, + {4, TypeTag(1), PoolTag(0)}, {{3, 3}}, + {3, TypeTag(2), PoolTag(1)}, {{12, 4}, {17, 5}}); + EXPECT_TRUE(image_index_.Initialize(&disasm)); + } + + std::vector buffer_; + ImageIndex image_index_; +}; + +TEST_F(ImageIndexTest, TypeAndPool) { + TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, {}, + {4, TypeTag(1), PoolTag(0)}, {}, + {3, TypeTag(2), PoolTag(1)}, {}); + EXPECT_TRUE(image_index_.Initialize(&disasm)); + + EXPECT_EQ(3U, image_index_.TypeCount()); + EXPECT_EQ(2U, image_index_.PoolCount()); + + EXPECT_EQ(TypeTag(0), image_index_.refs(TypeTag(0)).type_tag()); + EXPECT_EQ(TypeTag(1), image_index_.refs(TypeTag(1)).type_tag()); + EXPECT_EQ(TypeTag(2), image_index_.refs(TypeTag(2)).type_tag()); + + EXPECT_EQ(PoolTag(0), image_index_.refs(TypeTag(0)).pool_tag()); + EXPECT_EQ(PoolTag(0), image_index_.refs(TypeTag(1)).pool_tag()); + EXPECT_EQ(PoolTag(1), image_index_.refs(TypeTag(2)).pool_tag()); +} + +TEST_F(ImageIndexTest, InvalidInitialize1) { + // Overlap within the same group. + TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, {{1, 0}, {2, 0}}, + {4, TypeTag(1), PoolTag(0)}, {}, + {3, TypeTag(2), PoolTag(1)}, {}); + EXPECT_FALSE(image_index_.Initialize(&disasm)); +} + +TEST_F(ImageIndexTest, InvalidInitialize2) { + // Overlap across different readers. 
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, + {{1, 0}, {8, 1}, {10, 2}}, + {4, TypeTag(1), PoolTag(0)}, {{3, 3}}, + {3, TypeTag(2), PoolTag(1)}, {{11, 0}}); + EXPECT_FALSE(image_index_.Initialize(&disasm)); +} + +TEST_F(ImageIndexTest, LookupType) { + InitializeWithDefaultTestData(); + + std::vector expected = { + -1, // raw + 0, 0, // ref 0 + 1, 1, 1, 1, // ref 1 + -1, // raw + 0, 0, // ref 0 + 0, 0, // ref 0 + 2, 2, 2, // ref 2 + -1, -1, // raw + 2, 2, 2, // ref 2 + }; + + for (offset_t i = 0; i < image_index_.size(); ++i) + EXPECT_EQ(TypeTag(expected[i]), image_index_.LookupType(i)); +} + +TEST_F(ImageIndexTest, IsToken) { + InitializeWithDefaultTestData(); + + std::vector expected = { + 1, // raw + 1, 0, // ref 0 + 1, 0, 0, 0, // ref 1 + 1, // raw + 1, 0, // ref 0 + 1, 0, // ref 0 + 1, 0, 0, // ref 2 + 1, 1, // raw + 1, 0, 0, // ref 2 + }; + + for (offset_t i = 0; i < image_index_.size(); ++i) + EXPECT_EQ(expected[i], image_index_.IsToken(i)); +} + +TEST_F(ImageIndexTest, IsReference) { + InitializeWithDefaultTestData(); + + std::vector expected = { + 0, // raw + 1, 1, // ref 0 + 1, 1, 1, 1, // ref 1 + 0, // raw + 1, 1, // ref 0 + 1, 1, // ref 0 + 1, 1, 1, // ref 2 + 0, 0, // raw + 1, 1, 1, // ref 2 + }; + + for (offset_t i = 0; i < image_index_.size(); ++i) + EXPECT_EQ(expected[i], image_index_.IsReference(i)); +} + +} // namespace zucchini diff --git a/image_utils.h b/image_utils.h new file mode 100644 index 0000000..c3db9ed --- /dev/null +++ b/image_utils.h @@ -0,0 +1,206 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_ +#define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_ + +#include +#include + +#include "base/numerics/safe_conversions.h" +#include "base/optional.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/typed_value.h" + +namespace zucchini { + +// offset_t is used to describe an offset in an image. +// Files bigger than 4GB are not supported. +using offset_t = uint32_t; +// Divide by 2 since label marking uses the most significant bit. +constexpr offset_t kOffsetBound = static_cast(-1) / 2; +constexpr offset_t kInvalidOffset = static_cast(-1); + +// key_t is used to identify an offset in a table. +using key_t = uint32_t; + +enum Bitness : uint8_t { + // The numerical values are intended to simplify WidthOf() below. + kBit32 = 4, + kBit64 = 8 +}; + +inline uint32_t WidthOf(Bitness bitness) { + return static_cast(bitness); +} + +// Used to uniquely identify a reference type. +// Strongly typed objects are used to avoid ambiguities with PoolTag. +struct TypeTag : public TypedValue { + // inheriting constructor: + using TypedValue::TypedValue; +}; + +// Used to uniquely identify a pool. +struct PoolTag : public TypedValue { + // inheriting constructor: + using TypedValue::TypedValue; +}; + +constexpr TypeTag kNoTypeTag(0xFF); // Typically used to identify raw data. +constexpr PoolTag kNoPoolTag(0xFF); + +// Specification of references in an image file. +struct ReferenceTypeTraits { + constexpr ReferenceTypeTraits(offset_t width_in, + TypeTag type_tag_in, + PoolTag pool_tag_in) + : width(width_in), type_tag(type_tag_in), pool_tag(pool_tag_in) {} + + // |width| specifies number of bytes covered by the reference's binary + // encoding. + const offset_t width; + // |type_tag| identifies the reference type being described. + const TypeTag type_tag; + // |pool_tag| identifies the pool this type belongs to.
+ const PoolTag pool_tag; +}; + +// There is no need to store |type| because references of the same type are +// always aggregated into the same container, and so during iteration we'd have +// |type| already. +struct Reference { + offset_t location; + offset_t target; +}; + +inline bool operator==(const Reference& a, const Reference& b) { + return a.location == b.location && a.target == b.target; +} + +struct IndirectReference { + offset_t location; + key_t target_key; // Key within a pool of references with same semantics. +}; + +inline bool operator==(const IndirectReference& a, const IndirectReference& b) { + return a.location == b.location && a.target_key == b.target_key; +} + +// Interface for extracting References through member function GetNext(). +// This is used by Disassemblers to extract references from an image file. +// Typically, a Reader lazily extracts values and does not hold any storage. +class ReferenceReader { + public: + virtual ~ReferenceReader() = default; + + // Returns the next available Reference, or nullopt_t if exhausted. + // Extracted References must be ordered by their location in the image. + virtual base::Optional GetNext() = 0; +}; + +// Interface for writing References through member function +// PutNext(reference). This is used by Disassemblers to write new References +// in the image file. +class ReferenceWriter { + public: + virtual ~ReferenceWriter() = default; + + // Writes |reference| in the underlying image file. This operation always + // succeeds. + virtual void PutNext(Reference reference) = 0; +}; + +// Position of the most significant bit of offset_t. +constexpr offset_t kIndexMarkBitPosition = sizeof(offset_t) * 8 - 1; + +// Helper functions to mark an offset_t, so we can distinguish file offsets from +// Label indices. Implementation: Marking is flagged by the most significant bit +// (MSB). 
+constexpr inline bool IsMarked(offset_t value) { + return value >> kIndexMarkBitPosition != 0; +} +constexpr inline offset_t MarkIndex(offset_t value) { + return value | (offset_t(1) << kIndexMarkBitPosition); +} +constexpr inline offset_t UnmarkIndex(offset_t value) { + return value & ~(offset_t(1) << kIndexMarkBitPosition); +} + +// Constant as placeholder for non-existing offset for an index. +constexpr offset_t kUnusedIndex = offset_t(-1); +static_assert(IsMarked(kUnusedIndex), "kUnusedIndex must be marked"); + +// An Equivalence is a block of length |length| that approximately match in +// |old_image| at an offset of |src_offset| and in |new_image| at an offset of +// |dst_offset|. +struct Equivalence { + offset_t src_offset; + offset_t dst_offset; + offset_t length; + + offset_t src_end() const { return src_offset + length; } + offset_t dst_end() const { return dst_offset + length; } +}; + +inline bool operator==(const Equivalence& a, const Equivalence& b) { + return a.src_offset == b.src_offset && a.dst_offset == b.dst_offset && + a.length == b.length; +} + +// Same as Equivalence, but with a similarity score. This is only used when +// generating the patch. +struct EquivalenceCandidate { + Equivalence eq; + double similarity; +}; + +// Enumerations for supported executables. +enum ExecutableType : uint32_t { + kExeTypeUnknown = UINT32_MAX, + kExeTypeNoOp = 0, + kExeTypeWin32X86 = 1, + kExeTypeWin32X64 = 2, + kExeTypeElfX86 = 3, + kExeTypeElfX64 = 4, + kExeTypeElfArm32 = 5, + kExeTypeElfAArch64 = 6, + kExeTypeDex = 7, + kNumExeType +}; + +// A region in an image with associated executable type |exe_type|. If +// |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data. 
+struct Element : public BufferRegion { + Element() = default; + constexpr Element(const BufferRegion& region_in, ExecutableType exe_type_in) + : BufferRegion(region_in), exe_type(exe_type_in) {} + constexpr explicit Element(const BufferRegion& region_in) + : BufferRegion(region_in), exe_type(kExeTypeNoOp) {} + + // Similar to lo() and hi(), but returns values in offset_t. + offset_t BeginOffset() const { return base::checked_cast(lo()); } + offset_t EndOffset() const { return base::checked_cast(hi()); } + + BufferRegion region() const { return {offset, size}; } + + friend bool operator==(const Element& a, const Element& b) { + return a.exe_type == b.exe_type && a.offset == b.offset && a.size == b.size; + } + + ExecutableType exe_type; +}; + +// A matched pair of Elements. +struct ElementMatch { + bool IsValid() const { return old_element.exe_type == new_element.exe_type; } + ExecutableType exe_type() const { return old_element.exe_type; } + + Element old_element; + Element new_element; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_ diff --git a/image_utils_unittest.cc b/image_utils_unittest.cc new file mode 100644 index 0000000..7cae9d2 --- /dev/null +++ b/image_utils_unittest.cc @@ -0,0 +1,77 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/image_utils.h" + +#include "base/logging.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(ImageUtilsTest, Bitness) { + EXPECT_EQ(4U, WidthOf(kBit32)); + EXPECT_EQ(8U, WidthOf(kBit64)); +} + +TEST(ImageUtilsTest, IsMarked) { + EXPECT_FALSE(IsMarked(0x00000000)); + EXPECT_TRUE(IsMarked(0x80000000)); + + EXPECT_FALSE(IsMarked(0x00000001)); + EXPECT_TRUE(IsMarked(0x80000001)); + + EXPECT_FALSE(IsMarked(0x70000000)); + EXPECT_TRUE(IsMarked(0xF0000000)); + + EXPECT_FALSE(IsMarked(0x7FFFFFFF)); + EXPECT_TRUE(IsMarked(0xFFFFFFFF)); + + EXPECT_FALSE(IsMarked(0x70000000)); + EXPECT_TRUE(IsMarked(0xC0000000)); + + EXPECT_FALSE(IsMarked(0x0000BEEF)); + EXPECT_TRUE(IsMarked(0x8000BEEF)); +} + +TEST(ImageUtilsTest, MarkIndex) { + EXPECT_EQ(offset_t(0x80000000), MarkIndex(0x00000000)); + EXPECT_EQ(offset_t(0x80000000), MarkIndex(0x80000000)); + + EXPECT_EQ(offset_t(0x80000001), MarkIndex(0x00000001)); + EXPECT_EQ(offset_t(0x80000001), MarkIndex(0x80000001)); + + EXPECT_EQ(offset_t(0xF0000000), MarkIndex(0x70000000)); + EXPECT_EQ(offset_t(0xF0000000), MarkIndex(0xF0000000)); + + EXPECT_EQ(offset_t(0xFFFFFFFF), MarkIndex(0x7FFFFFFF)); + EXPECT_EQ(offset_t(0xFFFFFFFF), MarkIndex(0xFFFFFFFF)); + + EXPECT_EQ(offset_t(0xC0000000), MarkIndex(0x40000000)); + EXPECT_EQ(offset_t(0xC0000000), MarkIndex(0xC0000000)); + + EXPECT_EQ(offset_t(0x8000BEEF), MarkIndex(0x0000BEEF)); + EXPECT_EQ(offset_t(0x8000BEEF), MarkIndex(0x8000BEEF)); +} + +TEST(ImageUtilsTest, UnmarkIndex) { + EXPECT_EQ(offset_t(0x00000000), UnmarkIndex(0x00000000)); + EXPECT_EQ(offset_t(0x00000000), UnmarkIndex(0x80000000)); + + EXPECT_EQ(offset_t(0x00000001), UnmarkIndex(0x00000001)); + EXPECT_EQ(offset_t(0x00000001), UnmarkIndex(0x80000001)); + + EXPECT_EQ(offset_t(0x70000000), UnmarkIndex(0x70000000)); + EXPECT_EQ(offset_t(0x70000000), UnmarkIndex(0xF0000000)); + + EXPECT_EQ(offset_t(0x7FFFFFFF), UnmarkIndex(0x7FFFFFFF)); + EXPECT_EQ(offset_t(0x7FFFFFFF), 
UnmarkIndex(0xFFFFFFFF)); + + EXPECT_EQ(offset_t(0x40000000), UnmarkIndex(0x40000000)); + EXPECT_EQ(offset_t(0x40000000), UnmarkIndex(0xC0000000)); + + EXPECT_EQ(offset_t(0x0000BEEF), UnmarkIndex(0x0000BEEF)); + EXPECT_EQ(offset_t(0x0000BEEF), UnmarkIndex(0x8000BEEF)); +} + +} // namespace zucchini diff --git a/integration_test.cc b/integration_test.cc new file mode 100644 index 0000000..b0ec864 --- /dev/null +++ b/integration_test.cc @@ -0,0 +1,104 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include +#include +#include + +#include "base/files/file_path.h" +#include "base/files/memory_mapped_file.h" +#include "base/optional.h" +#include "base/path_service.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/zucchini.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +base::FilePath MakeTestPath(const std::string& filename) { + base::FilePath path; + CHECK(PathService::Get(base::DIR_SOURCE_ROOT, &path)); // Always evaluated. + return path.AppendASCII("chrome") + .AppendASCII("installer") + .AppendASCII("zucchini") + .AppendASCII("testdata") + .AppendASCII(filename); +} + +void TestGenApply(const std::string& old_filename, + const std::string& new_filename, + bool raw) { + base::FilePath old_path = MakeTestPath(old_filename); + base::FilePath new_path = MakeTestPath(new_filename); + + base::MemoryMappedFile old_file; + ASSERT_TRUE(old_file.Initialize(old_path)); + + base::MemoryMappedFile new_file; + ASSERT_TRUE(new_file.Initialize(new_path)); + + ConstBufferView old_region(old_file.data(), old_file.length()); + ConstBufferView new_region(new_file.data(), new_file.length()); + + EnsemblePatchWriter patch_writer(old_region, new_region); + + // Generate patch from "old" to "new".
+ ASSERT_EQ(status::kStatusSuccess, + raw ? GenerateRaw(old_region, new_region, &patch_writer) + : GenerateEnsemble(old_region, new_region, &patch_writer)); + + size_t patch_size = patch_writer.SerializedSize(); + EXPECT_GE(patch_size, 80U); // Minimum size is empty patch. + // TODO(etiennep): Add check on maximum expected size. + + std::vector patch_buffer(patch_writer.SerializedSize()); + patch_writer.SerializeInto({patch_buffer.data(), patch_buffer.size()}); + + // Read back generated patch. + base::Optional patch_reader = + EnsemblePatchReader::Create({patch_buffer.data(), patch_buffer.size()}); + ASSERT_TRUE(patch_reader.has_value()); + + // Check basic properties. + EXPECT_TRUE(patch_reader->CheckOldFile(old_region)); + EXPECT_TRUE(patch_reader->CheckNewFile(new_region)); + EXPECT_EQ(old_file.length(), patch_reader->header().old_size); + // If new_size doesn't match expectation, the function is aborted. + ASSERT_EQ(new_file.length(), patch_reader->header().new_size); + + // Apply patch to "old" to get "patched new", ensure it's identical to "new". + std::vector patched_new_buffer(new_region.size()); + ASSERT_EQ(status::kStatusSuccess, + Apply(old_region, *patch_reader, + {patched_new_buffer.data(), patched_new_buffer.size()})); + + // Note that |new_region| and |patched_new_buffer| are the same size. 
+ EXPECT_TRUE(std::equal(new_region.begin(), new_region.end(), + patched_new_buffer.begin())); +} + +TEST(EndToEndTest, GenApplyRaw) { + TestGenApply("setup1.exe", "setup2.exe", true); + TestGenApply("chrome64_1.exe", "chrome64_2.exe", true); +} + +TEST(EndToEndTest, GenApplyIdentity) { + TestGenApply("setup1.exe", "setup1.exe", false); +} + +TEST(EndToEndTest, GenApplySimple) { + TestGenApply("setup1.exe", "setup2.exe", false); + TestGenApply("setup2.exe", "setup1.exe", false); + TestGenApply("chrome64_1.exe", "chrome64_2.exe", false); +} + +TEST(EndToEndTest, GenApplyCross) { + TestGenApply("setup1.exe", "chrome64_1.exe", false); +} + +} // namespace zucchini diff --git a/io_utils.cc b/io_utils.cc new file mode 100644 index 0000000..aa493d0 --- /dev/null +++ b/io_utils.cc @@ -0,0 +1,52 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/io_utils.h" + +#include + +namespace zucchini { + +/******** LimitedOutputStream::StreamBuf ********/ + +LimitedOutputStream::StreamBuf::StreamBuf(std::ostream& os, int limit) + : os_(os), limit_(limit) {} + +LimitedOutputStream::StreamBuf::~StreamBuf() { + // Display warning in case we forget to flush data with std::endl. + if (!str().empty()) { + std::cerr << "Warning: LimitedOutputStream has " << str().length() + << " bytes of unflushed output." 
<< std::endl; + } +} + +int LimitedOutputStream::StreamBuf::sync() { + if (full()) { + str(""); + return 0; + } + os_ << str(); + str(""); + if (++counter_ >= limit_) + os_ << "(Additional output suppressed)\n"; + os_.flush(); + return 0; +} + +/******** LimitedOutputStream ********/ + +LimitedOutputStream::LimitedOutputStream(std::ostream& os, int limit) + : std::ostream(&buf_), buf_(os, limit) {} + +/******** PrefixSep ********/ + +std::ostream& operator<<(std::ostream& ostr, PrefixSep& obj) { + if (obj.first_) + obj.first_ = false; + else + ostr << obj.sep_str_; + return ostr; +} + +} // namespace zucchini diff --git a/io_utils.h b/io_utils.h new file mode 100644 index 0000000..56f7075 --- /dev/null +++ b/io_utils.h @@ -0,0 +1,146 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_IO_UTILS_H_ +#define COMPONENTS_ZUCCHINI_IO_UTILS_H_ + +#include + +#include +#include +#include +#include +#include + +#include "base/macros.h" + +namespace zucchini { + +// An std::ostream wrapper that limits the number of std::endl lines to output, +// useful for preventing excessive debug message output. Usage requires some +// work by the caller. Sample: +// static LimitedOutputStream los(std::cerr, 10); +// if (!los.full()) { +// ... // Prepare message. Block may be skipped so don't do other work! +// los << message; +// los << std::endl; // Important!
+// } +class LimitedOutputStream : public std::ostream { + private: + class StreamBuf : public std::stringbuf { + public: + StreamBuf(std::ostream& os, int limit); + ~StreamBuf() override; + + int sync() override; + bool full() const { return counter_ >= limit_; } + + private: + std::ostream& os_; + const int limit_; + int counter_ = 0; + }; + + public: + LimitedOutputStream(std::ostream& os, int limit); + bool full() const { return buf_.full(); } + + private: + StreamBuf buf_; + + DISALLOW_COPY_AND_ASSIGN(LimitedOutputStream); +}; + +// A class to render hexadecimal numbers for std::ostream with 0-padding. This +// is more concise and flexible than stateful STL manipulator alternatives; so: +// std::ios old_fmt(nullptr); +// old_fmt.copyfmt(std::cout); +// std::cout << std::uppercase << std::hex; +// std::cout << std::setfill('0') << std::setw(8) << int_data << std::endl; +// std::cout.copyfmt(old_fmt); +// can be expressed as: +// std::cout << AsHex<8>(int_data) << std::endl; +template +struct AsHex { + explicit AsHex(T value_in) : value(value_in) {} + T value; +}; + +template +std::ostream& operator<<(std::ostream& os, const AsHex& as_hex) { + char buf[N + 1]; + buf[N] = '\0'; + T value = as_hex.value; + for (int i = N - 1; i >= 0; --i, value >>= 4) + buf[i] = "0123456789ABCDEF"[static_cast(value & 0x0F)]; + if (value) + os << "..."; // To indicate data truncation, or negative values. + os << buf; + return os; +} + +// An output manipulator to simplify printing list separators. Sample usage: +// PrefixSep sep(","); +// for (int i : {3, 1, 4, 1, 5, 9}) +// std::cout << sep << i; +// std::cout << std::endl; // Outputs "3,1,4,1,5,9\n".
+class PrefixSep { + public: + explicit PrefixSep(const std::string& sep_str) : sep_str_(sep_str) {} + + friend std::ostream& operator<<(std::ostream& ostr, PrefixSep& obj); + + private: + std::string sep_str_; + bool first_ = true; + + DISALLOW_COPY_AND_ASSIGN(PrefixSep); +}; + +// An input manipulator that dictates the expected next character (compared as +// an unsigned byte) in |std::istream|; invalidates the stream on mismatch. +class EatChar { + public: + explicit EatChar(char ch) : ch_(ch) {} + + friend inline std::istream& operator>>(std::istream& istr, + const EatChar& obj) { + if (!istr.fail() && istr.get() != static_cast<unsigned char>(obj.ch_)) + istr.setstate(std::ios_base::failbit); + return istr; + } + + private: + char ch_; + + DISALLOW_COPY_AND_ASSIGN(EatChar); +}; + +// An input manipulator that reads an unsigned integer from |std::istream|, +// and invalidates the stream on failure. Intolerant of leading white spaces. +template +class StrictUInt { + public: + explicit StrictUInt(T& var) : var_(var) {} + StrictUInt(const StrictUInt&) = default; + + friend std::istream& operator>>(std::istream& istr, StrictUInt obj) { + if (!istr.fail() && !::isdigit(istr.peek())) { + istr.setstate(std::ios_base::failbit); + return istr; + } + return istr >> obj.var_; + } + + private: + T& var_; +}; + +// Stub out uint8_t: istream treats it as char, and value won't be read as int! +template <> +struct StrictUInt {}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_IO_UTILS_H_ diff --git a/io_utils_unittest.cc b/io_utils_unittest.cc new file mode 100644 index 0000000..f3e82ec --- /dev/null +++ b/io_utils_unittest.cc @@ -0,0 +1,161 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file.
+ +#include "components/zucchini/io_utils.h" + +#include + +#include +#include + +#include "base/logging.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(IOUtilsTest, LimitedOutputStream) { + std::ostringstream oss; + LimitedOutputStream los(oss, 3); + EXPECT_FALSE(los.full()); + EXPECT_EQ("", oss.str()); + // Line 1. + los << "a" << 1 << "b" << 2 << "c" << 3 << std::endl; + EXPECT_FALSE(los.full()); + EXPECT_EQ("a1b2c3\n", oss.str()); + // Line 2. + oss.str(""); + los << "\r\r\n\n" << std::endl; // Manual new lines don't count. + EXPECT_FALSE(los.full()); + EXPECT_EQ("\r\r\n\n\n", oss.str()); + // Line 3. + oss.str(""); + los << "blah" << 137; + EXPECT_FALSE(los.full()); + los << std::endl; + EXPECT_TRUE(los.full()); + EXPECT_EQ("blah137\n(Additional output suppressed)\n", oss.str()); + // Not testing adding more lines: the behavior is undefined since we rely on + // caller suppressing output if |los.full()| is true. +} + +TEST(IOUtilsTest, AsHex) { + std::ostringstream oss; + // Helper for single-line tests. Eats dummy std::ostream& from operator<<(). 
+ auto extract = [&oss](std::ostream&) -> std::string { + std::string ret = oss.str(); + oss.str(""); + return ret; + }; + + EXPECT_EQ("00000000", extract(oss << AsHex<8>(0))); + EXPECT_EQ("12345678", extract(oss << AsHex<8>(0x12345678U))); + EXPECT_EQ("9ABCDEF0", extract(oss << AsHex<8>(0x9ABCDEF0U))); + EXPECT_EQ("(00000064)", extract(oss << "(" << AsHex<8>(100) << ")")); + EXPECT_EQ("00FFFF", extract(oss << AsHex<6>(0xFFFFU))); + EXPECT_EQ("FFFF", extract(oss << AsHex<4>(0xFFFFU))); + EXPECT_EQ("...FF", extract(oss << AsHex<2>(0xFFFFU))); + EXPECT_EQ("...00", extract(oss << AsHex<2>(0x100U))); + EXPECT_EQ("FF\n", extract(oss << AsHex<2>(0xFFU) << std::endl)); + EXPECT_EQ("132457689BACDEF0", + extract(oss << AsHex<16, uint64_t>(0x132457689BACDEF0LLU))); + EXPECT_EQ("000000000001", extract(oss << AsHex<12, uint8_t>(1))); + EXPECT_EQ("00000089", extract(oss << AsHex<8, int32_t>(137))); + EXPECT_EQ("...FFFFFFFF", extract(oss << AsHex<8, int32_t>(-1))); + EXPECT_EQ("7FFF", extract(oss << AsHex<4, int16_t>(0x7FFFU))); + EXPECT_EQ("...8000", extract(oss << AsHex<4, int16_t>(0x8000U))); + EXPECT_EQ("8000", extract(oss << AsHex<4, uint16_t>(0x8000U))); +} + +TEST(IOUtilsTest, PrefixSep) { + std::ostringstream oss; + PrefixSep sep(","); + oss << sep << 3; + EXPECT_EQ("3", oss.str()); + oss << sep << 1; + EXPECT_EQ("3,1", oss.str()); + oss << sep << 4 << sep << 1 << sep << "59"; + EXPECT_EQ("3,1,4,1,59", oss.str()); +} + +TEST(IOUtilsTest, PrefixSepAlt) { + std::ostringstream oss; + PrefixSep sep(" "); + oss << sep << 3; + EXPECT_EQ("3", oss.str()); + oss << sep << 1; + EXPECT_EQ("3 1", oss.str()); + oss << sep << 4 << sep << 1 << sep << "59"; + EXPECT_EQ("3 1 4 1 59", oss.str()); +} + +TEST(IOUtilsTest, EatChar) { + std::istringstream main_iss; + // Helper for single-line tests. 
+ auto iss = [&main_iss](const std::string s) -> std::istringstream& { + main_iss.clear(); + main_iss.str(s); + return main_iss; + }; + + EXPECT_TRUE(iss("a,1") >> EatChar('a') >> EatChar(',') >> EatChar('1')); + EXPECT_FALSE(iss("a,a") >> EatChar('a') >> EatChar(',') >> EatChar('1')); + EXPECT_FALSE(iss("a") >> EatChar('a') >> EatChar(',') >> EatChar('1')); + EXPECT_FALSE(iss("x") >> EatChar('X')); + EXPECT_TRUE(iss("_\n") >> EatChar('_') >> EatChar('\n')); +} + +TEST(IOUtilsTest, StrictUInt) { + std::istringstream main_iss; + // Helper for single-line tests. + auto iss = [&main_iss](const std::string& s) -> std::istringstream& { + main_iss.clear(); + main_iss.str(s); + return main_iss; + }; + + uint32_t u32 = 0; + EXPECT_TRUE(iss("1234") >> StrictUInt(u32)); + EXPECT_EQ(uint32_t(1234), u32); + EXPECT_TRUE(iss("001234") >> StrictUInt(u32)); + EXPECT_EQ(uint32_t(1234), u32); + EXPECT_FALSE(iss("blahblah") >> StrictUInt(u32)); + EXPECT_EQ(uint32_t(1234), u32); // No overwrite on failure. + EXPECT_TRUE(iss("137suffix") >> StrictUInt(u32)); + EXPECT_EQ(uint32_t(137), u32); + EXPECT_FALSE(iss(" 1234") >> StrictUInt(u32)); + EXPECT_FALSE(iss("-1234") >> StrictUInt(u32)); + + uint16_t u16 = 0; + EXPECT_TRUE(iss("65535") >> StrictUInt(u16)); + EXPECT_EQ(uint16_t(65535), u16); + EXPECT_FALSE(iss("65536") >> StrictUInt(u16)); // Overflow. + + uint64_t u64 = 0; + EXPECT_TRUE(iss("1000000000001") >> StrictUInt(u64)); + EXPECT_EQ(uint64_t(1000000000001LL), u64); + + // uint8_t is stubbed out, so no tests for it. 
+} + +TEST(IOUtilsTest, ParseSimpleEquations) { + std::istringstream iss("123+456=579,4-3=1"); + uint32_t a = 0; + uint32_t b = 0; + uint32_t c = 0; + EXPECT_TRUE(iss >> StrictUInt(a) >> EatChar('+') >> + StrictUInt(b) >> EatChar('=') >> + StrictUInt(c)); + EXPECT_EQ(uint32_t(123), a); + EXPECT_EQ(uint32_t(456), b); + EXPECT_EQ(uint32_t(579), c); + EXPECT_TRUE(iss >> EatChar(',')); + EXPECT_TRUE(iss >> StrictUInt(a) >> EatChar('-') >> + StrictUInt(b) >> EatChar('=') >> + StrictUInt(c)); + EXPECT_EQ(uint32_t(4), a); + EXPECT_EQ(uint32_t(3), b); + EXPECT_EQ(uint32_t(1), c); +} + +} // namespace zucchini diff --git a/label_manager.cc b/label_manager.cc new file mode 100644 index 0000000..4b74d8b --- /dev/null +++ b/label_manager.cc @@ -0,0 +1,93 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/label_manager.h" + +#include +#include + +#include "base/logging.h" +#include "components/zucchini/algorithm.h" + +namespace zucchini { + +/******** BaseLabelManager ********/ + +BaseLabelManager::BaseLabelManager() = default; +BaseLabelManager::BaseLabelManager(const BaseLabelManager&) = default; +BaseLabelManager::~BaseLabelManager() = default; + +/******** OrderedLabelManager ********/ + +OrderedLabelManager::OrderedLabelManager() = default; +OrderedLabelManager::OrderedLabelManager(const OrderedLabelManager&) = default; +OrderedLabelManager::~OrderedLabelManager() = default; + +offset_t OrderedLabelManager::IndexOfOffset(offset_t offset) const { + auto it = std::lower_bound(labels_.begin(), labels_.end(), offset); + if (it != labels_.end() && *it == offset) + return static_cast(it - labels_.begin()); + return kUnusedIndex; +} + +void OrderedLabelManager::InsertOffsets(const std::vector& offsets) { + labels_.insert(labels_.end(), offsets.begin(), offsets.end()); + SortAndUniquify(&labels_); +} + +void 
OrderedLabelManager::InsertTargets(ReferenceReader&& reader) { + for (auto ref = reader.GetNext(); ref.has_value(); ref = reader.GetNext()) + labels_.push_back(ref->target); + SortAndUniquify(&labels_); +} + +/******** UnorderedLabelManager ********/ + +UnorderedLabelManager::UnorderedLabelManager() = default; +UnorderedLabelManager::UnorderedLabelManager(const UnorderedLabelManager&) = + default; +UnorderedLabelManager::~UnorderedLabelManager() = default; + +offset_t UnorderedLabelManager::IndexOfOffset(offset_t offset) const { + auto it = labels_map_.find(offset); + return it != labels_map_.end() ? it->second : kUnusedIndex; +} + +void UnorderedLabelManager::Init(std::vector&& labels) { + labels_ = std::move(labels); + labels_map_.clear(); + gap_idx_ = 0; + + size_t used_index_count = 0; + for (offset_t label : labels_) { // |labels| is moved-from; scan |labels_|. + if (label != kUnusedIndex) + ++used_index_count; + } + labels_map_.reserve(used_index_count); + + offset_t size = static_cast(labels_.size()); + for (offset_t idx = 0; idx < size; ++idx) { + if (labels_[idx] != kUnusedIndex) { + DCHECK(labels_map_.find(labels_[idx]) == labels_map_.end()); + labels_map_[labels_[idx]] = idx; + } + } +} + +void UnorderedLabelManager::InsertNewOffset(offset_t offset) { + DCHECK(labels_map_.find(offset) == labels_map_.end()); + // Look for unused entry in |labels_|. + auto pos = std::find(labels_.begin() + gap_idx_, labels_.end(), kUnusedIndex); + // Either replace the unused entry, or insert at end. + if (pos != labels_.end()) { + gap_idx_ = pos - labels_.begin(); + *pos = offset; + } else { + gap_idx_ = labels_.size(); + labels_.push_back(offset); + } + labels_map_[offset] = static_cast(gap_idx_); +} + +} // namespace zucchini diff --git a/label_manager.h b/label_manager.h new file mode 100644 index 0000000..7c6606d --- /dev/null +++ b/label_manager.h @@ -0,0 +1,113 @@ +// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_LABEL_MANAGER_H_ +#define COMPONENTS_ZUCCHINI_LABEL_MANAGER_H_ + +#include + +#include +#include + +#include "base/logging.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// A LabelManager stores a list of Labels. By definition, all offsets and +// indices must be distinct. It also provides functions to: +// - Get the offset of a stored index. +// - Get the index of a stored offset. +// - Create new Labels. + +// Base class for OrderedLabelManager and UnorderedLabelManager. +class BaseLabelManager { + public: + BaseLabelManager(); + BaseLabelManager(const BaseLabelManager&); + virtual ~BaseLabelManager(); + + // Returns the offset of a given |index| if it is associated with a + // stored Label, or |kUnusedIndex| otherwise. + offset_t OffsetOfIndex(offset_t index) const { + return index < labels_.size() ? labels_[index] : kUnusedIndex; + } + + // If |offset| has an associated stored Label, returns its index. Otherwise + // returns |kUnusedIndex|. + virtual offset_t IndexOfOffset(offset_t offset) const = 0; + + size_t size() const { return labels_.size(); } + + protected: + // Main storage of distinct offsets. This allows O(1) look up of an offset + // from its index. UnorderedLabelManager may contain "gaps" with + // |kUnusedIndex|. + std::vector labels_; +}; + +// OrderedLabelManager is a LabelManager that prioritizes memory efficiency, +// storing Labels as a sorted list of offsets in |labels_|. Label insertions +// are performed in batch to reduce costs. Index-of-offset lookup is O(lg n) +// (binary search). 
+class OrderedLabelManager : public BaseLabelManager { + public: + OrderedLabelManager(); + OrderedLabelManager(const OrderedLabelManager&); + ~OrderedLabelManager() override; + + // BaseLabelManager: + offset_t IndexOfOffset(offset_t offset) const override; + + // Creates and stores a new Label for each unique offset in |offsets|. This + // invalidates all previous Label lookups. + void InsertOffsets(const std::vector& offsets); + + // For each unique target from |reader|, creates and stores a new Label. This + // invalidates all previous Label lookups. + void InsertTargets(ReferenceReader&& reader); + + const std::vector& Labels() const { return labels_; } +}; + +// UnorderedLabelManager is a LabelManager that does not require Labels to be +// sorted. Therefore, it can be initialized from Labels given in any order. It +// also prioritizes speed for lookup and insertion, but uses more memory than +// OrderedLabelManager. In addition to using |labels_| to store *unsorted* +// distinct offsets, an unordered_map |labels_map_| is used for index-of-offset +// lookup. +class UnorderedLabelManager : public BaseLabelManager { + public: + UnorderedLabelManager(); + UnorderedLabelManager(const UnorderedLabelManager&); + ~UnorderedLabelManager() override; + + // BaseLabelManager: + offset_t IndexOfOffset(offset_t offset) const override; + + // Clears and reinitializes all stored data. Requires that |labels| consists + // of unique offsets, but it may have "gaps" in the form of |kUnusedIndex|. + void Init(std::vector&& labels); + + // Creates a new Label for |offset|. Behavior is undefined if |offset| is + // already associated with a stored Label. If |kUnusedIndex| gaps exist, tries + // to reuse indices to create new Labels, otherwise it allocates new indices. + // Previous lookup results involving stored offsets / indexes remain valid.
+ void InsertNewOffset(offset_t offset); + + bool ContainsOffset(offset_t offset) const { + return labels_map_.find(offset) != labels_map_.end(); + } + + private: + // Inverse map of |labels_| (excludes |kUnusedIndex|). + std::unordered_map labels_map_; + + // Index into |labels_| at which to start scanning for |kUnusedIndex| entry. + size_t gap_idx_ = 0; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_LABEL_MANAGER_H_ diff --git a/label_manager_unittest.cc b/label_manager_unittest.cc new file mode 100644 index 0000000..11dcdf9 --- /dev/null +++ b/label_manager_unittest.cc @@ -0,0 +1,137 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/label_manager.h" + +#include +#include + +#include "components/zucchini/test_reference_reader.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +constexpr auto BAD = kUnusedIndex; +using OffsetVector = std::vector; + +} // namespace + +TEST(LabelManagerTest, Ordered) { + OrderedLabelManager label_manager; + EXPECT_EQ(OffsetVector(), label_manager.Labels()); + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(0)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0)); + + // Initialize with some data, test direct lookups.
+ label_manager.InsertOffsets({0x33, 0x11, 0x44, 0x11}); + EXPECT_EQ(OffsetVector({0x11, 0x33, 0x44}), label_manager.Labels()); + + EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(0)); + EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(1)); + EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(2)); + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(3)); + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(4)); + + EXPECT_EQ(0U, label_manager.IndexOfOffset(0x11)); + EXPECT_EQ(1U, label_manager.IndexOfOffset(0x33)); + EXPECT_EQ(2U, label_manager.IndexOfOffset(0x44)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x00)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x77)); + + // Insert more data, note that lookup results changed. + label_manager.InsertOffsets({0x66, 0x11, 0x11, 0x44, 0x00}); + EXPECT_EQ(OffsetVector({0x00, 0x11, 0x33, 0x44, 0x66}), + label_manager.Labels()); + + EXPECT_EQ(0x00U, label_manager.OffsetOfIndex(0)); + EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(1)); + EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(2)); + EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(3)); + EXPECT_EQ(0x66U, label_manager.OffsetOfIndex(4)); + + EXPECT_EQ(1U, label_manager.IndexOfOffset(0x11)); + EXPECT_EQ(2U, label_manager.IndexOfOffset(0x33)); + EXPECT_EQ(3U, label_manager.IndexOfOffset(0x44)); + EXPECT_EQ(0U, label_manager.IndexOfOffset(0x00)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x77)); +} + +TEST(LabelManagerTest, OrderedInsertTargets) { + OrderedLabelManager label_manager; + + // Initialize with some data. |location| does not matter. + TestReferenceReader reader1({{0, 0x33}, {1, 0x11}, {2, 0x44}, {3, 0x11}}); + label_manager.InsertTargets(std::move(reader1)); + EXPECT_EQ(OffsetVector({0x11, 0x33, 0x44}), label_manager.Labels()); + + // Insert more data. 
+ TestReferenceReader reader2( + {{0, 0x66}, {1, 0x11}, {2, 0x11}, {3, 0x44}, {4, 0x00}}); + label_manager.InsertTargets(std::move(reader2)); + EXPECT_EQ(OffsetVector({0x00, 0x11, 0x33, 0x44, 0x66}), + label_manager.Labels()); +} + +TEST(LabelManagerTest, Unordered) { + UnorderedLabelManager label_manager; + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(0)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0)); + + // Initialize with some data, test direct lookups. + label_manager.Init(OffsetVector({0x33, BAD, BAD, 0x11, 0x44, BAD})); + + EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(0)); + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(1)); + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(2)); + EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(3)); + EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(4)); + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(5)); + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(6)); + + EXPECT_EQ(3U, label_manager.IndexOfOffset(0x11)); + EXPECT_EQ(0U, label_manager.IndexOfOffset(0x33)); + EXPECT_EQ(4U, label_manager.IndexOfOffset(0x44)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x00)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x66)); + + // Insert one offset, assumed to be new. + label_manager.InsertNewOffset(0x00); + EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(0)); + EXPECT_EQ(0x00U, label_manager.OffsetOfIndex(1)); + EXPECT_EQ(BAD, label_manager.OffsetOfIndex(2)); + EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(3)); + EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(4)); + + EXPECT_EQ(1U, label_manager.IndexOfOffset(0x00)); + EXPECT_EQ(3U, label_manager.IndexOfOffset(0x11)); + EXPECT_EQ(0U, label_manager.IndexOfOffset(0x33)); + EXPECT_EQ(4U, label_manager.IndexOfOffset(0x44)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x66)); + + // Insert few more offset, assumed to be new. 
+ label_manager.InsertNewOffset(0x22); + label_manager.InsertNewOffset(0x77); + label_manager.InsertNewOffset(0x55); + + EXPECT_EQ(0x33U, label_manager.OffsetOfIndex(0)); + EXPECT_EQ(0x00U, label_manager.OffsetOfIndex(1)); + EXPECT_EQ(0x22U, label_manager.OffsetOfIndex(2)); + EXPECT_EQ(0x11U, label_manager.OffsetOfIndex(3)); + EXPECT_EQ(0x44U, label_manager.OffsetOfIndex(4)); + EXPECT_EQ(0x77U, label_manager.OffsetOfIndex(5)); + EXPECT_EQ(0x55U, label_manager.OffsetOfIndex(6)); + + EXPECT_EQ(1U, label_manager.IndexOfOffset(0x00)); + EXPECT_EQ(3U, label_manager.IndexOfOffset(0x11)); + EXPECT_EQ(2U, label_manager.IndexOfOffset(0x22)); + EXPECT_EQ(0U, label_manager.IndexOfOffset(0x33)); + EXPECT_EQ(4U, label_manager.IndexOfOffset(0x44)); + EXPECT_EQ(6U, label_manager.IndexOfOffset(0x55)); + EXPECT_EQ(BAD, label_manager.IndexOfOffset(0x66)); + EXPECT_EQ(5U, label_manager.IndexOfOffset(0x77)); +} + +} // namespace zucchini diff --git a/main_utils.cc b/main_utils.cc new file mode 100644 index 0000000..b874dd0 --- /dev/null +++ b/main_utils.cc @@ -0,0 +1,193 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/main_utils.h" + +#include + +#include +#include +#include + +#include "base/command_line.h" +#include "base/logging.h" +#include "base/time/time.h" +#include "build/build_config.h" +#include "components/zucchini/io_utils.h" +#include "components/zucchini/zucchini_commands.h" + +#if defined(OS_WIN) +#include // This include must come first. + +#include +#endif + +namespace { + +#if defined(OS_WIN) +#endif + +/******** Command ********/ + +// Specifications for a Zucchini command. 
+struct Command { + constexpr Command(const char* name_in, + const char* usage_in, + int num_args_in, + CommandFunction command_function_in) + : name(name_in), + usage(usage_in), + num_args(num_args_in), + command_function(command_function_in) {} + Command(const Command&) = default; + ~Command() = default; + + // Unique name of command. |-name| is used to select from command-line. + const char* const name; + + // Usage help text of command. + const char* const usage; + + // Number of arguments (assumed to be filenames) used by the command. + const int num_args; + + // Main function to run for the command. + const CommandFunction command_function; +}; + +/******** List of Zucchini commands ********/ + +constexpr Command kCommands[] = { + {"gen", "-gen [-raw]", 3, &MainGen}, + {"apply", "-apply ", 3, &MainApply}, + {"read", "-read [-dump]", 1, &MainRead}, + {"detect", "-detect [-dd=format#]", 1, &MainDetect}, + {"match", "-match ", 2, &MainMatch}, + {"crc32", "-crc32 ", 1, &MainCrc32}, +}; + +/******** ScopedResourceUsageTracker ********/ + +// A class to track and log system resource usage. +class ScopedResourceUsageTracker { + public: + // Initializes states for tracking. + ScopedResourceUsageTracker() { + start_time_ = base::TimeTicks::Now(); + +#if defined(OS_WIN) + PROCESS_MEMORY_COUNTERS pmc; + if (::GetProcessMemoryInfo(::GetCurrentProcess(), &pmc, sizeof(pmc))) { + start_peak_page_file_usage_ = pmc.PeakPagefileUsage; + start_peak_working_set_size_ = pmc.PeakWorkingSetSize; + } +#endif + } + + // Computes and prints usage. 
+ ~ScopedResourceUsageTracker() { + base::TimeTicks end_time = base::TimeTicks::Now(); + +#if defined(OS_WIN) + size_t cur_peak_page_file_usage = 0; + size_t cur_peak_working_set_size = 0; + PROCESS_MEMORY_COUNTERS pmc; + if (::GetProcessMemoryInfo(::GetCurrentProcess(), &pmc, sizeof(pmc))) { + cur_peak_page_file_usage = pmc.PeakPagefileUsage; + cur_peak_working_set_size = pmc.PeakWorkingSetSize; + } + + LOG(INFO) << "Zucchini.PeakPagefileUsage " + << cur_peak_page_file_usage / 1024 << " KiB"; + LOG(INFO) << "Zucchini.PeakPagefileUsageChange " + << (cur_peak_page_file_usage - start_peak_page_file_usage_) / 1024 + << " KiB"; + LOG(INFO) << "Zucchini.PeakWorkingSetSize " + << cur_peak_working_set_size / 1024 << " KiB"; + LOG(INFO) << "Zucchini.PeakWorkingSetSizeChange " + << (cur_peak_working_set_size - start_peak_working_set_size_) / + 1024 + << " KiB"; +#endif // defined(OS_WIN) + + LOG(INFO) << "Zucchini.TotalTime " << (end_time - start_time_).InSecondsF() + << " s"; + } + + private: + base::TimeTicks start_time_; +#if defined(OS_WIN) + size_t start_peak_page_file_usage_ = 0; + size_t start_peak_working_set_size_ = 0; +#endif // defined(OS_WIN) +}; + +/******** Helper functions ********/ + +// Translates |command_line| arguments to a vector of base::FilePath (expecting +// exactly |expected_count|). On success, writes the results to |paths| and +// returns true. Otherwise returns false. +bool CheckAndGetFilePathParams(const base::CommandLine& command_line, + size_t expected_count, + std::vector* paths) { + const base::CommandLine::StringVector& args = command_line.GetArgs(); + if (args.size() != expected_count) + return false; + + paths->clear(); + paths->reserve(args.size()); + for (const auto& arg : args) + paths->emplace_back(arg); + return true; +} + +// Prints main Zucchini usage text.
+void PrintUsage(std::ostream& err) { + err << "Usage:" << std::endl; + for (const Command& command : kCommands) + err << " zucchini " << command.usage << std::endl; +} + +} // namespace + +/******** Exported Functions ********/ + +zucchini::status::Code RunZucchiniCommand(const base::CommandLine& command_line, + std::ostream& out, + std::ostream& err) { + // Look for a command with name that matches input. + const Command* command_use = nullptr; + for (const Command& command : kCommands) { + if (command_line.HasSwitch(command.name)) { + if (command_use) { // Too many commands found. + command_use = nullptr; // Set to null to flag error. + break; + } + command_use = &command; + } + } + + // Expect exactly 1 matching command. If 0 or >= 2, print usage and quit. + if (!command_use) { + err << "Must have exactly one of:" << std::endl; + err << " ["; + zucchini::PrefixSep sep(", "); + for (const Command& command : kCommands) + err << sep << "-" << command.name; + err << "]" << std::endl; + PrintUsage(err); + return zucchini::status::kStatusInvalidParam; + } + + // Try to parse filename arguments. On failure, print usage and quit. + std::vector paths; + if (!CheckAndGetFilePathParams(command_line, command_use->num_args, &paths)) { + err << command_use->usage << std::endl; + PrintUsage(err); + return zucchini::status::kStatusInvalidParam; + } + + ScopedResourceUsageTracker resource_usage_tracker; + return command_use->command_function({command_line, paths, out, err}); +} diff --git a/main_utils.h b/main_utils.h new file mode 100644 index 0000000..addb830 --- /dev/null +++ b/main_utils.h @@ -0,0 +1,35 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_MAIN_UTILS_H_ +#define COMPONENTS_ZUCCHINI_MAIN_UTILS_H_ + +#include + +#include "base/files/file_path.h" +#include "components/zucchini/zucchini.h" + +// Utilities to run Zucchini command based on command-line input, and to print +// help messages. + +namespace base { + +class CommandLine; + +} // namespace base + +// To add a new Zucchini command: +// 1. Declare the command's main function in zucchini_command.h. Its signature +// must match CommandFunction. +// 2. Define the command's main function in zucchini_command.cc. +// 3. Add a new entry into |kCommands| in main_utils.cc. + +// Searches |command_line| for Zucchini commands. If a unique command is found, +// runs it (passes |out| and |err|), and logs resource usage. Otherwise prints +// help message to |err|. Returns Zucchini status code for error handling. +zucchini::status::Code RunZucchiniCommand(const base::CommandLine& command_line, + std::ostream& out, + std::ostream& err); + +#endif // COMPONENTS_ZUCCHINI_MAIN_UTILS_H_ diff --git a/mapped_file.cc b/mapped_file.cc new file mode 100644 index 0000000..13c1afd --- /dev/null +++ b/mapped_file.cc @@ -0,0 +1,70 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/mapped_file.h" + +#include + +#include "base/files/file_util.h" +#include "base/logging.h" +#include "build/build_config.h" + +namespace zucchini { + +MappedFileReader::MappedFileReader(base::File&& file) { + if (!file.IsValid()) { + error_ = "Invalid file."; + return; // |buffer_| will be uninitialized, and therefore invalid. 
+ } + if (!buffer_.Initialize(std::move(file))) { + error_ = "Can't map file to memory."; + } +} + +MappedFileWriter::MappedFileWriter(const base::FilePath& file_path, + base::File&& file, + size_t length) + : file_path_(file_path), delete_behavior_(kManualDeleteOnClose) { + if (!file.IsValid()) { + error_ = "Invalid file."; + return; // |buffer_| will be uninitialized, and therefore invalid. + } + +#if defined(OS_WIN) + file_handle_ = file.Duplicate(); + // Tell the OS to delete the file when all handles are closed. + if (file_handle_.DeleteOnClose(true)) { + delete_behavior_ = kAutoDeleteOnClose; + } else { + error_ = "Failed to mark file for delete-on-close."; + } +#endif // defined(OS_WIN) + + bool is_ok = buffer_.Initialize(std::move(file), {0, length}, + base::MemoryMappedFile::READ_WRITE_EXTEND); + if (!is_ok) { + error_ = "Can't map file to memory."; + } +} + +MappedFileWriter::~MappedFileWriter() { + if (!HasError() && delete_behavior_ == kManualDeleteOnClose && + !file_path_.empty() && !base::DeleteFile(file_path_, false)) { + error_ = "Failed to delete file."; + } +} + +bool MappedFileWriter::Keep() { +#if defined(OS_WIN) + if (delete_behavior_ == kAutoDeleteOnClose && + !file_handle_.DeleteOnClose(false)) { + error_ = "Failed to prevent deletion of file."; + return false; + } +#endif // defined(OS_WIN) + delete_behavior_ = kKeep; + return true; +} + +} // namespace zucchini diff --git a/mapped_file.h b/mapped_file.h new file mode 100644 index 0000000..540f947 --- /dev/null +++ b/mapped_file.h @@ -0,0 +1,83 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_MAPPED_FILE_H_ +#define COMPONENTS_ZUCCHINI_MAPPED_FILE_H_ + +#include +#include + +#include + +#include "base/files/file.h" +#include "base/files/file_path.h" +#include "base/files/memory_mapped_file.h" +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" + +namespace zucchini { + +// A file reader wrapper. +class MappedFileReader { + public: + // Maps |file| to memory for reading. Also validates |file|. Errors are + // available via HasError() and error(). + explicit MappedFileReader(base::File&& file); + + const uint8_t* data() const { return buffer_.data(); } + size_t length() const { return buffer_.length(); } + zucchini::ConstBufferView region() const { return {data(), length()}; } + + bool HasError() { return !error_.empty() || !buffer_.IsValid(); } + const std::string& error() { return error_; } + + private: + std::string error_; + base::MemoryMappedFile buffer_; + + DISALLOW_COPY_AND_ASSIGN(MappedFileReader); +}; + +// A file writer wrapper. The target file is deleted on destruction unless +// Keep() is called. +class MappedFileWriter { + public: + // Maps |file| to memory for writing. |file_path| is needed for auto delete on + // UNIX systems, but can be empty if auto delete is not needed. Errors are + // available via HasError() and error(). + MappedFileWriter(const base::FilePath& file_path, + base::File&& file, + size_t length); + ~MappedFileWriter(); + + uint8_t* data() { return buffer_.data(); } + size_t length() const { return buffer_.length(); } + zucchini::MutableBufferView region() { return {data(), length()}; } + + bool HasError() { return !error_.empty() || !buffer_.IsValid(); } + const std::string& error() { return error_; } + + // Indicates that the file should not be deleted on destruction. Returns true + // iff the operation succeeds. 
+ bool Keep(); + + private: + enum OnCloseDeleteBehavior { + kKeep, + kAutoDeleteOnClose, + kManualDeleteOnClose + }; + + std::string error_; + base::FilePath file_path_; + base::File file_handle_; + base::MemoryMappedFile buffer_; + OnCloseDeleteBehavior delete_behavior_; + + DISALLOW_COPY_AND_ASSIGN(MappedFileWriter); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_MAPPED_FILE_H_ diff --git a/mapped_file_unittest.cc b/mapped_file_unittest.cc new file mode 100644 index 0000000..e3ee6dc --- /dev/null +++ b/mapped_file_unittest.cc @@ -0,0 +1,61 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/mapped_file.h" + +#include + +#include "base/files/file.h" +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/files/scoped_temp_dir.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +class MappedFileWriterTest : public testing::Test { + protected: + MappedFileWriterTest() = default; + void SetUp() override { + ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); + file_path_ = temp_dir_.GetPath().AppendASCII("test-file"); + } + + base::FilePath file_path_; + + private: + base::ScopedTempDir temp_dir_; +}; + +TEST_F(MappedFileWriterTest, Keep) { + EXPECT_FALSE(base::PathExists(file_path_)); + { + using base::File; + File file(file_path_, File::FLAG_CREATE_ALWAYS | File::FLAG_READ | + File::FLAG_WRITE | File::FLAG_SHARE_DELETE | + File::FLAG_CAN_DELETE_ON_CLOSE); + MappedFileWriter file_writer(file_path_, std::move(file), 10); + EXPECT_FALSE(file_writer.HasError()); + EXPECT_TRUE(file_writer.Keep()); + EXPECT_FALSE(file_writer.HasError()); + EXPECT_TRUE(file_writer.error().empty()); + } + EXPECT_TRUE(base::PathExists(file_path_)); +} + +TEST_F(MappedFileWriterTest, DeleteOnClose) { + EXPECT_FALSE(base::PathExists(file_path_)); + { + using base::File; + File 
file(file_path_, File::FLAG_CREATE_ALWAYS | File::FLAG_READ | + File::FLAG_WRITE | File::FLAG_SHARE_DELETE | + File::FLAG_CAN_DELETE_ON_CLOSE); + MappedFileWriter file_writer(file_path_, std::move(file), 10); + EXPECT_FALSE(file_writer.HasError()); + EXPECT_TRUE(file_writer.error().empty()); + } + EXPECT_FALSE(base::PathExists(file_path_)); +} + +} // namespace zucchini diff --git a/patch_fuzzer.cc b/patch_fuzzer.cc new file mode 100644 index 0000000..2d1c9b7 --- /dev/null +++ b/patch_fuzzer.cc @@ -0,0 +1,19 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include + +#include "base/optional.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/patch_reader.h" + +// Entry point for LibFuzzer. +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + logging::SetMinLogLevel(3); // Disable console spamming. + zucchini::ConstBufferView patch(data, size); + base::Optional patch_reader = + zucchini::EnsemblePatchReader::Create(patch); + return 0; +} diff --git a/patch_read_write_unittest.cc b/patch_read_write_unittest.cc new file mode 100644 index 0000000..7f84b03 --- /dev/null +++ b/patch_read_write_unittest.cc @@ -0,0 +1,604 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/patch_writer.h" + +#include +#include + +#include +#include + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// Used for initialization of raw test data. +using ByteVector = std::vector; + +// Helper function that creates an object of type |T| and intializes it from +// data in |buffer|. Ensures initialization is successful. 
|buffer| is passed as +// pointer to avoid passing a temporary, which can cause dangling references. +template +T TestInitialize(const ByteVector* buffer) { + T value; + BufferSource buffer_source(buffer->data(), buffer->size()); + EXPECT_TRUE(value.Initialize(&buffer_source)); + EXPECT_TRUE(buffer_source.empty()); // Make sure all data has been consumed + return value; +} + +// Helper function that creates an object of type |T| and tries to initialize it +// from invalid data in |buffer|, expecting the operation to fail. |buffer| is +// passed as pointer to avoid passing a temporary, which can cause dangling +// references. +template +void TestInvalidInitialize(const ByteVector* buffer) { + T value; + BufferSource buffer_source(buffer->data(), buffer->size()); + EXPECT_FALSE(value.Initialize(&buffer_source)); +} + +// Helper function that serializes |value| into a buffer. Ensures that +// serialization is successful and that the result matches |expected|. +template +void TestSerialize(const ByteVector& expected, const T& value) { + size_t size = value.SerializedSize(); + EXPECT_EQ(expected.size(), size); + ByteVector buffer(size); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_TRUE(value.SerializeInto(&buffer_sink)); + EXPECT_EQ(expected, buffer); +} + +} // namespace + +bool operator==(const ByteVector& a, ConstBufferView b) { + return a == ByteVector(b.begin(), b.end()); +} + +TEST(PatchTest, ParseSerializeElementMatch) { + ByteVector data = { + 0x01, 0, 0, 0, // old_offset + 0x03, 0, 0, 0, // new_offset + 0x02, 0, 0, 0, // old_length + 0x04, 0, 0, 0, // new_length + 7, 0, 0, 0, // kExeTypeDex + }; + BufferSource buffer_source(data.data(), data.size()); + ElementMatch element_match = {}; + EXPECT_TRUE(patch::ParseElementMatch(&buffer_source, &element_match)); + EXPECT_EQ(kExeTypeDex, element_match.exe_type()); + EXPECT_EQ(kExeTypeDex, element_match.old_element.exe_type); + EXPECT_EQ(kExeTypeDex, element_match.new_element.exe_type); +
EXPECT_EQ(0x1U, element_match.old_element.offset); + EXPECT_EQ(0x2U, element_match.old_element.size); + EXPECT_EQ(0x3U, element_match.new_element.offset); + EXPECT_EQ(0x4U, element_match.new_element.size); + + size_t size = patch::SerializedElementMatchSize(element_match); + EXPECT_EQ(data.size(), size); + ByteVector buffer(size); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_TRUE(patch::SerializeElementMatch(element_match, &buffer_sink)); + EXPECT_EQ(data, buffer); +} + +TEST(PatchTest, ParseElementMatchTooSmall) { + ByteVector data = {4}; + BufferSource buffer_source(data.data(), data.size()); + ElementMatch element_match = {}; + EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match)); +} + +TEST(PatchTest, ParseSerializeElementMatchExeMismatch) { + ByteVector buffer(28); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_FALSE(patch::SerializeElementMatch( + ElementMatch{{{1, 2}, kExeTypeNoOp}, {{3, 4}, kExeTypeWin32X86}}, + &buffer_sink)); +} + +TEST(PatchTest, SerializeElementMatchTooSmall) { + ByteVector buffer(4); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_FALSE(patch::SerializeElementMatch( + ElementMatch{{{1, 2}, kExeTypeDex}, {{3, 4}, kExeTypeDex}}, + &buffer_sink)); +} + +TEST(PatchTest, ParseSerializeBuffer) { + auto TestSerialize = [](const ByteVector& expected, const ByteVector& value) { + size_t size = patch::SerializedBufferSize(value); + EXPECT_EQ(expected.size(), size); + ByteVector buffer(size); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_TRUE(patch::SerializeBuffer(value, &buffer_sink)); + EXPECT_EQ(expected, buffer); + }; + + // |data| is passed as pointer to avoid passing a temporay, which can causes + // dangling references. + auto TestParse = [](const ByteVector* data) { + BufferSource value; + BufferSource buffer_source(data->data(), data->size()); + EXPECT_TRUE(patch::ParseBuffer(&buffer_source, &value)); + // Make sure all data has been consumed. 
+ EXPECT_TRUE(buffer_source.empty()); + return value; + }; + + ByteVector data = { + 0, 0, 0, 0, // size + }; + BufferSource buffer = TestParse(&data); + EXPECT_TRUE(buffer.empty()); + TestSerialize(data, ByteVector({})); + + data = { + 3, 0, 0, 0, // size + 1, 2, 3 // content + }; + buffer = TestParse(&data); + EXPECT_EQ(3U, buffer.size()); + EXPECT_EQ(ByteVector({1, 2, 3}), ByteVector(buffer.begin(), buffer.end())); + TestSerialize(data, ByteVector({1, 2, 3})); + + // Ill-formed input. + data = { + 3, 0, 0, 0, // size + 1, 2 // insufficient content + }; + BufferSource value; + BufferSource buffer_source(data.data(), data.size()); + EXPECT_FALSE(patch::ParseBuffer(&buffer_source, &value)); + EXPECT_TRUE(value.empty()); +} + +TEST(PatchTest, SerializeBufferTooSmall) { + ByteVector buffer(3); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_FALSE(patch::SerializeBuffer(ByteVector(), &buffer_sink)); +} + +TEST(EquivalenceSinkSourceTest, Empty) { + ByteVector data = { + 0, 0, 0, 0, // src_skip size + 0, 0, 0, 0, // dst_skip size + 0, 0, 0, 0, // copy_count size + }; + EquivalenceSource equivalence_source = + TestInitialize(&data); + + EXPECT_FALSE(equivalence_source.GetNext()); + EXPECT_TRUE(equivalence_source.Done()); + + TestSerialize(data, EquivalenceSink()); +} + +TEST(EquivalenceSourceSinkTest, Normal) { + ByteVector data = { + 2, 0, 0, 0, // src_skip size + 6, 7, // src_skip content + 2, 0, 0, 0, // dst_skip size + 7, 1, // dst_skip content + 2, 0, 0, 0, // copy_count size + 2, 1 // copy_count content + }; + EquivalenceSource equivalence_source = + TestInitialize(&data); + auto equivalence = equivalence_source.GetNext(); + EXPECT_FALSE(equivalence_source.Done()); + EXPECT_TRUE(equivalence.has_value()); + EXPECT_EQ(offset_t(3), equivalence->src_offset); + EXPECT_EQ(offset_t(7), equivalence->dst_offset); + EXPECT_EQ(offset_t(2), equivalence->length); + + equivalence = equivalence_source.GetNext(); + EXPECT_TRUE(equivalence_source.Done()); + 
EXPECT_TRUE(equivalence.has_value()); + EXPECT_EQ(offset_t(1), equivalence->src_offset); + EXPECT_EQ(offset_t(10), equivalence->dst_offset); + EXPECT_EQ(offset_t(1), equivalence->length); + + equivalence = equivalence_source.GetNext(); + EXPECT_FALSE(equivalence.has_value()); + + EquivalenceSink equivalence_sink; + equivalence_sink.PutNext(Equivalence{3, 7, 2}); + equivalence_sink.PutNext(Equivalence{1, 10, 1}); + TestSerialize(data, equivalence_sink); +} + +TEST(ExtraDataSourceSinkTest, Empty) { + ByteVector data = { + 0, 0, 0, 0, // extra_data size + }; + ExtraDataSource extra_data_source = TestInitialize(&data); + + EXPECT_FALSE(extra_data_source.GetNext(2)); + EXPECT_TRUE(extra_data_source.Done()); + + TestSerialize(data, ExtraDataSink()); +} + +TEST(ExtraDataSourceSinkTest, Normal) { + ByteVector data = { + 5, 0, 0, 0, // extra_data size + 1, 2, 3, 4, 5, // extra_data content + }; + ExtraDataSource extra_data_source = TestInitialize(&data); + EXPECT_FALSE(extra_data_source.Done()); + + auto extra_data = extra_data_source.GetNext(3); + EXPECT_FALSE(extra_data_source.Done()); + EXPECT_TRUE(extra_data.has_value()); + EXPECT_EQ(size_t(3), extra_data->size()); + EXPECT_EQ(ByteVector({1, 2, 3}), + ByteVector(extra_data->begin(), extra_data->end())); + + extra_data = extra_data_source.GetNext(2); + EXPECT_TRUE(extra_data_source.Done()); + EXPECT_TRUE(extra_data.has_value()); + EXPECT_EQ(ByteVector({4, 5}), + ByteVector(extra_data->begin(), extra_data->end())); + + extra_data = extra_data_source.GetNext(2); + EXPECT_FALSE(extra_data.has_value()); + + ExtraDataSink extra_data_sink; + + ByteVector content = {1, 2, 3}; + extra_data_sink.PutNext({content.data(), content.size()}); + content = {4, 5}; + extra_data_sink.PutNext({content.data(), content.size()}); + TestSerialize(data, extra_data_sink); +} + +TEST(RawDeltaSourceSinkTest, Empty) { + ByteVector data = { + 0, 0, 0, 0, // raw_delta_skip size + 0, 0, 0, 0, // raw_delta_diff size + }; + RawDeltaSource 
raw_delta_source = TestInitialize(&data); + + EXPECT_FALSE(raw_delta_source.GetNext()); + EXPECT_TRUE(raw_delta_source.Done()); + + TestSerialize(data, RawDeltaSink()); +} + +TEST(RawDeltaSinkSourceSinkTest, Normal) { + ByteVector data = { + 3, 0, 0, 0, // raw_delta_skip size + 1, 3, 0, // raw_delta_skip content + 3, 0, 0, 0, // raw_delta_diff size + 42, 24, 235, // raw_delta_diff content + }; + RawDeltaSource raw_delta_source = TestInitialize(&data); + EXPECT_FALSE(raw_delta_source.Done()); + + auto raw_delta = raw_delta_source.GetNext(); + EXPECT_FALSE(raw_delta_source.Done()); + EXPECT_TRUE(raw_delta.has_value()); + EXPECT_EQ(1U, raw_delta->copy_offset); + EXPECT_EQ(42, raw_delta->diff); + + raw_delta = raw_delta_source.GetNext(); + EXPECT_FALSE(raw_delta_source.Done()); + EXPECT_TRUE(raw_delta.has_value()); + EXPECT_EQ(5U, raw_delta->copy_offset); + EXPECT_EQ(24, raw_delta->diff); + + raw_delta = raw_delta_source.GetNext(); + EXPECT_TRUE(raw_delta_source.Done()); + EXPECT_TRUE(raw_delta.has_value()); + EXPECT_EQ(6U, raw_delta->copy_offset); + EXPECT_EQ(-21, raw_delta->diff); + + EXPECT_FALSE(raw_delta_source.GetNext()); + EXPECT_TRUE(raw_delta_source.Done()); + + RawDeltaSink raw_delta_sink; + raw_delta_sink.PutNext({1, 42}); + raw_delta_sink.PutNext({5, 24}); + raw_delta_sink.PutNext({6, -21}); + TestSerialize(data, raw_delta_sink); +} + +TEST(ReferenceDeltaSourceSinkTest, Empty) { + ByteVector data = { + 0, 0, 0, 0, // reference_delta size + }; + ReferenceDeltaSource reference_delta_source = + TestInitialize(&data); + + EXPECT_FALSE(reference_delta_source.GetNext()); + EXPECT_TRUE(reference_delta_source.Done()); + + TestSerialize(data, ReferenceDeltaSink()); +} + +TEST(ReferenceDeltaSourceSinkTest, Normal) { + ByteVector data = { + 2, 0, 0, 0, // reference_delta size + 84, 47, // reference_delta content + }; + ReferenceDeltaSource reference_delta_source = + TestInitialize(&data); + EXPECT_FALSE(reference_delta_source.Done()); + + auto delta = 
reference_delta_source.GetNext(); + EXPECT_FALSE(reference_delta_source.Done()); + EXPECT_TRUE(delta.has_value()); + EXPECT_EQ(42, *delta); + + delta = reference_delta_source.GetNext(); + EXPECT_TRUE(reference_delta_source.Done()); + EXPECT_TRUE(delta.has_value()); + EXPECT_EQ(-24, *delta); + + EXPECT_FALSE(reference_delta_source.GetNext()); + EXPECT_TRUE(reference_delta_source.Done()); + + ReferenceDeltaSink reference_delta; + reference_delta.PutNext(42); + reference_delta.PutNext(-24); + TestSerialize(data, reference_delta); +} + +TEST(TargetSourceSinkTest, Empty) { + ByteVector data = { + 0, 0, 0, 0, // extra_targets size + }; + TargetSource target_source = TestInitialize(&data); + + EXPECT_FALSE(target_source.GetNext()); + EXPECT_TRUE(target_source.Done()); + + TestSerialize(data, TargetSink()); +} + +TEST(TargetSourceSinkTest, Normal) { + ByteVector data = { + 2, 0, 0, 0, // extra_targets size + 3, 1, // extra_targets content + }; + TargetSource target_source = TestInitialize(&data); + EXPECT_FALSE(target_source.Done()); + + auto target = target_source.GetNext(); + EXPECT_FALSE(target_source.Done()); + EXPECT_TRUE(target.has_value()); + EXPECT_EQ(3U, *target); + + target = target_source.GetNext(); + EXPECT_TRUE(target_source.Done()); + EXPECT_TRUE(target.has_value()); + EXPECT_EQ(5U, *target); + + EXPECT_FALSE(target_source.GetNext()); + EXPECT_TRUE(target_source.Done()); + + TargetSink target_sink; + target_sink.PutNext(3); + target_sink.PutNext(5); + TestSerialize(data, target_sink); +} + +TEST(PatchElementTest, Normal) { + ByteVector data = { + 0x01, 0, 0, 0, // old_offset + 0x03, 0, 0, 0, // new_offset + 0x02, 0, 0, 0, // old_length + 0x04, 0, 0, 0, // new_length + 1, 0, 0, 0, // EXE_TYPE_WIN32_X86 + + 1, 0, 0, 0, // src_skip size + 0x10, // src_skip content + 1, 0, 0, 0, // dst_skip size + 0x11, // dst_skip content + 1, 0, 0, 0, // copy_count size + 0x12, // copy_count content + + 1, 0, 0, 0, // extra_data size + 0x13, // extra_data content + + 1, 0, 0, 
0, // raw_delta_skip size + 0x14, // raw_delta_skip content + 1, 0, 0, 0, // raw_delta_diff size + 0x15, // raw_delta_diff content + + 1, 0, 0, 0, // reference_delta size + 0x16, // reference_delta content + + 2, 0, 0, 0, // pool count + 0, // pool_tag + 1, 0, 0, 0, // extra_targets size + 0x17, // extra_targets content + 2, // pool_tag + 1, 0, 0, 0, // extra_targets size + 0x18, // extra_targets content + }; + + PatchElementReader patch_element_reader = + TestInitialize(&data); + + ElementMatch element_match = patch_element_reader.element_match(); + EXPECT_EQ(kExeTypeWin32X86, element_match.exe_type()); + EXPECT_EQ(kExeTypeWin32X86, element_match.old_element.exe_type); + EXPECT_EQ(kExeTypeWin32X86, element_match.new_element.exe_type); + EXPECT_EQ(0x1U, element_match.old_element.offset); + EXPECT_EQ(0x2U, element_match.old_element.size); + EXPECT_EQ(0x3U, element_match.new_element.offset); + EXPECT_EQ(0x4U, element_match.new_element.size); + + EquivalenceSource equivalence_source = + patch_element_reader.GetEquivalenceSource(); + EXPECT_EQ(ByteVector({0x10}), equivalence_source.src_skip()); + EXPECT_EQ(ByteVector({0x11}), equivalence_source.dst_skip()); + EXPECT_EQ(ByteVector({0x12}), equivalence_source.copy_count()); + + ExtraDataSource extra_data_source = patch_element_reader.GetExtraDataSource(); + EXPECT_EQ(ByteVector({0x13}), extra_data_source.extra_data()); + + RawDeltaSource raw_delta_source = patch_element_reader.GetRawDeltaSource(); + EXPECT_EQ(ByteVector({0x14}), raw_delta_source.raw_delta_skip()); + EXPECT_EQ(ByteVector({0x15}), raw_delta_source.raw_delta_diff()); + + ReferenceDeltaSource reference_delta_source = + patch_element_reader.GetReferenceDeltaSource(); + EXPECT_EQ(ByteVector({0x16}), reference_delta_source.reference_delta()); + + TargetSource target_source1 = + patch_element_reader.GetExtraTargetSource(PoolTag(0)); + EXPECT_EQ(ByteVector({0x17}), target_source1.extra_targets()); + TargetSource target_source2 = + 
patch_element_reader.GetExtraTargetSource(PoolTag(1)); + EXPECT_EQ(ByteVector({}), target_source2.extra_targets()); + TargetSource target_source3 = + patch_element_reader.GetExtraTargetSource(PoolTag(2)); + EXPECT_EQ(ByteVector({0x18}), target_source3.extra_targets()); + + PatchElementWriter patch_element_writer(element_match); + + patch_element_writer.SetEquivalenceSink( + EquivalenceSink({0x10}, {0x11}, {0x12})); + patch_element_writer.SetExtraDataSink(ExtraDataSink({0x13})); + patch_element_writer.SetRawDeltaSink(RawDeltaSink({0x14}, {0x15})); + patch_element_writer.SetReferenceDeltaSink(ReferenceDeltaSink({0x16})); + patch_element_writer.SetTargetSink(PoolTag(0), TargetSink({0x17})); + patch_element_writer.SetTargetSink(PoolTag(2), TargetSink({0x18})); + TestSerialize(data, patch_element_writer); +} + +TEST(EnsemblePatchTest, RawPatch) { + ByteVector data = { + 0x5A, 0x75, 0x63, 0x00, // magic + 0x10, 0x32, 0x54, 0x76, // old_size + 0x00, 0x11, 0x22, 0x33, // old_crc + 0x98, 0xBA, 0xDC, 0xFE, // new_size + 0x44, 0x55, 0x66, 0x77, // new_crc + + 0, 0, 0, 0, // kRawPatch + + 1, 0, 0, 0, // number of element + + 0x01, 0, 0, 0, // old_offset + 0x00, 0, 0, 0, // new_offset + 0x02, 0, 0, 0, // old_length + 0x98, 0xBA, 0xDC, 0xFE, // new_length + 1, 0, 0, 0, // EXE_TYPE_WIN32_X86 + 0, 0, 0, 0, // src_skip size + 0, 0, 0, 0, // dst_skip size + 0, 0, 0, 0, // copy_count size + 0, 0, 0, 0, // extra_data size + 0, 0, 0, 0, // raw_delta_skip size + 0, 0, 0, 0, // raw_delta_diff size + 0, 0, 0, 0, // reference_delta size + 0, 0, 0, 0, // pool count + }; + + EnsemblePatchReader ensemble_patch_reader = + TestInitialize(&data); + + PatchHeader header = ensemble_patch_reader.header(); + EXPECT_EQ(PatchHeader::kMagic, header.magic); + EXPECT_EQ(0x76543210U, header.old_size); + EXPECT_EQ(0x33221100U, header.old_crc); + EXPECT_EQ(0xFEDCBA98U, header.new_size); + EXPECT_EQ(0x77665544U, header.new_crc); + + EXPECT_EQ(PatchType::kRawPatch, ensemble_patch_reader.patch_type()); + + 
const std::vector& elements = + ensemble_patch_reader.elements(); + EXPECT_EQ(size_t(1), elements.size()); + + EnsemblePatchWriter ensemble_patch_writer(header); + ensemble_patch_writer.SetPatchType(PatchType::kRawPatch); + PatchElementWriter patch_element_writer(elements[0].element_match()); + patch_element_writer.SetEquivalenceSink({}); + patch_element_writer.SetExtraDataSink({}); + patch_element_writer.SetRawDeltaSink({}); + patch_element_writer.SetReferenceDeltaSink({}); + ensemble_patch_writer.AddElement(std::move(patch_element_writer)); + + TestSerialize(data, ensemble_patch_writer); +} + +TEST(EnsemblePatchTest, CheckFile) { + ByteVector data = { + 0x5A, 0x75, 0x63, 0x00, // magic + 0x05, 0x00, 0x00, 0x00, // old_size + 0xDF, 0x13, 0xE4, 0x10, // old_crc + 0x03, 0x00, 0x00, 0x00, // new_size + 0xDC, 0xF7, 0x00, 0x40, // new_crc + 2, 0, 0, 0, // kEnsemblePatch + + 1, 0, 0, 0, // number of element + + 0x01, 0, 0, 0, // old_offset + 0x00, 0, 0, 0, // new_offset + 0x02, 0, 0, 0, // old_length + 0x03, 0, 0, 0, // new_length + 1, 0, 0, 0, // EXE_TYPE_WIN32_X86 + 0, 0, 0, 0, // src_skip size + 0, 0, 0, 0, // dst_skip size + 0, 0, 0, 0, // copy_count size + 0, 0, 0, 0, // extra_data size + 0, 0, 0, 0, // raw_delta_skip size + 0, 0, 0, 0, // raw_delta_diff size + 0, 0, 0, 0, // reference_delta size + 0, 0, 0, 0, // pool count + }; + + EnsemblePatchReader ensemble_patch_reader = + TestInitialize(&data); + + ByteVector old_file = {0x10, 0x32, 0x54, 0x76, 0x98}; + ByteVector new_file = {0xBA, 0xDC, 0xFE}; + + ConstBufferView old_image(old_file.data(), old_file.size()); + ConstBufferView new_image(new_file.data(), new_file.size()); + + EXPECT_TRUE(ensemble_patch_reader.CheckOldFile(old_image)); + EXPECT_TRUE(ensemble_patch_reader.CheckNewFile(new_image)); + EXPECT_FALSE(ensemble_patch_reader.CheckOldFile(new_image)); + EXPECT_FALSE(ensemble_patch_reader.CheckNewFile(old_image)); +} + +TEST(EnsemblePatchTest, InvalidMagic) { + ByteVector data = { + 0x42, 0x42, 0x42, 0x00, 
// magic + 0x10, 0x32, 0x54, 0x76, // old_size + 0x00, 0x11, 0x22, 0x33, // old_crc + 0x03, 0x00, 0x00, 0x00, // new_size + 0x44, 0x55, 0x66, 0x77, // new_crc + 0, 0, 0, 0, // kRawPatch + + 1, 0, 0, 0, // number of element + + 0x01, 0, 0, 0, // old_offset + 0x00, 0, 0, 0, // new_offset + 0x02, 0, 0, 0, // old_length + 0x03, 0, 0, 0, // new_length + 1, 0, 0, 0, // EXE_TYPE_WIN32_X86 + 0, 0, 0, 0, // src_skip size + 0, 0, 0, 0, // dst_skip size + 0, 0, 0, 0, // copy_count size + 0, 0, 0, 0, // extra_data size + 0, 0, 0, 0, // raw_delta_skip size + 0, 0, 0, 0, // raw_delta_diff size + 0, 0, 0, 0, // reference_delta size + 0, 0, 0, 0, // pool count + }; + + TestInvalidInitialize(&data); +} + +} // namespace zucchini diff --git a/patch_reader.cc b/patch_reader.cc new file mode 100644 index 0000000..eceb969 --- /dev/null +++ b/patch_reader.cc @@ -0,0 +1,345 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/patch_reader.h" + +#include +#include + +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/crc32.h" + +namespace zucchini { + +namespace patch { + +bool ParseElementMatch(BufferSource* source, ElementMatch* element_match) { + PatchElementHeader element_header; + if (!source->GetValue(&element_header)) { + LOG(ERROR) << "Impossible to read ElementMatch from source."; + LOG(ERROR) << base::debug::StackTrace().ToString(); + return false; + } + ExecutableType exe_type = + static_cast(element_header.exe_type); + if (exe_type >= kNumExeType) { + LOG(ERROR) << "Invalid ExecutableType encountered."; + LOG(ERROR) << base::debug::StackTrace().ToString(); + return false; + } + element_match->old_element.offset = element_header.old_offset; + element_match->new_element.offset = element_header.new_offset; + element_match->old_element.size = element_header.old_length; + element_match->new_element.size = element_header.new_length; + element_match->old_element.exe_type = exe_type; + element_match->new_element.exe_type = exe_type; + return true; +} + +bool ParseBuffer(BufferSource* source, BufferSource* buffer) { + uint32_t size = 0; + if (!source->GetValue(&size)) { + LOG(ERROR) << "Impossible to read buffer size from source."; + LOG(ERROR) << base::debug::StackTrace().ToString(); + return false; + } + if (!source->GetRegion(base::checked_cast(size), buffer)) { + LOG(ERROR) << "Impossible to read buffer content from source."; + LOG(ERROR) << base::debug::StackTrace().ToString(); + return false; + } + return true; +} + +} // namespace patch + +/******** EquivalenceSource ********/ + +EquivalenceSource::EquivalenceSource() = default; +EquivalenceSource::EquivalenceSource(const EquivalenceSource&) = default; +EquivalenceSource::~EquivalenceSource() = default; + +bool EquivalenceSource::Initialize(BufferSource* source) { + return patch::ParseBuffer(source, &src_skip_) && + patch::ParseBuffer(source, &dst_skip_) && + 
patch::ParseBuffer(source, ©_count_); +} + +base::Optional EquivalenceSource::GetNext() { + if (src_skip_.empty() || dst_skip_.empty() || copy_count_.empty()) + return base::nullopt; + + Equivalence equivalence = {}; + + uint32_t length = 0; + if (!patch::ParseVarUInt(©_count_, &length)) + return base::nullopt; + equivalence.length = base::strict_cast(length); + + int32_t src_offset_diff = 0; // Intentionally signed. + if (!patch::ParseVarInt(&src_skip_, &src_offset_diff)) + return base::nullopt; + base::CheckedNumeric src_offset = + previous_src_offset_ + src_offset_diff; + if (!src_offset.IsValid()) + return base::nullopt; + + equivalence.src_offset = src_offset.ValueOrDie(); + previous_src_offset_ = src_offset + equivalence.length; + if (!previous_src_offset_.IsValid()) + return base::nullopt; + + uint32_t dst_offset_diff = 0; // Intentionally unsigned. + if (!patch::ParseVarUInt(&dst_skip_, &dst_offset_diff)) + return base::nullopt; + base::CheckedNumeric dst_offset = + previous_dst_offset_ + dst_offset_diff; + if (!dst_offset.IsValid()) + return base::nullopt; + + equivalence.dst_offset = dst_offset.ValueOrDie(); + previous_dst_offset_ = equivalence.dst_offset + equivalence.length; + if (!previous_dst_offset_.IsValid()) + return base::nullopt; + + return equivalence; +} + +/******** ExtraDataSource ********/ + +ExtraDataSource::ExtraDataSource() = default; +ExtraDataSource::ExtraDataSource(const ExtraDataSource&) = default; +ExtraDataSource::~ExtraDataSource() = default; + +bool ExtraDataSource::Initialize(BufferSource* source) { + return patch::ParseBuffer(source, &extra_data_); +} + +base::Optional ExtraDataSource::GetNext(offset_t size) { + ConstBufferView buffer; + if (!extra_data_.GetRegion(size, &buffer)) + return base::nullopt; + return buffer; +} + +/******** RawDeltaSource ********/ + +RawDeltaSource::RawDeltaSource() = default; +RawDeltaSource::RawDeltaSource(const RawDeltaSource&) = default; +RawDeltaSource::~RawDeltaSource() = default; + +bool 
RawDeltaSource::Initialize(BufferSource* source) { + return patch::ParseBuffer(source, &raw_delta_skip_) && + patch::ParseBuffer(source, &raw_delta_diff_); +} + +base::Optional RawDeltaSource::GetNext() { + if (raw_delta_skip_.empty() || raw_delta_diff_.empty()) + return base::nullopt; + + RawDeltaUnit delta = {}; + uint32_t copy_offset_diff = 0; + if (!patch::ParseVarUInt(&raw_delta_skip_, ©_offset_diff)) + return base::nullopt; + base::CheckedNumeric copy_offset = + copy_offset_diff + copy_offset_compensation_; + if (!copy_offset.IsValid()) + return base::nullopt; + delta.copy_offset = copy_offset.ValueOrDie(); + + if (!raw_delta_diff_.GetValue(&delta.diff)) + return base::nullopt; + + // We keep track of the compensation needed for next offset, taking into + // accound delta encoding and bias of -1. + copy_offset_compensation_ = copy_offset + 1; + if (!copy_offset_compensation_.IsValid()) + return base::nullopt; + return delta; +} + +/******** ReferenceDeltaSource ********/ + +ReferenceDeltaSource::ReferenceDeltaSource() = default; +ReferenceDeltaSource::ReferenceDeltaSource(const ReferenceDeltaSource&) = + default; +ReferenceDeltaSource::~ReferenceDeltaSource() = default; + +bool ReferenceDeltaSource::Initialize(BufferSource* source) { + return patch::ParseBuffer(source, &reference_delta_); +} + +base::Optional ReferenceDeltaSource::GetNext() { + if (reference_delta_.empty()) + return base::nullopt; + int32_t delta = 0; + if (!patch::ParseVarInt(&reference_delta_, &delta)) + return base::nullopt; + return delta; +} + +/******** TargetSource ********/ + +TargetSource::TargetSource() = default; +TargetSource::TargetSource(const TargetSource&) = default; +TargetSource::~TargetSource() = default; + +bool TargetSource::Initialize(BufferSource* source) { + return patch::ParseBuffer(source, &extra_targets_); +} + +base::Optional TargetSource::GetNext() { + if (extra_targets_.empty()) + return base::nullopt; + + uint32_t target_diff = 0; + if 
(!patch::ParseVarUInt(&extra_targets_, &target_diff)) + return base::nullopt; + base::CheckedNumeric target = target_diff + target_compensation_; + if (!target.IsValid()) + return base::nullopt; + + // We keep track of the compensation needed for next target, taking into + // accound delta encoding and bias of -1. + target_compensation_ = target + 1; + if (!target_compensation_.IsValid()) + return base::nullopt; + return offset_t(target.ValueOrDie()); +} + +/******** PatchElementReader ********/ + +PatchElementReader::PatchElementReader() = default; +PatchElementReader::PatchElementReader(PatchElementReader&&) = default; +PatchElementReader::~PatchElementReader() = default; + +bool PatchElementReader::Initialize(BufferSource* source) { + bool ok = patch::ParseElementMatch(source, &element_match_) && + equivalences_.Initialize(source) && + extra_data_.Initialize(source) && raw_delta_.Initialize(source) && + reference_delta_.Initialize(source); + if (!ok) + return false; + uint32_t pool_count = 0; + if (!source->GetValue(&pool_count)) { + LOG(ERROR) << "Impossible to read pool_count from source."; + return false; + } + for (uint32_t i = 0; i < pool_count; ++i) { + uint8_t pool_tag_value = 0; + if (!source->GetValue(&pool_tag_value)) { + LOG(ERROR) << "Impossible to read pool_tag from source."; + return false; + } + PoolTag pool_tag(pool_tag_value); + if (pool_tag == kNoPoolTag) { + LOG(ERROR) << "Invalid pool_tag encountered in ExtraTargetList."; + return false; + } + auto insert_result = extra_targets_.insert({pool_tag, {}}); + if (!insert_result.second) { // Element already present. 
+ LOG(ERROR) << "Multiple ExtraTargetList found for the same pool_tag"; + return false; + } + if (!insert_result.first->second.Initialize(source)) + return false; + } + return true; +} + +/******** EnsemblePatchReader ********/ + +base::Optional EnsemblePatchReader::Create( + ConstBufferView buffer) { + BufferSource source(buffer); + EnsemblePatchReader patch; + if (!patch.Initialize(&source)) + return base::nullopt; + return patch; +} + +EnsemblePatchReader::EnsemblePatchReader() = default; +EnsemblePatchReader::EnsemblePatchReader(EnsemblePatchReader&&) = default; +EnsemblePatchReader::~EnsemblePatchReader() = default; + +bool EnsemblePatchReader::Initialize(BufferSource* source) { + if (!source->GetValue(&header_)) { + LOG(ERROR) << "Impossible to read header from source."; + return false; + } + if (header_.magic != PatchHeader::kMagic) { + LOG(ERROR) << "Patch contains invalid magic."; + return false; + } + uint32_t patch_type_int = + static_cast(PatchType::kUnrecognisedPatch); + if (!source->GetValue(&patch_type_int)) { + LOG(ERROR) << "Impossible to read patch_type from source."; + return false; + } + patch_type_ = static_cast(patch_type_int); + if (patch_type_ != PatchType::kRawPatch && + patch_type_ != PatchType::kSinglePatch && + patch_type_ != PatchType::kEnsemblePatch) { + LOG(ERROR) << "Invalid patch_type encountered."; + return false; + } + + uint32_t element_count = 0; + if (!source->GetValue(&element_count)) { + LOG(ERROR) << "Impossible to read element_count from source."; + return false; + } + if (patch_type_ == PatchType::kRawPatch || + patch_type_ == PatchType::kSinglePatch) { + if (element_count != 1) { + LOG(ERROR) << "Unexpected number of elements in patch."; + return false; // Only one element expected. 
+ } + } + + offset_t current_dst_offset = 0; + for (uint32_t i = 0; i < element_count; ++i) { + PatchElementReader element_patch; + if (!element_patch.Initialize(source)) + return false; + + if (!element_patch.old_element().FitsIn(header_.old_size) || + !element_patch.new_element().FitsIn(header_.new_size)) { + LOG(ERROR) << "Invalid element encountered."; + return false; + } + + if (element_patch.new_element().offset != current_dst_offset) { + LOG(ERROR) << "Invalid element encountered."; + return false; + } + current_dst_offset = element_patch.new_element().EndOffset(); + + elements_.push_back(std::move(element_patch)); + } + if (current_dst_offset != header_.new_size) { + LOG(ERROR) << "Patch elements don't fully cover new image file."; + return false; + } + + if (!source->empty()) { + LOG(ERROR) << "Patch was not fully consumed."; + return false; + } + + return true; +} + +bool EnsemblePatchReader::CheckOldFile(ConstBufferView old_image) const { + return old_image.size() == header_.old_size && + CalculateCrc32(old_image.begin(), old_image.end()) == header_.old_crc; +} + +bool EnsemblePatchReader::CheckNewFile(ConstBufferView new_image) const { + return new_image.size() == header_.new_size && + CalculateCrc32(new_image.begin(), new_image.end()) == header_.new_crc; +} + +} // namespace zucchini diff --git a/patch_reader.h b/patch_reader.h new file mode 100644 index 0000000..ef6cd32 --- /dev/null +++ b/patch_reader.h @@ -0,0 +1,277 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_PATCH_READER_H_ +#define COMPONENTS_ZUCCHINI_PATCH_READER_H_ + +#include +#include + +#include +#include + +#include "base/debug/stack_trace.h" +#include "base/logging.h" +#include "base/numerics/checked_math.h" +#include "base/optional.h" +#include "components/zucchini/buffer_source.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/patch_utils.h" + +namespace zucchini { + +namespace patch { + +// The Parse*() functions below attempt to extract data of a specific type from +// the beginning of |source|. A parse function: On success, consumes the used +// portion of |source|, writes data into the output parameter, and returns +// true. Otherwise returns false and does not consume |source|. + +// Parses |source| for the next ElementMatch. +bool ParseElementMatch(BufferSource* source, ElementMatch* element_match); + +// Parses |source| for the next embedded BufferSource. +bool ParseBuffer(BufferSource* source, BufferSource* buffer); + +// Parses |source| for the next VarUInt. +template +bool ParseVarUInt(BufferSource* source, T* value) { + auto bytes_read = DecodeVarUInt(source->begin(), source->end(), value); + if (!bytes_read) { + LOG(ERROR) << "Impossible to read VarUInt from source."; + LOG(ERROR) << base::debug::StackTrace().ToString(); + return false; + } + // Advance |source| beyond the VarUInt value. + source->Skip(bytes_read); + return true; +} + +// Parses |source| for the next VarInt. +template +bool ParseVarInt(BufferSource* source, T* value) { + auto bytes_read = DecodeVarInt(source->begin(), source->end(), value); + if (!bytes_read) { + LOG(ERROR) << "Impossible to read VarInt from source."; + LOG(ERROR) << base::debug::StackTrace().ToString(); + return false; + } + // Advance |source| beyond the VarInt value. 
+ source->Skip(bytes_read); + return true; +} + +} // namespace patch + +// The *Source classes below are light-weight (i.e., allows copying) visitors to +// read patch data. Each of them has an associated "main type", and performs the +// following: +// - Consumes portions of a BufferSource (required to remain valid for the +// lifetime of the object). +// - Decodes consumed data, which represent a list of items with "main type". +// - Dispenses "main type" elements (hence "Source" in the name). +// +// Common "core functions" implemented by *Source classes are: +// - bool Initialize(BufferSource* source): Consumes data from BufferSource and +// initializes internal states. Returns true if successful, and false +// otherwise (|source| may be partially consumed). +// - base::Optional GetNext(OPT_PARAMS): Decodes consumed data and +// returns the next item as base::Optional (returns base::nullopt on failure). +// - bool Done() const: Returns true if no more items remain; otherwise false. +// +// Usage of *Source instances don't mix, and GetNext() have dissimilar +// interfaces. Therefore we do not use inheritance to relate *Source classes, +// and simply implement "core functions" with matching names. + +// Source for Equivalences. +class EquivalenceSource { + public: + EquivalenceSource(); + EquivalenceSource(const EquivalenceSource&); + ~EquivalenceSource(); + + // Core functions. + bool Initialize(BufferSource* source); + base::Optional GetNext(); + bool Done() const { + return src_skip_.empty() && dst_skip_.empty() && copy_count_.empty(); + } + + // Accessors for unittest. + BufferSource src_skip() const { return src_skip_; } + BufferSource dst_skip() const { return dst_skip_; } + BufferSource copy_count() const { return copy_count_; } + + private: + BufferSource src_skip_; + BufferSource dst_skip_; + BufferSource copy_count_; + + base::CheckedNumeric previous_src_offset_ = 0; + base::CheckedNumeric previous_dst_offset_ = 0; +}; + +// Source for extra data. 
+class ExtraDataSource { + public: + ExtraDataSource(); + ExtraDataSource(const ExtraDataSource&); + ~ExtraDataSource(); + + // Core functions. + bool Initialize(BufferSource* source); + // |size| is the size in bytes of the buffer requested. + base::Optional GetNext(offset_t size); + bool Done() const { return extra_data_.empty(); } + + // Accessors for unittest. + BufferSource extra_data() const { return extra_data_; } + + private: + BufferSource extra_data_; +}; + +// Source for raw delta. +class RawDeltaSource { + public: + RawDeltaSource(); + RawDeltaSource(const RawDeltaSource&); + ~RawDeltaSource(); + + // Core functions. + bool Initialize(BufferSource* source); + base::Optional GetNext(); + bool Done() const { + return raw_delta_skip_.empty() && raw_delta_diff_.empty(); + } + + // Accessors for unittest. + BufferSource raw_delta_skip() const { return raw_delta_skip_; } + BufferSource raw_delta_diff() const { return raw_delta_diff_; } + + private: + BufferSource raw_delta_skip_; + BufferSource raw_delta_diff_; + + base::CheckedNumeric copy_offset_compensation_ = 0; +}; + +// Source for reference delta. +class ReferenceDeltaSource { + public: + ReferenceDeltaSource(); + ReferenceDeltaSource(const ReferenceDeltaSource&); + ~ReferenceDeltaSource(); + + // Core functions. + bool Initialize(BufferSource* source); + base::Optional GetNext(); + bool Done() const { return reference_delta_.empty(); } + + // Accessors for unittest. + BufferSource reference_delta() const { return reference_delta_; } + + private: + BufferSource reference_delta_; +}; + +// Source for additional targets. +class TargetSource { + public: + TargetSource(); + TargetSource(const TargetSource&); + ~TargetSource(); + + // Core functions. + bool Initialize(BufferSource* source); + base::Optional GetNext(); + bool Done() const { return extra_targets_.empty(); } + + // Accessors for unittest. 
+ BufferSource extra_targets() const { return extra_targets_; } + + private: + BufferSource extra_targets_; + + base::CheckedNumeric target_compensation_ = 0; +}; + +// Following are utility classes providing a structured view on data forming a +// patch. + +// Utility to read a patch element. A patch element contains all the information +// necessary to patch a single element. This class provide access +// to the multiple streams of data forming the patch element. +class PatchElementReader { + public: + PatchElementReader(); + PatchElementReader(PatchElementReader&&); + ~PatchElementReader(); + + // If data read from |source| is well-formed, initialize cached sources to + // read from it, and returns true. Otherwise returns false. + bool Initialize(BufferSource* source); + + const ElementMatch& element_match() const { return element_match_; } + const Element& old_element() const { return element_match_.old_element; } + const Element& new_element() const { return element_match_.new_element; } + + // The Get*() functions below return copies of cached sources. + EquivalenceSource GetEquivalenceSource() const { return equivalences_; } + ExtraDataSource GetExtraDataSource() const { return extra_data_; } + RawDeltaSource GetRawDeltaSource() const { return raw_delta_; } + ReferenceDeltaSource GetReferenceDeltaSource() const { + return reference_delta_; + } + TargetSource GetExtraTargetSource(PoolTag tag) const { + auto pos = extra_targets_.find(tag); + return pos != extra_targets_.end() ? pos->second : TargetSource(); + } + + private: + ElementMatch element_match_; + + // Cached sources. + EquivalenceSource equivalences_; + ExtraDataSource extra_data_; + RawDeltaSource raw_delta_; + ReferenceDeltaSource reference_delta_; + std::map extra_targets_; +}; + +// Utility to read a Zucchini ensemble patch. An ensemble patch is the +// concatenation of a patch header with a vector of patch elements. 
+class EnsemblePatchReader { + public: + // If data read from |buffer| is well-formed, initializes and returns + // an instance of EnsemblePatchReader. Otherwise returns base::nullopt. + static base::Optional Create(ConstBufferView buffer); + + EnsemblePatchReader(); + EnsemblePatchReader(EnsemblePatchReader&&); + ~EnsemblePatchReader(); + + // If data read from |source| is well-formed, initialize internal state to + // read from it, and returns true. Otherwise returns false. + bool Initialize(BufferSource* source); + + // Check old / new image file validity, comparing against expected size and + // CRC32. Return true if file matches expectations, false otherwise. + bool CheckOldFile(ConstBufferView old_image) const; + bool CheckNewFile(ConstBufferView new_image) const; + + const PatchHeader& header() const { return header_; } + PatchType patch_type() const { return patch_type_; } + const std::vector& elements() const { return elements_; } + + private: + PatchHeader header_; + PatchType patch_type_; + std::vector elements_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_PATCH_READER_H_ diff --git a/patch_utils.h b/patch_utils.h new file mode 100644 index 0000000..77cf2f3 --- /dev/null +++ b/patch_utils.h @@ -0,0 +1,152 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_PATCH_UTILS_H_ +#define COMPONENTS_ZUCCHINI_PATCH_UTILS_H_ + +#include + +#include +#include + +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// Constants that appear inside a patch. +enum class PatchType : uint32_t { + // Patch contains a single raw element, corresponding to an element match that + // covers the entire images, and with ExecutableType::kExeTypeNoOp. + kRawPatch = 0, + + // Patch contains a single executable element, corresponding to an element + // match that covers the entire images. 
// Supported by MSVC, g++, and clang++. Ensures no gaps in packing.
#pragma pack(push, 1)

// Header for a Zucchini patch, found at the beginning of an ensemble patch.
struct PatchHeader {
  // Magic signature at the beginning of a Zucchini patch file: the bytes
  // 'Z', 'u', 'c' packed from the least significant byte upward.
  enum : uint32_t { kMagic = 'Z' | ('u' << 8) | ('c' << 16) };

  uint32_t magic = 0;
  uint32_t old_size = 0;
  uint32_t old_crc = 0;
  uint32_t new_size = 0;
  uint32_t new_crc = 0;
};

// Sanity check: five packed 32-bit fields.
static_assert(sizeof(PatchHeader) == 20, "PatchHeader is 20 bytes");

// Header for a patch element, found at the beginning of every patch element.
struct PatchElementHeader {
  uint32_t old_offset;
  uint32_t new_offset;
  uint32_t old_length;
  uint32_t new_length;
  uint32_t exe_type;
};

// Sanity check: five packed 32-bit fields. The message must state the same
// size as the condition, otherwise a failure report is misleading.
static_assert(sizeof(PatchElementHeader) == 20,
              "PatchElementHeader is 20 bytes");

#pragma pack(pop)
// A Zucchini patch contains data streams encoded using varint format to reduce
// uncompressed size. Each encoded byte carries 7 payload bits, least
// significant group first; the high bit (0x80) is set on every byte except the
// last one.

// Writes |value| as a varint in |dst| and returns an iterator pointing beyond
// the written region. |dst| is assumed to hold enough space. Typically, this
// will write to a vector using back insertion, e.g.:
//   EncodeVarUInt(value, std::back_inserter(vector));
template <class T, class It>
It EncodeVarUInt(T value, It dst) {
  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned");

  while (value >= 0x80) {
    // Emit the low 7 bits with the continuation flag set.
    *dst++ = static_cast<uint8_t>(static_cast<uint8_t>(value) | 0x80);
    value >>= 7;
  }
  *dst++ = static_cast<uint8_t>(value);
  return dst;
}

// Same as EncodeVarUInt(), but for signed values. The sign is moved into the
// least significant bit: non-negative |value| is stored as (value << 1), and
// negative |value| as ((~value) << 1) | 1.
template <class T, class It>
It EncodeVarInt(T value, It dst) {
  static_assert(std::is_signed<T>::value, "Value type must be signed");

  using unsigned_value_type = typename std::make_unsigned<T>::type;
  // The shift / or below are subject to integer promotion. Cast the result
  // back so that EncodeVarUInt() is always instantiated with an unsigned type,
  // including when T is narrower than int.
  if (value < 0) {
    return EncodeVarUInt(
        static_cast<unsigned_value_type>(
            (static_cast<unsigned_value_type>(~value) << 1) | 1),
        dst);
  }
  return EncodeVarUInt(
      static_cast<unsigned_value_type>(
          static_cast<unsigned_value_type>(value) << 1),
      dst);
}

// Tries to read a varint unsigned integer from |[first, last)|. If
// successful, writes result into |value| and returns the number of bytes
// read from |[first, last)|. Otherwise returns 0 and leaves |value| untouched.
// Payload bits of the final byte that do not fit in T are silently dropped.
template <class T, class It>
typename std::iterator_traits<It>::difference_type DecodeVarUInt(It first,
                                                                 It last,
                                                                 T* value) {
  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned");

  uint8_t sh = 0;
  T val = 0;
  for (auto it = first; it != last;) {
    val |= static_cast<T>(static_cast<T>(*it & 0x7F) << sh);
    if (*(it++) < 0x80) {
      // Continuation flag is clear: this was the last byte.
      *value = val;
      return it - first;
    }
    sh += 7;
    if (sh >= sizeof(T) * 8)  // Overflow!
      return 0;
  }
  // Ran out of input before seeing a terminating byte.
  return 0;
}

// Same as DecodeVarUInt(), but for signed values. Reverses the sign-in-LSB
// mapping applied by EncodeVarInt().
template <class T, class It>
typename std::iterator_traits<It>::difference_type DecodeVarInt(It first,
                                                                It last,
                                                                T* value) {
  static_assert(std::is_signed<T>::value, "Value type must be signed");

  typename std::make_unsigned<T>::type tmp = 0;
  auto res = DecodeVarUInt(first, last, &tmp);
  if (res) {
    if (tmp & 1)
      *value = static_cast<T>(~static_cast<T>(tmp >> 1));
    else
      *value = static_cast<T>(tmp >> 1);
  }
  return res;
}
+ for (int delta = -4; delta <= 4; ++delta) { + T value = delta + basis; + EncodeVarUInt(value, std::back_inserter(buffer)); + values.push_back(value); + + value = delta - basis; + EncodeVarUInt(value, std::back_inserter(buffer)); + values.push_back(value); + } + } + + auto it = buffer.begin(); + for (T expected : values) { + T value = T(-1); + auto res = DecodeVarUInt(it, buffer.end(), &value); + EXPECT_NE(0, res); + EXPECT_EQ(expected, value); + it += res; + } + EXPECT_EQ(it, buffer.end()); + + T value = T(-1); + auto res = DecodeVarUInt(it, buffer.end(), &value); + EXPECT_EQ(0, res); + EXPECT_EQ(T(-1), value); +} + +template +void TestEncodeDecodeVarInt(const std::vector& data) { + std::vector buffer; + + std::vector values; + for (T basis : data) { + // For variety, test the neighborhood values for each case in |data|. Some + // test cases may result in overflow when computing |value|, but we don't + // care about that. + for (int delta = -4; delta <= 4; ++delta) { + T value = delta + basis; + EncodeVarInt(value, std::back_inserter(buffer)); + values.push_back(value); + + value = delta - basis; + EncodeVarInt(value, std::back_inserter(buffer)); + values.push_back(value); + } + } + + auto it = buffer.begin(); + for (T expected : values) { + T value = T(-1); + auto res = DecodeVarInt(it, buffer.end(), &value); + EXPECT_NE(0, res); + EXPECT_EQ(expected, value); + it += res; + } + EXPECT_EQ(it, buffer.end()); + + T value = T(-1); + auto res = DecodeVarInt(it, buffer.end(), &value); + EXPECT_EQ(0, res); + EXPECT_EQ(T(-1), value); +} + +TEST(PatchUtilsTest, EncodeDecodeVarUInt32) { + TestEncodeDecodeVarUInt({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21, + 1 << 22, 1 << 27, 1 << 28, 0x7FFFFFFFU, + UINT32_MAX}); +} + +TEST(PatchUtilsTest, EncodeDecodeVarInt32) { + TestEncodeDecodeVarInt({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21, + 1 << 22, 1 << 27, 1 << 28, -1, INT32_MIN, + INT32_MAX}); +} + +TEST(PatchUtilsTest, EncodeDecodeVarUInt64) { + TestEncodeDecodeVarUInt({0, 64, 
128, 8192, 16384, 1 << 20, 1 << 21, + 1 << 22, 1ULL << 55, 1ULL << 56, + 0x7FFFFFFFFFFFFFFFULL, UINT64_MAX}); +} + +TEST(PatchUtilsTest, EncodeDecodeVarInt64) { + TestEncodeDecodeVarInt({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21, + 1 << 22, 1LL << 55, 1LL << 56, -1, INT64_MIN, + INT64_MAX}); +} + +TEST(PatchUtilsTest, DecodeVarUInt32Malformed) { + constexpr uint32_t kUninit = static_cast(-1LL); + + // Output variable to ensure that on failure, the output variable is not + // written to. + uint32_t value = uint32_t(-1); + + auto TestDecodeVarInt = [&value, + kUninit](const std::vector& buffer) { + value = kUninit; + return DecodeVarUInt(buffer.begin(), buffer.end(), &value); + }; + + // Exhausted. + EXPECT_EQ(0, TestDecodeVarInt(std::vector{})); + EXPECT_EQ(kUninit, value); + EXPECT_EQ(0, TestDecodeVarInt(std::vector(4, 128))); + EXPECT_EQ(kUninit, value); + + // Overflow. + EXPECT_EQ(0, TestDecodeVarInt(std::vector(6, 128))); + EXPECT_EQ(kUninit, value); + EXPECT_EQ(0, TestDecodeVarInt({128, 128, 128, 128, 128, 42})); + EXPECT_EQ(kUninit, value); + + // Following are pathological cases that are not handled for simplicity, + // hence decoding is expected to be successful. + EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 16})); + EXPECT_EQ(uint32_t(0), value); + EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 32})); + EXPECT_EQ(uint32_t(0), value); + EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 64})); + EXPECT_EQ(uint32_t(0), value); +} + +TEST(PatchUtilsTest, DecodeVarUInt64Malformed) { + constexpr uint64_t kUninit = static_cast(-1); + + uint64_t value = kUninit; + auto TestDecodeVarInt = [&value, + kUninit](const std::vector& buffer) { + value = kUninit; + return DecodeVarUInt(buffer.begin(), buffer.end(), &value); + }; + + // Exhausted. + EXPECT_EQ(0, TestDecodeVarInt(std::vector{})); + EXPECT_EQ(kUninit, value); + EXPECT_EQ(0, TestDecodeVarInt(std::vector(9, 128))); + EXPECT_EQ(kUninit, value); + + // Overflow. 
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector(10, 128))); + EXPECT_EQ(kUninit, value); + EXPECT_EQ(0, TestDecodeVarInt( + {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 42})); + EXPECT_EQ(kUninit, value); +} + +} // namespace zucchini diff --git a/patch_writer.cc b/patch_writer.cc new file mode 100644 index 0000000..4edbc7c --- /dev/null +++ b/patch_writer.cc @@ -0,0 +1,294 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/patch_writer.h" + +#include +#include + +#include "base/numerics/checked_math.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/crc32.h" + +namespace zucchini { + +namespace patch { + +bool SerializeElementMatch(const ElementMatch& element_match, + BufferSink* sink) { + if (!element_match.IsValid()) + return false; + + PatchElementHeader element_header; + element_header.old_offset = + base::checked_cast(element_match.old_element.offset); + element_header.new_offset = + base::checked_cast(element_match.new_element.offset); + element_header.old_length = + base::checked_cast(element_match.old_element.size); + element_header.new_length = + base::checked_cast(element_match.new_element.size); + element_header.exe_type = element_match.exe_type(); + + return sink->PutValue(element_header); +} + +size_t SerializedElementMatchSize(const ElementMatch& element_match) { + return sizeof(PatchElementHeader); +} + +bool SerializeBuffer(const std::vector& buffer, BufferSink* sink) { + // buffer.size() is not encoded as varint to simplify SerializedBufferSize(). 
+ base::CheckedNumeric size = buffer.size(); + if (!size.IsValid()) + return false; + return sink->PutValue(size.ValueOrDie()) && + sink->PutRange(buffer.begin(), buffer.end()); +} + +size_t SerializedBufferSize(const std::vector& buffer) { + return sizeof(uint32_t) + buffer.size(); +} + +} // namespace patch + +/******** EquivalenceSink ********/ + +EquivalenceSink::EquivalenceSink() = default; +EquivalenceSink::EquivalenceSink(const std::vector& src_skip, + const std::vector& dst_skip, + const std::vector& copy_count) + : src_skip_(src_skip), dst_skip_(dst_skip), copy_count_(copy_count) {} + +EquivalenceSink::EquivalenceSink(EquivalenceSink&&) = default; +EquivalenceSink::~EquivalenceSink() = default; + +void EquivalenceSink::PutNext(const Equivalence& equivalence) { + // Equivalences are expected to be given ordered by |dst_offset|. + DCHECK_GE(equivalence.dst_offset, dst_offset_); + // Unsigned values are ensured by above check. + + // Result of substracting 2 unsigned integers is unsigned. Overflow is allowed + // for negative values, as long as uint32_t can hold the result. 
+ uint32_t src_offset_diff = + base::strict_cast(equivalence.src_offset - src_offset_); + EncodeVarInt(static_cast(src_offset_diff), + std::back_inserter(src_skip_)); + + EncodeVarUInt( + base::strict_cast(equivalence.dst_offset - dst_offset_), + std::back_inserter(dst_skip_)); + + EncodeVarUInt(base::strict_cast(equivalence.length), + std::back_inserter(copy_count_)); + + src_offset_ = equivalence.src_offset + equivalence.length; + dst_offset_ = equivalence.dst_offset + equivalence.length; +} + +size_t EquivalenceSink::SerializedSize() const { + return patch::SerializedBufferSize(src_skip_) + + patch::SerializedBufferSize(dst_skip_) + + patch::SerializedBufferSize(copy_count_); +} + +bool EquivalenceSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(src_skip_, sink) && + patch::SerializeBuffer(dst_skip_, sink) && + patch::SerializeBuffer(copy_count_, sink); +} + +/******** ExtraDataSink ********/ + +ExtraDataSink::ExtraDataSink() = default; +ExtraDataSink::ExtraDataSink(const std::vector& extra_data) + : extra_data_(extra_data) {} + +ExtraDataSink::ExtraDataSink(ExtraDataSink&&) = default; +ExtraDataSink::~ExtraDataSink() = default; + +void ExtraDataSink::PutNext(ConstBufferView region) { + extra_data_.insert(extra_data_.end(), region.begin(), region.end()); +} + +size_t ExtraDataSink::SerializedSize() const { + return patch::SerializedBufferSize(extra_data_); +} + +bool ExtraDataSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(extra_data_, sink); +} + +/******** RawDeltaSink ********/ + +RawDeltaSink::RawDeltaSink() = default; +RawDeltaSink::RawDeltaSink(const std::vector& raw_delta_skip, + const std::vector& raw_delta_diff) + : raw_delta_skip_(raw_delta_skip), raw_delta_diff_(raw_delta_diff) {} + +RawDeltaSink::RawDeltaSink(RawDeltaSink&&) = default; +RawDeltaSink::~RawDeltaSink() = default; + +void RawDeltaSink::PutNext(const RawDeltaUnit& delta) { + DCHECK_GE(delta.copy_offset, copy_offset_compensation_); 
+ EncodeVarUInt(base::strict_cast( + delta.copy_offset - copy_offset_compensation_), + std::back_inserter(raw_delta_skip_)); + + copy_offset_compensation_ = delta.copy_offset + 1; + + raw_delta_diff_.push_back(delta.diff); +} + +size_t RawDeltaSink::SerializedSize() const { + return patch::SerializedBufferSize(raw_delta_skip_) + + patch::SerializedBufferSize(raw_delta_diff_); +} + +bool RawDeltaSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(raw_delta_skip_, sink) && + patch::SerializeBuffer(raw_delta_diff_, sink); +} + +/******** ReferenceDeltaSink ********/ + +ReferenceDeltaSink::ReferenceDeltaSink() = default; +ReferenceDeltaSink::ReferenceDeltaSink( + const std::vector& reference_delta) + : reference_delta_(reference_delta) {} + +ReferenceDeltaSink::ReferenceDeltaSink(ReferenceDeltaSink&&) = default; +ReferenceDeltaSink::~ReferenceDeltaSink() = default; + +void ReferenceDeltaSink::PutNext(int32_t diff) { + EncodeVarInt(diff, std::back_inserter(reference_delta_)); +} + +size_t ReferenceDeltaSink::SerializedSize() const { + return patch::SerializedBufferSize(reference_delta_); +} + +bool ReferenceDeltaSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(reference_delta_, sink); +} + +/******** TargetSink ********/ + +TargetSink::TargetSink() = default; +TargetSink::TargetSink(const std::vector& extra_targets) + : extra_targets_(extra_targets) {} + +TargetSink::TargetSink(TargetSink&&) = default; +TargetSink::~TargetSink() = default; + +void TargetSink::PutNext(uint32_t target) { + DCHECK_GE(target, target_compensation_); + + EncodeVarUInt( + base::strict_cast(target - target_compensation_), + std::back_inserter(extra_targets_)); + + target_compensation_ = target + 1; +} + +size_t TargetSink::SerializedSize() const { + return patch::SerializedBufferSize(extra_targets_); +} + +bool TargetSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(extra_targets_, sink); +} + +/******** 
PatchElementWriter ********/ + +PatchElementWriter::PatchElementWriter() = default; +PatchElementWriter::PatchElementWriter(ElementMatch element_match) + : element_match_(element_match) {} + +PatchElementWriter::PatchElementWriter(PatchElementWriter&&) = default; +PatchElementWriter::~PatchElementWriter() = default; + +size_t PatchElementWriter::SerializedSize() const { + size_t serialized_size = + patch::SerializedElementMatchSize(element_match_) + + equivalences_->SerializedSize() + extra_data_->SerializedSize() + + raw_delta_->SerializedSize() + reference_delta_->SerializedSize(); + + serialized_size += sizeof(uint32_t); + for (const auto& extra_symbols : extra_targets_) + serialized_size += extra_symbols.second.SerializedSize() + 1; + return serialized_size; +} + +bool PatchElementWriter::SerializeInto(BufferSink* sink) const { + bool ok = + patch::SerializeElementMatch(element_match_, sink) && + equivalences_->SerializeInto(sink) && extra_data_->SerializeInto(sink) && + raw_delta_->SerializeInto(sink) && reference_delta_->SerializeInto(sink); + if (!ok) + return false; + + if (!sink->PutValue( + base::checked_cast(extra_targets_.size()))) + return false; + for (const auto& extra_target_sink : extra_targets_) { + if (!sink->PutValue(extra_target_sink.first.value())) + return false; + if (!extra_target_sink.second.SerializeInto(sink)) + return false; + } + return true; +} + +/******** EnsemblePatchWriter ********/ + +EnsemblePatchWriter::~EnsemblePatchWriter() = default; + +EnsemblePatchWriter::EnsemblePatchWriter(const PatchHeader& header) + : header_(header) { + DCHECK_EQ(header_.magic, PatchHeader::kMagic); +} + +EnsemblePatchWriter::EnsemblePatchWriter(ConstBufferView old_image, + ConstBufferView new_image) { + header_.magic = PatchHeader::kMagic; + header_.old_size = base::checked_cast(old_image.size()); + header_.old_crc = CalculateCrc32(old_image.begin(), old_image.end()); + header_.new_size = base::checked_cast(new_image.size()); + header_.new_crc = 
CalculateCrc32(new_image.begin(), new_image.end()); +} + +void EnsemblePatchWriter::AddElement(PatchElementWriter&& patch_element) { + DCHECK(patch_element.new_element().offset == current_dst_offset_); + current_dst_offset_ = patch_element.new_element().EndOffset(); + elements_.push_back(std::move(patch_element)); +} + +size_t EnsemblePatchWriter::SerializedSize() const { + size_t serialized_size = + sizeof(PatchHeader) + sizeof(PatchType) + sizeof(uint32_t); + for (const auto& patch_element : elements_) { + serialized_size += patch_element.SerializedSize(); + } + return serialized_size; +} + +bool EnsemblePatchWriter::SerializeInto(BufferSink* sink) const { + DCHECK_NE(patch_type_, PatchType::kUnrecognisedPatch); + DCHECK_EQ(current_dst_offset_, header_.new_size); + bool ok = + sink->PutValue(header_) && + sink->PutValue(patch_type_) && + sink->PutValue(base::checked_cast(elements_.size())); + if (!ok) + return false; + + for (const auto& element : elements_) { + if (!element.SerializeInto(sink)) + return false; + } + return true; +} + +} // namespace zucchini diff --git a/patch_writer.h b/patch_writer.h new file mode 100644 index 0000000..a7c3785 --- /dev/null +++ b/patch_writer.h @@ -0,0 +1,276 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_PATCH_WRITER_H_ +#define COMPONENTS_ZUCCHINI_PATCH_WRITER_H_ + +#include +#include + +#include +#include +#include + +#include "base/logging.h" +#include "base/macros.h" +#include "base/optional.h" +#include "components/zucchini/buffer_sink.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/patch_utils.h" + +namespace zucchini { + +namespace patch { + +// If sufficient space is available, serializes |element_match| into |sink| and +// returns true. 
Otherwise returns false, and |sink| will be in an undefined +// state. +bool SerializeElementMatch(const ElementMatch& element_match, BufferSink* sink); + +// Returns the size in bytes required to serialize |element_match|. +size_t SerializedElementMatchSize(const ElementMatch& element_match); + +// If sufficient space is available, serializes |buffer| into |sink| and returns +// true. Otherwise returns false, and |sink| will be in an undefined state. +bool SerializeBuffer(const std::vector& buffer, BufferSink* sink); + +// Returns the size in bytes required to serialize |buffer|. +size_t SerializedBufferSize(const std::vector& buffer); + +} // namespace patch + +// Each of *Sink classes below has an associated "main type", and performs the +// following: +// - Receives multiple "main type" elements (hence "Sink" in the name). +// - Encodes list of received data, and writes them to internal storage (e.g., +// applying delta encoding). +// - Writes encoded data to BufferSink. +// +// Common "core functions" implemented for *Sink classes are: +// - void PutNext(const MAIN_TYPE& inst): Encodes and writes an instance of +// MAIN_TYPE to internal storage. Assumptions may be applied to successive +// |inst| provided. +// - size_t SerializedSize() const: Returns the serialized size in bytes of +// internal storage. +// - bool SerializeInto(BufferSink* sink) const: If |sink| has enough space, +// serializes internal storage into |sink|, and returns true. Otherwise +// returns false. +// +// Usage of *Sink instances don't mix, and PuttNext() have dissimilar +// interfaces. Therefore we do not use inheritance to relate *Sink classes, +// simply implement "core functions" with matching names. + +// Sink for equivalences. +class EquivalenceSink { + public: + EquivalenceSink(); + EquivalenceSink(const std::vector& src_skip, + const std::vector& dst_skip, + const std::vector& copy_count); + + EquivalenceSink(EquivalenceSink&&); + ~EquivalenceSink(); + + // Core functions. 
+ // Equivalences must be given by increasing |Equivalence::dst_offset|. + void PutNext(const Equivalence& equivalence); + size_t SerializedSize() const; + bool SerializeInto(BufferSink* sink) const; + + private: + // Offset in source, delta-encoded starting from end of last equivalence, and + // stored as signed varint. + std::vector src_skip_; + // Offset in destination, delta-encoded starting from end of last equivalence, + // and stored as unsigned varint. + std::vector dst_skip_; + // Length of equivalence stored as unsigned varint. + // TODO(etiennep): Investigate on bias. + std::vector copy_count_; + + offset_t src_offset_ = 0; // Last offset in source. + offset_t dst_offset_ = 0; // Last offset in destination. +}; + +// Sink for extra data. +class ExtraDataSink { + public: + ExtraDataSink(); + explicit ExtraDataSink(const std::vector& extra_data); + ExtraDataSink(ExtraDataSink&&); + ~ExtraDataSink(); + + // Core functions. + void PutNext(ConstBufferView region); + size_t SerializedSize() const; + bool SerializeInto(BufferSink* sink) const; + + private: + std::vector extra_data_; +}; + +// Sink for raw delta. +class RawDeltaSink { + public: + RawDeltaSink(); + RawDeltaSink(const std::vector& raw_delta_skip, + const std::vector& raw_delta_diff); + RawDeltaSink(RawDeltaSink&&); + ~RawDeltaSink(); + + // Core functions. + // Deltas must be given by increasing |RawDeltaUnit::copy_offset|. + void PutNext(const RawDeltaUnit& delta); + size_t SerializedSize() const; + bool SerializeInto(BufferSink* sink) const; + + private: + std::vector raw_delta_skip_; // Copy offset stating from last delta. + std::vector raw_delta_diff_; // Bytewise difference. + + // We keep track of the compensation needed for next copy offset, taking into + // accound delta encoding and bias of -1. Stored delta are biased by -1, so a + // sequence of single byte deltas is represented as a string of 0's. + offset_t copy_offset_compensation_ = 0; +}; + +// Sink for reference delta. 
+class ReferenceDeltaSink { + public: + ReferenceDeltaSink(); + explicit ReferenceDeltaSink(const std::vector& reference_delta); + ReferenceDeltaSink(ReferenceDeltaSink&&); + ~ReferenceDeltaSink(); + + // Core functions. + void PutNext(int32_t diff); + size_t SerializedSize() const; + bool SerializeInto(BufferSink* sink) const; + + private: + std::vector reference_delta_; +}; + +// Sink for additional targets. +class TargetSink { + public: + TargetSink(); + explicit TargetSink(const std::vector& extra_targets); + TargetSink(TargetSink&&); + ~TargetSink(); + + // Core functions. + // Targets must be given by increasing order. + void PutNext(uint32_t target); + size_t SerializedSize() const; + bool SerializeInto(BufferSink* sink) const; + + private: + // Targets are delta-encoded and biaised by 1, stored as unsigned varint. + std::vector extra_targets_; + + // We keep track of the compensation needed for next target, taking into + // accound delta encoding and bias of -1. + offset_t target_compensation_ = 0; +}; + +// Following are utility classes to write structured data forming a patch. + +// Utility to write a patch element. A patch element contains all the +// information necessary to patch a single element. This class +// provides an interface to individually set different building blocks of data +// in the patch element. +class PatchElementWriter { + public: + PatchElementWriter(); + explicit PatchElementWriter(ElementMatch element_match); + PatchElementWriter(PatchElementWriter&&); + ~PatchElementWriter(); + + const ElementMatch& element_match() const { return element_match_; } + const Element& old_element() const { return element_match_.old_element; } + const Element& new_element() const { return element_match_.new_element; } + + // Following methods set individual blocks for this element. Previous + // corresponding block is replaced. All streams must be set before call to + // SerializedSize() of SerializeInto(). 
+ + void SetEquivalenceSink(EquivalenceSink&& equivalences) { + equivalences_.emplace(std::move(equivalences)); + } + void SetExtraDataSink(ExtraDataSink&& extra_data) { + extra_data_.emplace(std::move(extra_data)); + } + void SetRawDeltaSink(RawDeltaSink&& raw_delta) { + raw_delta_.emplace(std::move(raw_delta)); + } + void SetReferenceDeltaSink(ReferenceDeltaSink reference_delta) { + reference_delta_.emplace(std::move(reference_delta)); + } + // Set additional targets for pool identified with |pool_tag|. + void SetTargetSink(PoolTag pool_tag, TargetSink&& extra_targets) { + DCHECK(pool_tag != kNoPoolTag); + extra_targets_.emplace(pool_tag, std::move(extra_targets)); + } + + // Returns the serialized size in bytes of the data this object is holding. + size_t SerializedSize() const; + + // If sufficient space is available, serializes data into |sink|, which is at + // least SerializedSize() bytes, and returns true. Otherwise returns false. + bool SerializeInto(BufferSink* sink) const; + + private: + ElementMatch element_match_; + base::Optional equivalences_; + base::Optional extra_data_; + base::Optional raw_delta_; + base::Optional reference_delta_; + std::map extra_targets_; +}; + +// Utility to write a Zucchini ensemble patch. An ensemble patch is the +// concatenation of a patch header with a vector of patch elements. +class EnsemblePatchWriter { + public: + explicit EnsemblePatchWriter(const PatchHeader& header); + EnsemblePatchWriter(ConstBufferView old_image, ConstBufferView new_image); + ~EnsemblePatchWriter(); + + void SetPatchType(PatchType patch_type) { patch_type_ = patch_type; } + + // Reserves space for |count| patch elements. + void ReserveElements(size_t count) { elements_.reserve(count); } + + // Adds an patch element into the patch. Patch elements must be ordered by + // their location in the new image file. + void AddElement(PatchElementWriter&& patch_element); + + // Returns the serialized size in bytes of the data this object is holding. 
+ size_t SerializedSize() const; + + // If sufficient space is available, serializes data into |sink|, which is at + // least SerializedSize() bytes, and returns true. Otherwise returns false. + bool SerializeInto(BufferSink* sink) const; + + // If sufficient space is available, serializes data into |buffer|, which is + // at least SerializedSize() bytes, and returns true. Otherwise returns false. + bool SerializeInto(MutableBufferView buffer) const { + BufferSink sink(buffer); + return SerializeInto(&sink); + } + + private: + PatchHeader header_; + PatchType patch_type_ = PatchType::kUnrecognisedPatch; + std::vector elements_; + offset_t current_dst_offset_ = 0; + + DISALLOW_COPY_AND_ASSIGN(EnsemblePatchWriter); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_PATCH_WRITER_H_ diff --git a/reference_set.cc b/reference_set.cc new file mode 100644 index 0000000..963e814 --- /dev/null +++ b/reference_set.cc @@ -0,0 +1,68 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/reference_set.h" + +#include +#include + +#include "base/logging.h" +#include "base/macros.h" +#include "components/zucchini/target_pool.h" + +namespace zucchini { + +namespace { + +// Returns true if |refs| is sorted by location. 
+bool IsReferenceListSorted(const std::vector& refs) { + return std::is_sorted( + refs.begin(), refs.end(), + [](const IndirectReference& a, const IndirectReference& b) { + return a.location < b.location; + }); +} + +} // namespace + +ReferenceSet::ReferenceSet(const ReferenceTypeTraits& traits, + const TargetPool& target_pool) + : traits_(traits), target_pool_(target_pool) {} +ReferenceSet::ReferenceSet(ReferenceSet&&) = default; +ReferenceSet::~ReferenceSet() = default; + +void ReferenceSet::InitReferences(ReferenceReader&& ref_reader) { + DCHECK(references_.empty()); + for (auto ref = ref_reader.GetNext(); ref.has_value(); + ref = ref_reader.GetNext()) { + references_.push_back( + {ref->location, target_pool_.KeyForOffset(ref->target)}); + } + DCHECK(IsReferenceListSorted(references_)); +} + +void ReferenceSet::InitReferences(const std::vector& refs) { + DCHECK(references_.empty()); + references_.reserve(refs.size()); + std::transform(refs.begin(), refs.end(), std::back_inserter(references_), + [&](const Reference& ref) -> IndirectReference { + return {ref.location, target_pool_.KeyForOffset(ref.target)}; + }); + DCHECK(IsReferenceListSorted(references_)); +} + +IndirectReference ReferenceSet::at(offset_t offset) const { + auto pos = + std::upper_bound(references_.begin(), references_.end(), offset, + [](offset_t offset, const IndirectReference& ref) { + return offset < ref.location; + }); + + DCHECK(pos != references_.begin()); // Iterators. + --pos; + DCHECK_LT(offset, pos->location + width()); + return *pos; +} + +} // namespace zucchini diff --git a/reference_set.h b/reference_set.h new file mode 100644 index 0000000..2ca7202 --- /dev/null +++ b/reference_set.h @@ -0,0 +1,66 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_REFERENCE_SET_H_ +#define COMPONENTS_ZUCCHINI_REFERENCE_SET_H_ + +#include + +#include + +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +class TargetPool; + +// Container of distinct indirect references of one type, along with traits, +// only used during patch generation. +class ReferenceSet { + public: + using const_iterator = std::vector::const_iterator; + + // |traits| specifies the reference represented. |target_pool| specifies + // common targets shared by all reference represented, and mediates target + // translation between offsets and indexes. + ReferenceSet(const ReferenceTypeTraits& traits, + const TargetPool& target_pool); + ReferenceSet(const ReferenceSet&) = delete; + ReferenceSet(ReferenceSet&&); + ~ReferenceSet(); + + // Either one of the initializers below should be called exactly once. These + // insert all references from |ref_reader/refs| into this class. The targets + // of these references must be in |target_pool_|. + void InitReferences(ReferenceReader&& ref_reader); + void InitReferences(const std::vector& refs); + + const std::vector& references() const { + return references_; + } + const ReferenceTypeTraits& traits() const { return traits_; } + const TargetPool& target_pool() const { return target_pool_; } + TypeTag type_tag() const { return traits_.type_tag; } + PoolTag pool_tag() const { return traits_.pool_tag; } + offset_t width() const { return traits_.width; } + + // Looks up the IndirectReference by an |offset| that it spans. |offset| is + // assumed to be valid, i.e., |offset| must be spanned by some + // IndirectReference in |references_|. 
+ IndirectReference at(offset_t offset) const; + + size_t size() const { return references_.size(); } + const_iterator begin() const { return references_.begin(); } + const_iterator end() const { return references_.end(); } + + private: + ReferenceTypeTraits traits_; + const TargetPool& target_pool_; + // List of distinct IndirectReference instances sorted by location. + std::vector references_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_REFERENCE_SET_H_ diff --git a/reference_set_unittest.cc b/reference_set_unittest.cc new file mode 100644 index 0000000..b4ccceb --- /dev/null +++ b/reference_set_unittest.cc @@ -0,0 +1,51 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/reference_set.h" + +#include + +#include "components/zucchini/image_utils.h" +#include "components/zucchini/target_pool.h" +#include "components/zucchini/test_reference_reader.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +constexpr offset_t kWidth = 2U; + +} // namespace + +class ReferenceSetTest : public testing::Test { + protected: + // For simplicity, |target_pool_| has no type info (not needed here). 
+ TargetPool target_pool_ = TargetPool{{0, 2, 3, 5}}; + ReferenceSet reference_set_ = + ReferenceSet{{kWidth, TypeTag(0), PoolTag(0)}, target_pool_}; +}; + +TEST_F(ReferenceSetTest, InitReferencesFromReader) { + EXPECT_EQ(std::vector(), reference_set_.references()); + EXPECT_EQ(0U, reference_set_.size()); + std::vector references = {{10, 0}, {12, 2}, {14, 5}}; + reference_set_.InitReferences(TestReferenceReader(references)); + EXPECT_EQ(std::vector({{10, 0}, {12, 1}, {14, 3}}), + reference_set_.references()); + EXPECT_EQ(3U, reference_set_.size()); +} + +TEST_F(ReferenceSetTest, At) { + reference_set_.InitReferences({{10, 0}, {12, 2}, {15, 5}}); + // Each reference has kWidth = 2, so check all bytes covered. + EXPECT_EQ(IndirectReference({10, 0}), reference_set_.at(10)); + EXPECT_EQ(IndirectReference({10, 0}), reference_set_.at(11)); + EXPECT_EQ(IndirectReference({12, 1}), reference_set_.at(12)); + EXPECT_EQ(IndirectReference({12, 1}), reference_set_.at(13)); + EXPECT_EQ(IndirectReference({15, 3}), reference_set_.at(15)); + EXPECT_EQ(IndirectReference({15, 3}), reference_set_.at(16)); +} + +} // namespace zucchini diff --git a/rel32_finder.cc b/rel32_finder.cc new file mode 100644 index 0000000..9a07ade --- /dev/null +++ b/rel32_finder.cc @@ -0,0 +1,137 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file.
+ +#include "components/zucchini/rel32_finder.h" + +#include + +namespace zucchini { + +/******** Abs32GapFinder ********/ + +Abs32GapFinder::Abs32GapFinder(ConstBufferView image, + ConstBufferView region, + const std::vector& abs32_locations, + size_t abs32_width) + : base_(image.begin()), + region_end_(region.end()), + abs32_end_(abs32_locations.end()), + abs32_width_(abs32_width) { + DCHECK_GT(abs32_width, size_t(0)); + DCHECK_GE(region.begin(), image.begin()); + DCHECK_LE(region.end(), image.end()); + + const offset_t begin_offset = region.begin() - image.begin(); + // Find the first |abs32_current_| with |*abs32_current_ >= begin_offset|. + abs32_current_ = std::lower_bound(abs32_locations.begin(), + abs32_locations.end(), begin_offset); + + // Find lower boundary, accounting for possibility that |abs32_current_[-1]| + // may straddle across |region.begin()|. + current_lo_ = region.begin(); + if (abs32_current_ > abs32_locations.begin()) { + current_lo_ = std::max(current_lo_, + image.begin() + abs32_current_[-1] + abs32_width_); + } +} + +Abs32GapFinder::~Abs32GapFinder() = default; + +base::Optional Abs32GapFinder::GetNext() { + // Iterate over |[abs32_current_, abs32_end_)| and emit segments. + while (abs32_current_ != abs32_end_ && + base_ + *abs32_current_ < region_end_) { + ConstBufferView::const_iterator hi = base_ + *abs32_current_; + ConstBufferView gap = ConstBufferView::FromRange(current_lo_, hi); + current_lo_ = hi + abs32_width_; + ++abs32_current_; + if (!gap.empty()) + return gap; + } + // Emit final segment. 
+ if (current_lo_ < region_end_) { + ConstBufferView gap = ConstBufferView::FromRange(current_lo_, region_end_); + current_lo_ = region_end_; + return gap; + } + return base::nullopt; +} + +/******** Rel32Finder ********/ + +Rel32Finder::Rel32Finder() = default; + +Rel32Finder::Rel32Finder(ConstBufferView region) + : region_(region), next_cursor_(region_.begin()) {} + +Rel32Finder::~Rel32Finder() = default; + +/******** Rel32FinderX86 ********/ + +ConstBufferView Rel32FinderX86::Scan(ConstBufferView region) { + ConstBufferView::const_iterator cursor = region.begin(); + while (cursor < region.end()) { + // Heuristic rel32 detection by looking for opcodes that use them. + if (cursor + 5 <= region.end()) { + if (cursor[0] == 0xE8 || cursor[0] == 0xE9) { // CALL rel32; JMP rel32 + rel32_ = {cursor + 1, false}; + return ConstBufferView::FromRange(cursor, rel32_.location + 4); + } + } + if (cursor + 6 <= region.end()) { + if (cursor[0] == 0x0F && (cursor[1] & 0xF0) == 0x80) { // Jcc long form + rel32_ = {cursor + 2, false}; + return ConstBufferView::FromRange(cursor, rel32_.location + 4); + } + } + ++cursor; + } + return {region.end(), 0}; +} + +/******** Rel32FinderX64 ********/ + +ConstBufferView Rel32FinderX64::Scan(ConstBufferView region) { + ConstBufferView::const_iterator cursor = region.begin(); + while (cursor < region.end()) { + // Heuristic rel32 detection by looking for opcodes that use them.
+ if (cursor + 5 <= region.end()) { + if (cursor[0] == 0xE8 || cursor[0] == 0xE9) { // CALL rel32; JMP rel32 + rel32_ = {cursor + 1, false}; + return ConstBufferView::FromRange(cursor, rel32_.location + 4); + } + } + if (cursor + 6 <= region.end()) { + if (cursor[0] == 0x0F && (cursor[1] & 0xF0) == 0x80) { // Jcc long form + rel32_ = {cursor + 2, false}; + return ConstBufferView::FromRange(cursor, rel32_.location + 4); + } else if ((cursor[0] == 0xFF && + (cursor[1] == 0x15 || cursor[1] == 0x25)) || + ((cursor[0] == 0x89 || cursor[0] == 0x8B || + cursor[0] == 0x8D) && + (cursor[1] & 0xC7) == 0x05)) { + // 6-byte instructions: + // [2-byte opcode] [disp32]: + // Opcode + // FF 15: CALL QWORD PTR [rip+disp32] + // FF 25: JMP QWORD PTR [rip+disp32] + // + // [1-byte opcode] [ModR/M] [disp32]: + // Opcode + // 89: MOV DWORD PTR [rip+disp32],reg + // 8B: MOV reg,DWORD PTR [rip+disp32] + // 8D: LEA reg,[rip+disp32] + // ModR/M : MMRRRMMM + // MM = 00 & MMM = 101 => rip+disp32 + // RRR: selects reg operand from [eax|ecx|...|edi] + rel32_ = {cursor + 2, true}; + return ConstBufferView::FromRange(cursor, rel32_.location + 4); + } + } + ++cursor; + } + return {region.end(), 0}; +} + +} // namespace zucchini diff --git a/rel32_finder.h b/rel32_finder.h new file mode 100644 index 0000000..798983e --- /dev/null +++ b/rel32_finder.h @@ -0,0 +1,189 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_REL32_FINDER_H_ +#define COMPONENTS_ZUCCHINI_REL32_FINDER_H_ + +#include + +#include + +#include "base/logging.h" +#include "base/macros.h" +#include "base/optional.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// See README.md for definitions on abs32 and rel32 references. We assume the +// following: +// - Abs32 locations have fixed lengths, and never overlap.
+// Rel32 locations can be reasonably identified by heuristically disassembling +// machine code. +// - Rel32 locations never overlap with each other, and never with abs32 +// locations. + +// Abs32GapFinder is a class that iterates over all contiguous gaps in |region| +// that lie outside of |abs32_locations| elements, each spanning |abs32_width| +// bytes. For example, given +// region = [base_ + 8, base_ + 25), +// abs32_locations = {2, 6, 15, 20, 27}, +// abs32_width_ = 4, +// we obtain the following: +// 111111111122222222223 -> offsets +// 0123456789012345678901234567890 +// ........*****************...... -> region = * +// ^ ^ ^ ^ ^ -> abs32 locations +// aaaaaaaa aaaa aaaa aaaa -> abs32 locations with width +// ........--*****----*----*...... -> region excluding abs32 -> 3 gaps +// The resulting gaps (must be non-empty) are: +// [10, 15), [19, 20), [24, 25). +// These gaps can then be passed to Rel32Finder (below) to find rel32 references +// that are guaranteed to not overlap with any abs32 references. +class Abs32GapFinder { + public: + // |abs32_locations| is a sorted list of non-overlapping abs32 reference + // locations in |image|, each spanning |abs32_width| bytes. Gaps are searched + // in |region|, which must be part of |image|. + Abs32GapFinder(ConstBufferView image, + ConstBufferView region, + const std::vector& abs32_locations, + size_t abs32_width); + ~Abs32GapFinder(); + + // Returns the next available gap, or nullopt if exhausted. + base::Optional GetNext(); + + private: + const ConstBufferView::const_iterator base_; + const ConstBufferView::const_iterator region_end_; + ConstBufferView::const_iterator current_lo_; + std::vector::const_iterator abs32_current_; + std::vector::const_iterator abs32_end_; + size_t abs32_width_; + + DISALLOW_COPY_AND_ASSIGN(Abs32GapFinder); +}; + +// A class to parse executable bytes of an image to find rel32 locations. +// Architecture-specific parse details are delegated to inherited classes.
+// This is typically used along with Abs32GapFinder to find search regions. +// The caller may filter rel32 locations, based on rel32 targets. +class Rel32Finder { + public: + Rel32Finder(); + // |region| is the region being scanned for rel32 references. + explicit Rel32Finder(ConstBufferView region); + virtual ~Rel32Finder(); + + // Reset object to start scanning for rel32 references in |region|. + void Reset(ConstBufferView region) { + next_cursor_ = region.begin(); + region_ = region; + } + + // Accept the last reference found. Next call to FindNext() will scan starting + // beyond that reference, instead of the current search position. + void Accept() { region_.seek(next_cursor_); } + + // Accessors for unittest. + ConstBufferView::const_iterator next_cursor() const { return next_cursor_; } + ConstBufferView region() const { return region_; } + + protected: + // Scans for the next rel32 reference. If a reference is found, advances the + // search position beyond it and returns true. Otherwise, moves the search + // position to the end of the region and returns false. + bool FindNext() { + ConstBufferView result = Scan(region_); + region_.seek(result.begin()); + next_cursor_ = result.end(); + if (region_.empty()) + return false; + region_.remove_prefix(1); + DCHECK_GE(next_cursor_, region_.begin()); + DCHECK_LE(next_cursor_, region_.end()); + return true; + } + + // Architecture-specific rel32 reference detection, which scans executable + // bytes given by |region|. For each rel32 reference found, the implementation + // should cache the necessary data to be retrieved via accessors and return a + // region starting at the current search position, and ending beyond the + // reference that was just found, or an empty region starting at the end of + // the search region if no more reference is found. 
By default, the next time + // FindNext() is called, |region| will start at the current search position, + // unless Accept() was called, in which case |region| will start beyond the + // last reference. + virtual ConstBufferView Scan(ConstBufferView region) = 0; + + private: + ConstBufferView region_; + ConstBufferView::const_iterator next_cursor_ = nullptr; + + DISALLOW_COPY_AND_ASSIGN(Rel32Finder); +}; + +// Parsing for X86 or X64: we perform naive scan for opcodes that have rel32 as +// an argument, and disregard instruction alignment. +class Rel32FinderIntel : public Rel32Finder { + public: + // Struct to store GetNext() results. + struct Result { + ConstBufferView::const_iterator location; + + // Some references must have their target in the same section as location, + // and we use this to heuristically reject rel32 reference candidates. + // When true, this constraint is relaxed. + bool can_point_outside_section; + }; + + using Rel32Finder::Rel32Finder; + + // Returns the next available Result, or nullopt if exhausted. + base::Optional GetNext() { + if (FindNext()) + return rel32_; + return base::nullopt; + } + + protected: + // Cached results. + Result rel32_; + + // Rel32Finder: + ConstBufferView Scan(ConstBufferView region) override = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(Rel32FinderIntel); +}; + +// X86 instructions. +class Rel32FinderX86 : public Rel32FinderIntel { + public: + using Rel32FinderIntel::Rel32FinderIntel; + + private: + // Rel32Finder: + ConstBufferView Scan(ConstBufferView region) override; + + DISALLOW_COPY_AND_ASSIGN(Rel32FinderX86); +}; + +// X64 instructions.
+class Rel32FinderX64 : public Rel32FinderIntel { + public: + using Rel32FinderIntel::Rel32FinderIntel; + + private: + // Rel32Finder: + ConstBufferView Scan(ConstBufferView region) override; + + DISALLOW_COPY_AND_ASSIGN(Rel32FinderX64); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_REL32_FINDER_H_ diff --git a/rel32_finder_unittest.cc b/rel32_finder_unittest.cc new file mode 100644 index 0000000..2da76ad --- /dev/null +++ b/rel32_finder_unittest.cc @@ -0,0 +1,353 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/rel32_finder.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "base/format_macros.h" +#include "base/logging.h" +#include "base/strings/stringprintf.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(Abs32GapFinderTest, All) { + const size_t kRegionTotal = 99; + std::vector buffer(kRegionTotal); + ConstBufferView image(buffer.data(), buffer.size()); + + // Common test code that returns the resulting segments as a string. 
+ auto run_test = [&](size_t rlo, size_t rhi, + std::vector abs32_locations, + std::ptrdiff_t abs32_width) -> std::string { + CHECK_LE(rlo, kRegionTotal); + CHECK_LE(rhi, kRegionTotal); + CHECK(std::is_sorted(abs32_locations.begin(), abs32_locations.end())); + CHECK_GT(abs32_width, 0); + ConstBufferView region = + ConstBufferView::FromRange(image.begin() + rlo, image.begin() + rhi); + Abs32GapFinder gap_finder(image, region, abs32_locations, abs32_width); + + std::string out_str; + for (auto gap = gap_finder.GetNext(); gap; gap = gap_finder.GetNext()) { + size_t lo = static_cast(gap->begin() - image.begin()); + size_t hi = static_cast(gap->end() - image.begin()); + out_str.append(base::StringPrintf("[%" PRIuS ",%" PRIuS ")", lo, hi)); + } + return out_str; + }; + + // Empty regions yield empty segments. + EXPECT_EQ("", run_test(0, 0, std::vector(), 4)); + EXPECT_EQ("", run_test(9, 9, std::vector(), 4)); + EXPECT_EQ("", run_test(8, 8, {8}, 4)); + EXPECT_EQ("", run_test(8, 8, {0, 12}, 4)); + + // If no abs32 locations exist then the segment is the main range. + EXPECT_EQ("[0,99)", run_test(0, 99, std::vector(), 4)); + EXPECT_EQ("[20,21)", run_test(20, 21, std::vector(), 4)); + EXPECT_EQ("[51,55)", run_test(51, 55, std::vector(), 4)); + + // abs32 locations found near start of main range. + EXPECT_EQ("[10,20)", run_test(10, 20, {5}, 4)); + EXPECT_EQ("[10,20)", run_test(10, 20, {6}, 4)); + EXPECT_EQ("[11,20)", run_test(10, 20, {7}, 4)); + EXPECT_EQ("[12,20)", run_test(10, 20, {8}, 4)); + EXPECT_EQ("[13,20)", run_test(10, 20, {9}, 4)); + EXPECT_EQ("[14,20)", run_test(10, 20, {10}, 4)); + EXPECT_EQ("[10,11)[15,20)", run_test(10, 20, {11}, 4)); + + // abs32 locations found near end of main range. 
+ EXPECT_EQ("[10,15)[19,20)", run_test(10, 20, {15}, 4)); + EXPECT_EQ("[10,16)", run_test(10, 20, {16}, 4)); + EXPECT_EQ("[10,17)", run_test(10, 20, {17}, 4)); + EXPECT_EQ("[10,18)", run_test(10, 20, {18}, 4)); + EXPECT_EQ("[10,19)", run_test(10, 20, {19}, 4)); + EXPECT_EQ("[10,20)", run_test(10, 20, {20}, 4)); + EXPECT_EQ("[10,20)", run_test(10, 20, {21}, 4)); + + // Main range completely eclipsed by abs32 location. + EXPECT_EQ("", run_test(10, 11, {7}, 4)); + EXPECT_EQ("", run_test(10, 11, {8}, 4)); + EXPECT_EQ("", run_test(10, 11, {9}, 4)); + EXPECT_EQ("", run_test(10, 11, {10}, 4)); + EXPECT_EQ("", run_test(10, 12, {8}, 4)); + EXPECT_EQ("", run_test(10, 12, {9}, 4)); + EXPECT_EQ("", run_test(10, 12, {10}, 4)); + EXPECT_EQ("", run_test(10, 13, {9}, 4)); + EXPECT_EQ("", run_test(10, 13, {10}, 4)); + EXPECT_EQ("", run_test(10, 14, {10}, 4)); + EXPECT_EQ("", run_test(10, 14, {8, 12}, 4)); + + // Partial eclipses. + EXPECT_EQ("[24,25)", run_test(20, 25, {20}, 4)); + EXPECT_EQ("[20,21)", run_test(20, 25, {21}, 4)); + EXPECT_EQ("[20,21)[25,26)", run_test(20, 26, {21}, 4)); + + // abs32 location outside main range. + EXPECT_EQ("[40,60)", run_test(40, 60, {36, 60}, 4)); + EXPECT_EQ("[41,61)", run_test(41, 61, {0, 10, 20, 30, 34, 62, 68, 80}, 4)); + + // Change abs32 width. + EXPECT_EQ("[10,11)[12,14)[16,19)", run_test(10, 20, {9, 11, 14, 15, 19}, 1)); + EXPECT_EQ("", run_test(10, 11, {10}, 1)); + EXPECT_EQ("[18,23)[29,31)", run_test(17, 31, {15, 23, 26, 31}, 3)); + EXPECT_EQ("[17,22)[25,26)[29,30)", run_test(17, 31, {14, 22, 26, 30}, 3)); + EXPECT_EQ("[10,11)[19,20)", run_test(10, 20, {11}, 8)); + + // Mixed cases with abs32 width = 4. 
+ EXPECT_EQ("[10,15)[19,20)[24,25)", run_test(8, 25, {2, 6, 15, 20, 27}, 4)); + EXPECT_EQ("[0,25)[29,45)[49,50)", run_test(0, 50, {25, 45}, 4)); + EXPECT_EQ("[10,20)[28,50)", run_test(10, 50, {20, 24}, 4)); + EXPECT_EQ("[49,50)[54,60)[64,70)[74,80)[84,87)", + run_test(49, 87, {10, 20, 30, 40, 50, 60, 70, 80, 90}, 4)); + EXPECT_EQ("[0,10)[14,20)[24,25)[29,50)", run_test(0, 50, {10, 20, 25}, 4)); +} + +namespace { + +// A mock Rel32Finder to inject next search result on Scan(). +class TestRel32Finder : public Rel32Finder { + public: + using Rel32Finder::Rel32Finder; + + bool GetNext() { return Rel32Finder::FindNext(); } + + // Rel32Finder: + ConstBufferView Scan(ConstBufferView region) override { return next_result; } + + ConstBufferView next_result; +}; + +} // namespace + +TEST(Rel32FinderTest, Scan) { + const size_t kRegionTotal = 99; + std::vector buffer(kRegionTotal); + ConstBufferView image(buffer.data(), buffer.size()); + + TestRel32Finder finder(image); + + auto check_finder_state = [&](const TestRel32Finder& finder, + size_t expected_cursor, + size_t expected_next_cursor) { + CHECK_LE(expected_cursor, kRegionTotal); + CHECK_LE(expected_next_cursor, kRegionTotal); + + EXPECT_EQ(image.begin() + expected_cursor, finder.region().begin()); + EXPECT_EQ(image.begin() + expected_next_cursor, finder.next_cursor()); + }; + + check_finder_state(finder, 0, 0); + + finder.next_result = ConstBufferView(image.begin() + 0, 1); + EXPECT_TRUE(finder.GetNext()); + check_finder_state(finder, 1, 1); + + finder.next_result = ConstBufferView(image.begin() + 1, 1); + EXPECT_TRUE(finder.GetNext()); + check_finder_state(finder, 2, 2); + + finder.next_result = ConstBufferView(image.begin() + 4, 2); + EXPECT_TRUE(finder.GetNext()); + check_finder_state(finder, 5, 6); + finder.Accept(); + check_finder_state(finder, 6, 6); + + finder.next_result = ConstBufferView(image.begin() + 6, 1); + EXPECT_TRUE(finder.GetNext()); + check_finder_state(finder, 7, 7); + + finder.next_result = 
ConstBufferView(image.begin() + 7, 1); + EXPECT_TRUE(finder.GetNext()); + check_finder_state(finder, 8, 8); + + finder.next_result = ConstBufferView(image.begin() + 98, 1); + EXPECT_TRUE(finder.GetNext()); + check_finder_state(finder, 99, 99); + + finder.next_result = ConstBufferView(image.end(), 0); + EXPECT_FALSE(finder.GetNext()); + check_finder_state(finder, 99, 99); +} + +TEST(Rel32FinderX86Test, FindNext) { + constexpr uint8_t data[] = { + 0x55, // 00: push ebp + 0x8B, 0xEC, // 01: mov ebp,esp + 0xE8, 0x00, 0x00, 0x00, 0x00, // 03: call 08 + 0xE9, 0x00, 0x00, 0x00, 0x00, // 08: jmp 0D + 0x0F, 0x80, 0x00, 0x00, 0x00, 0x00, // 0D: jo 13 + 0x0F, 0x81, 0x00, 0x00, 0x00, 0x00, // 13: jno 19 + 0x0F, 0x82, 0x00, 0x00, 0x00, 0x00, // 19: jb 1F + 0x0F, 0x83, 0x00, 0x00, 0x00, 0x00, // 1F: jae 25 + 0x0F, 0x84, 0x00, 0x00, 0x00, 0x00, // 25: je 2B + 0x0F, 0x85, 0x00, 0x00, 0x00, 0x00, // 2B: jne 31 + 0x0F, 0x86, 0x00, 0x00, 0x00, 0x00, // 31: jbe 37 + 0x0F, 0x87, 0x00, 0x00, 0x00, 0x00, // 37: ja 3D + 0x0F, 0x88, 0x00, 0x00, 0x00, 0x00, // 3D: js 43 + 0x0F, 0x89, 0x00, 0x00, 0x00, 0x00, // 43: jns 49 + 0x0F, 0x8A, 0x00, 0x00, 0x00, 0x00, // 49: jp 4F + 0x0F, 0x8B, 0x00, 0x00, 0x00, 0x00, // 4F: jnp 55 + 0x0F, 0x8C, 0x00, 0x00, 0x00, 0x00, // 55: jl 5B + 0x0F, 0x8D, 0x00, 0x00, 0x00, 0x00, // 5B: jge 61 + 0x0F, 0x8E, 0x00, 0x00, 0x00, 0x00, // 61: jle 67 + 0x0F, 0x8F, 0x00, 0x00, 0x00, 0x00, // 67: jg 6D + 0x5D, // 6D: pop ebp + 0xC3, // 6E: ret + }; + + ConstBufferView image = + ConstBufferView::FromRange(std::begin(data), std::end(data)); + + Rel32FinderX86 rel_finder(image); + + // List of expected locations as pairs of (cursor position, rel32 position).
+ std::vector> expected_locations = { + {0x04, 0x04}, {0x09, 0x09}, {0x0E, 0x0F}, {0x14, 0x15}, {0x1A, 0x1B}, + {0x20, 0x21}, {0x26, 0x27}, {0x2C, 0x2D}, {0x32, 0x33}, {0x38, 0x39}, + {0x3E, 0x3F}, {0x44, 0x45}, {0x4A, 0x4B}, {0x50, 0x51}, {0x56, 0x57}, + {0x5C, 0x5D}, {0x62, 0x63}, {0x68, 0x69}, + }; + + for (auto location : expected_locations) { + auto result = rel_finder.GetNext(); + EXPECT_TRUE(result.has_value()); + + EXPECT_EQ(location.first, + size_t(rel_finder.region().begin() - image.begin())); + EXPECT_EQ(location.second, size_t(result->location - image.begin())); + EXPECT_EQ(result->location + 4, rel_finder.next_cursor()); + EXPECT_FALSE(result->can_point_outside_section); + rel_finder.Accept(); + } + EXPECT_EQ(base::nullopt, rel_finder.GetNext()); +} + +TEST(Rel32FinderX86Test, Accept) { + constexpr uint8_t data[] = { + 0xB9, 0x00, 0x00, 0x00, 0xE9, // 00: mov ecx,E9000000 + 0xE8, 0x00, 0x00, 0x00, 0xE9, // 05: call E900000A + 0xE8, 0x00, 0x00, 0x00, 0xE9, // 0A: call E900000F + }; + + ConstBufferView image = + ConstBufferView::FromRange(std::begin(data), std::end(data)); + + auto next_location = [&](Rel32FinderX86& rel_finder) { + auto result = rel_finder.GetNext(); + EXPECT_TRUE(result.has_value()); + return result->location - image.begin(); + }; + + Rel32FinderX86 rel_finder(image); + + EXPECT_EQ(0x05, next_location(rel_finder)); // False positive. + rel_finder.Accept(); + // False negative: shadowed by 0x05 + // EXPECT_EQ(0x06, next_location(rel_finder)); + EXPECT_EQ(0x0A, next_location(rel_finder)); // False positive. + EXPECT_EQ(0x0B, next_location(rel_finder)); // Found if 0x0A is discarded.
+} + +TEST(Rel32FinderX64Test, FindNext) { + constexpr uint8_t data[] = { + 0x55, // 00: push ebp + 0x8B, 0xEC, // 01: mov ebp,esp + 0xE8, 0x00, 0x00, 0x00, 0x00, // 03: call 08 + 0xE9, 0x00, 0x00, 0x00, 0x00, // 08: jmp 0D + 0x0F, 0x80, 0x00, 0x00, 0x00, 0x00, // 0D: jo 13 + 0x0F, 0x81, 0x00, 0x00, 0x00, 0x00, // 13: jno 19 + 0x0F, 0x82, 0x00, 0x00, 0x00, 0x00, // 19: jb 1F + 0x0F, 0x83, 0x00, 0x00, 0x00, 0x00, // 1F: jae 25 + 0x0F, 0x84, 0x00, 0x00, 0x00, 0x00, // 25: je 2B + 0x0F, 0x85, 0x00, 0x00, 0x00, 0x00, // 2B: jne 31 + 0x0F, 0x86, 0x00, 0x00, 0x00, 0x00, // 31: jbe 37 + 0x0F, 0x87, 0x00, 0x00, 0x00, 0x00, // 37: ja 3D + 0x0F, 0x88, 0x00, 0x00, 0x00, 0x00, // 3D: js 43 + 0x0F, 0x89, 0x00, 0x00, 0x00, 0x00, // 43: jns 49 + 0x0F, 0x8A, 0x00, 0x00, 0x00, 0x00, // 49: jp 4F + 0x0F, 0x8B, 0x00, 0x00, 0x00, 0x00, // 4F: jnp 55 + 0x0F, 0x8C, 0x00, 0x00, 0x00, 0x00, // 55: jl 5B + 0x0F, 0x8D, 0x00, 0x00, 0x00, 0x00, // 5B: jge 61 + 0x0F, 0x8E, 0x00, 0x00, 0x00, 0x00, // 61: jle 67 + 0x0F, 0x8F, 0x00, 0x00, 0x00, 0x00, // 67: jg 6D + 0xFF, 0x15, 0x00, 0x00, 0x00, 0x00, // 6D: call [rip+00] + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // 73: jmp [rip+00] + 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // 79: mov eax,[rip+00] + 0x8B, 0x3D, 0x00, 0x00, 0x00, 0x00, // 7F: mov edi,[rip+00] + 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00, // 85: lea eax,[rip+00] + 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, // 8B: lea edi,[rip+00] + 0x48, 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // 91: mov rax,[rip+00] + 0x48, 0x8B, 0x3D, 0x00, 0x00, 0x00, 0x00, // 98: mov rdi,[rip+00] + 0x48, 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00, // 9F: lea rax,[rip+00] + 0x48, 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, // A6: lea rdi,[rip+00] + 0x4C, 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // AD: mov r8,[rip+00] + 0x4C, 0x8B, 0x3D, 0x00, 0x00, 0x00, 0x00, // B4: mov r15,[rip+00] + 0x4C, 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00, // BB: lea r8,[rip+00] + 0x4C, 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, // C2: lea r15,[rip+00] + 0x66, 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00,
// C9: mov ax,[rip+00] + 0x66, 0x8B, 0x3D, 0x00, 0x00, 0x00, 0x00, // D0: mov di,[rip+00] + 0x66, 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00, // D7: lea ax,[rip+00] + 0x66, 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, // DE: lea di,[rip+00] + 0x5D, // E5: pop ebp + 0xC3, // E6: ret + }; + + ConstBufferView image = + ConstBufferView::FromRange(std::begin(data), std::end(data)); + + Rel32FinderX64 rel_finder(image); + + // Lists of expected locations as pairs of (cursor position, rel32 position). + std::vector> expected_locations = { + {0x04, 0x04}, {0x09, 0x09}, {0x0E, 0x0F}, {0x14, 0x15}, {0x1A, 0x1B}, + {0x20, 0x21}, {0x26, 0x27}, {0x2C, 0x2D}, {0x32, 0x33}, {0x38, 0x39}, + {0x3E, 0x3F}, {0x44, 0x45}, {0x4A, 0x4B}, {0x50, 0x51}, {0x56, 0x57}, + {0x5C, 0x5D}, {0x62, 0x63}, {0x68, 0x69}, + }; + std::vector> expected_locations_rip = { + {0x6E, 0x6F}, {0x74, 0x75}, {0x7A, 0x7B}, {0x80, 0x81}, {0x86, 0x87}, + {0x8C, 0x8D}, {0x93, 0x94}, {0x9A, 0x9B}, {0xA1, 0xA2}, {0xA8, 0xA9}, + {0xAF, 0xB0}, {0xB6, 0xB7}, {0xBD, 0xBE}, {0xC4, 0xC5}, {0xCB, 0xCC}, + {0xD2, 0xD3}, {0xD9, 0xDA}, {0xE0, 0xE1}, + }; + for (auto location : expected_locations) { + auto result = rel_finder.GetNext(); + EXPECT_TRUE(result.has_value()); + + EXPECT_EQ(location.first, + size_t(rel_finder.region().begin() - image.begin())); + EXPECT_EQ(location.second, size_t(result->location - image.begin())); + EXPECT_EQ(result->location + 4, rel_finder.next_cursor()); + EXPECT_FALSE(result->can_point_outside_section); + rel_finder.Accept(); + } + for (auto location : expected_locations_rip) { + auto result = rel_finder.GetNext(); + EXPECT_TRUE(result.has_value()); + + EXPECT_EQ(location.first, + size_t(rel_finder.region().begin() - image.begin())); + EXPECT_EQ(location.second, size_t(result->location - image.begin())); + EXPECT_EQ(result->location + 4, rel_finder.next_cursor()); + EXPECT_TRUE(result->can_point_outside_section); + rel_finder.Accept(); + } + EXPECT_EQ(base::nullopt, rel_finder.GetNext()); +} + +// TODO(huangs): 
Test that integrates Abs32GapFinder and Rel32Finder. + +} // namespace zucchini diff --git a/rel32_utils.cc b/rel32_utils.cc new file mode 100644 index 0000000..fa59386 --- /dev/null +++ b/rel32_utils.cc @@ -0,0 +1,69 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/rel32_utils.h" + +#include + +#include "base/logging.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +/******** Rel32ReaderX86 ********/ + +Rel32ReaderX86::Rel32ReaderX86(ConstBufferView image, + offset_t lo, + offset_t hi, + const std::vector* locations, + const AddressTranslator& translator) + : image_(image), + target_rva_to_offset_(translator), + location_offset_to_rva_(translator), + hi_(hi), + last_(locations->end()) { + DCHECK_LE(lo, image.size()); + DCHECK_LE(hi, image.size()); + current_ = std::lower_bound(locations->begin(), locations->end(), lo); +} + +Rel32ReaderX86::~Rel32ReaderX86() = default; + +base::Optional Rel32ReaderX86::GetNext() { + while (current_ < last_ && *current_ < hi_) { + offset_t loc_offset = *(current_++); + DCHECK_LE(loc_offset + 4, image_.size()); // Sanity check. + rva_t loc_rva = location_offset_to_rva_.Convert(loc_offset); + rva_t target_rva = loc_rva + 4 + image_.read(loc_offset); + offset_t target_offset = target_rva_to_offset_.Convert(target_rva); + // In rare cases, the most significant bit of |target| is set. This + // interferes with label marking. We expect these to already be filtered out + // from |locations|. 
+ DCHECK(!IsMarked(target_offset)); + return Reference{loc_offset, target_offset}; + } + return base::nullopt; +} + +/******** Rel32WriterX86 ********/ + +Rel32WriterX86::Rel32WriterX86(MutableBufferView image, + const AddressTranslator& translator) + : image_(image), + target_offset_to_rva_(translator), + location_offset_to_rva_(translator) {} + +Rel32WriterX86::~Rel32WriterX86() = default; + +void Rel32WriterX86::PutNext(Reference ref) { + rva_t target_rva = target_offset_to_rva_.Convert(ref.target); + rva_t loc_rva = location_offset_to_rva_.Convert(ref.location); + + // Subtraction underflow is okay + uint32_t code = + static_cast(target_rva) - (static_cast(loc_rva) + 4); + image_.write(ref.location, code); +} + +} // namespace zucchini diff --git a/rel32_utils.h b/rel32_utils.h new file mode 100644 index 0000000..7a01230 --- /dev/null +++ b/rel32_utils.h @@ -0,0 +1,70 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_REL32_UTILS_H_ +#define COMPONENTS_ZUCCHINI_REL32_UTILS_H_ + +#include + +#include "base/macros.h" +#include "base/optional.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// A visitor that emits References (locations and target) from a specified +// portion of an x86 / x64 image, given a list of valid locations. +class Rel32ReaderX86 : public ReferenceReader { + public: + // |image| is an image containing x86 / x64 code in [|lo|, |hi|). + // |locations| is a sorted list of offsets of rel32 reference locations. + // |translator| (for |image|) is embedded into |target_rva_to_offset_| and + // |location_offset_to_rva_| for address translation, and therefore must + // outlive |*this|.
+ Rel32ReaderX86(ConstBufferView image, + offset_t lo, + offset_t hi, + const std::vector* locations, + const AddressTranslator& translator); + ~Rel32ReaderX86() override; + + // Returns the next reference, or base::nullopt if exhausted. + base::Optional GetNext() override; + + private: + ConstBufferView image_; + AddressTranslator::RvaToOffsetCache target_rva_to_offset_; + AddressTranslator::OffsetToRvaCache location_offset_to_rva_; + const offset_t hi_; + const std::vector::const_iterator last_; + std::vector::const_iterator current_; + + DISALLOW_COPY_AND_ASSIGN(Rel32ReaderX86); +}; + +// Writer for x86 / x64 rel32 references. +class Rel32WriterX86 : public ReferenceWriter { + public: + // |image| wraps the raw bytes of a binary in which rel32 references will be + // written. |translator| (for |image|) is embedded into + // |target_offset_to_rva_| and |location_offset_to_rva_| for address + // translation, and therefore must outlive |*this|. + Rel32WriterX86(MutableBufferView image, const AddressTranslator& translator); + ~Rel32WriterX86() override; + + void PutNext(Reference ref) override; + + private: + MutableBufferView image_; + AddressTranslator::OffsetToRvaCache target_offset_to_rva_; + AddressTranslator::OffsetToRvaCache location_offset_to_rva_; + + DISALLOW_COPY_AND_ASSIGN(Rel32WriterX86); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_REL32_UTILS_H_ diff --git a/rel32_utils_unittest.cc b/rel32_utils_unittest.cc new file mode 100644 index 0000000..80928de --- /dev/null +++ b/rel32_utils_unittest.cc @@ -0,0 +1,128 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/rel32_utils.h" + +#include + +#include +#include + +#include "base/memory/ptr_util.h" +#include "base/optional.h" +#include "base/test/gtest_util.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// A trivial AddressTranslator that applies constant shift. +class TestAddressTranslator : public AddressTranslator { + public: + TestAddressTranslator(offset_t image_size, rva_t rva_begin) { + DCHECK_GE(rva_begin, 0U); + CHECK_EQ(AddressTranslator::kSuccess, + Initialize({{0, image_size, rva_begin, image_size}})); + } +}; + +// Checks that |reader| emits and only emits |expected_refs|, in order. +void CheckReader(const std::vector& expected_refs, + ReferenceReader* reader) { + for (Reference expected_ref : expected_refs) { + auto ref = reader->GetNext(); + EXPECT_TRUE(ref.has_value()); + EXPECT_EQ(expected_ref, ref.value()); + } + EXPECT_EQ(base::nullopt, reader->GetNext()); // Nothing should be left. +} + +} // namespace + +TEST(Rel32UtilsTest, Rel32ReaderX86) { + constexpr offset_t kTestImageSize = 0x00100000U; + constexpr rva_t kRvaBegin = 0x00030000U; + TestAddressTranslator translator(kTestImageSize, kRvaBegin); + + // For simplicity, test data is not real X86 machine code. We are only + // including rel32 targets, without the full instructions. + std::vector bytes = { + 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler) + 0x00, 0x00, 0x00, 0x80, // 00030004: 80030008 Marked, so invalid. + 0x04, 0x00, 0x00, 0x00, // 00030008: 00030010 + 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler) + 0x00, 0x00, 0x00, 0x00, // 00030010: 00030014 + 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler) + 0xF4, 0xFF, 0xFF, 0xFF, // 00030018: 00030010 + 0xE4, 0xFF, 0xFF, 0xFF, // 0003001C: 00030004 + }; + ConstBufferView buffer(bytes.data(), bytes.size()); + // Specify rel32 locations directly, instead of parsing. 
+ std::vector rel32_locations = {0x0008U, 0x0010U, 0x0018U, 0x001CU}; + + // Generate everything. + Rel32ReaderX86 reader1(buffer, 0x0000U, 0x0020U, &rel32_locations, + translator); + CheckReader({{0x0008U, 0x0010U}, + {0x0010U, 0x0014U}, + {0x0018U, 0x0010U}, + {0x001CU, 0x0004U}}, + &reader1); + + // Exclude last. + Rel32ReaderX86 reader2(buffer, 0x0000U, 0x001CU, &rel32_locations, + translator); + CheckReader({{0x0008U, 0x0010U}, {0x0010U, 0x0014U}, {0x0018U, 0x0010U}}, + &reader2); + + // Only find one. + Rel32ReaderX86 reader3(buffer, 0x000CU, 0x0018U, &rel32_locations, + translator); + CheckReader({{0x0010U, 0x0014U}}, &reader3); + + // Marked target encountered (error). + std::vector rel32_marked_locations = {0x00004U}; + Rel32ReaderX86 reader4(buffer, 0x0000U, 0x0020U, &rel32_marked_locations, + translator); + EXPECT_DCHECK_DEATH(reader4.GetNext()); +} + +TEST(Rel32UtilsTest, Rel32WriterX86) { + constexpr offset_t kTestImageSize = 0x00100000U; + constexpr rva_t kRvaBegin = 0x00030000U; + TestAddressTranslator translator(kTestImageSize, kRvaBegin); + + std::vector bytes(32, 0xFF); + MutableBufferView buffer(bytes.data(), bytes.size()); + + Rel32WriterX86 writer(buffer, translator); + writer.PutNext({0x0008U, 0x0010U}); + EXPECT_EQ(0x00000004U, buffer.read(0x08)); // 00030008: 00030010 + + writer.PutNext({0x0010U, 0x0014U}); + EXPECT_EQ(0x00000000U, buffer.read(0x10)); // 00030010: 00030014 + + writer.PutNext({0x0018U, 0x0010U}); + EXPECT_EQ(0xFFFFFFF4U, buffer.read(0x18)); // 00030018: 00030010 + + writer.PutNext({0x001CU, 0x0004U}); + EXPECT_EQ(0xFFFFFFE4U, buffer.read(0x1C)); // 0003001C: 00030004 + + EXPECT_EQ(std::vector({ + 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler) + 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler) + 0x04, 0x00, 0x00, 0x00, // 00030008: 00030010 + 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler) + 0x00, 0x00, 0x00, 0x00, // 00030010: 00030014 + 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler) + 0xF4, 0xFF, 0xFF, 0xFF, // 00030018: 00030010 + 
0xE4, 0xFF, 0xFF, 0xFF, // 0003001C: 00030004 + }), + bytes); +} + +} // namespace zucchini diff --git a/reloc_utils.cc b/reloc_utils.cc new file mode 100644 index 0000000..d21a0d3 --- /dev/null +++ b/reloc_utils.cc @@ -0,0 +1,193 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/reloc_utils.h" + +#include +#include +#include + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/io_utils.h" +#include "components/zucchini/type_win_pe.h" + +namespace zucchini { + +/******** RelocUnitWin32 ********/ + +RelocUnitWin32::RelocUnitWin32() = default; +RelocUnitWin32::RelocUnitWin32(uint8_t type_in, + offset_t location_in, + rva_t target_rva_in) + : type(type_in), location(location_in), target_rva(target_rva_in) {} + +bool operator==(const RelocUnitWin32& a, const RelocUnitWin32& b) { + return std::tie(a.type, a.location, a.target_rva) == + std::tie(b.type, b.location, b.target_rva); +} + +/******** RelocRvaReaderWin32 ********/ + +// static +bool RelocRvaReaderWin32::FindRelocBlocks( + ConstBufferView image, + BufferRegion reloc_region, + std::vector* reloc_block_offsets) { + CHECK_LT(reloc_region.size, kOffsetBound); + ConstBufferView reloc_data = image[reloc_region]; + reloc_block_offsets->clear(); + while (reloc_data.size() >= sizeof(pe::RelocHeader)) { + reloc_block_offsets->push_back(reloc_data.begin() - image.begin()); + auto size = reloc_data.read(0).size; + // |size| must be aligned to 4-bytes. + if (size < sizeof(pe::RelocHeader) || size % 4 || size > reloc_data.size()) + return false; + reloc_data.remove_prefix(size); + } + return reloc_data.empty(); // Fail if trailing data exist. 
+} + +RelocRvaReaderWin32::RelocRvaReaderWin32( + ConstBufferView image, + BufferRegion reloc_region, + const std::vector& reloc_block_offsets, + offset_t lo, + offset_t hi) + : image_(image) { + CHECK_LE(lo, hi); + lo = base::checked_cast(reloc_region.InclusiveClamp(lo)); + hi = base::checked_cast(reloc_region.InclusiveClamp(hi)); + end_it_ = image_.begin() + hi; + + // By default, get GetNext() to produce empty output. + cur_reloc_units_ = BufferSource(end_it_, 0); + if (reloc_block_offsets.empty()) + return; + + // Find the block that contains |lo|. + auto block_it = std::upper_bound(reloc_block_offsets.begin(), + reloc_block_offsets.end(), lo); + DCHECK(block_it != reloc_block_offsets.begin()); + --block_it; + + // Initialize |cur_reloc_units_| and |rva_hi_bits_|. + if (!LoadRelocBlock(image_.begin() + *block_it)) + return; // Nothing left. + + // Skip |cur_reloc_units_| to |lo|, truncating up. + offset_t cur_reloc_units_offset = cur_reloc_units_.begin() - image_.begin(); + if (lo > cur_reloc_units_offset) { + offset_t delta = + ceil(lo - cur_reloc_units_offset, kRelocUnitSize); + cur_reloc_units_.Skip(delta); + } +} + +RelocRvaReaderWin32::RelocRvaReaderWin32(RelocRvaReaderWin32&&) = default; + +RelocRvaReaderWin32::~RelocRvaReaderWin32() = default; + +// Unrolls a nested loop: outer = reloc blocks and inner = reloc entries. +base::Optional RelocRvaReaderWin32::GetNext() { + // "Outer loop" to find non-empty reloc block. + while (cur_reloc_units_.Remaining() < kRelocUnitSize) { + if (!LoadRelocBlock(cur_reloc_units_.end())) + return base::nullopt; + } + if (end_it_ - cur_reloc_units_.begin() < kRelocUnitSize) + return base::nullopt; + // "Inner loop" to extract single reloc unit. 
+ offset_t location = cur_reloc_units_.begin() - image_.begin(); + uint16_t entry = cur_reloc_units_.read(0); + uint8_t type = static_cast(entry >> 12); + rva_t rva = rva_hi_bits_ + (entry & 0xFFF); + cur_reloc_units_.Skip(kRelocUnitSize); + return RelocUnitWin32{type, location, rva}; +} + +bool RelocRvaReaderWin32::LoadRelocBlock( + ConstBufferView::const_iterator block_begin) { + ConstBufferView header_buf(block_begin, sizeof(pe::RelocHeader)); + if (header_buf.end() >= end_it_ || + end_it_ - header_buf.end() < kRelocUnitSize) { + return false; + } + const auto& header = header_buf.read(0); + rva_hi_bits_ = header.rva_hi; + uint32_t block_size = header.size; + DCHECK_GE(block_size, sizeof(pe::RelocHeader)); + cur_reloc_units_ = BufferSource(block_begin, block_size); + cur_reloc_units_.Skip(sizeof(pe::RelocHeader)); + return true; +} + +/******** RelocReaderWin32 ********/ + +RelocReaderWin32::RelocReaderWin32(RelocRvaReaderWin32&& reloc_rva_reader, + uint16_t reloc_type, + offset_t offset_bound, + const AddressTranslator& translator) + : reloc_rva_reader_(std::move(reloc_rva_reader)), + reloc_type_(reloc_type), + offset_bound_(offset_bound), + entry_rva_to_offset_(translator) {} + +RelocReaderWin32::~RelocReaderWin32() = default; + +// ReferenceReader: +base::Optional RelocReaderWin32::GetNext() { + for (base::Optional unit = reloc_rva_reader_.GetNext(); + unit.has_value(); unit = reloc_rva_reader_.GetNext()) { + if (unit->type != reloc_type_) + continue; + offset_t target = entry_rva_to_offset_.Convert(unit->target_rva); + if (target == kInvalidOffset) + continue; + offset_t location = unit->location; + if (IsMarked(target)) { + LOG(WARNING) << "Warning: Skipping mark-aliased reloc target: " + << AsHex<8>(location) << " -> " << AsHex<8>(target) << "."; + continue; + } + // Ensures the target (abs32 reference) lies entirely within the image. 
+ if (target >= offset_bound_) + continue; + return Reference{location, target}; + } + return base::nullopt; +} + +/******** RelocWriterWin32 ********/ + +RelocWriterWin32::RelocWriterWin32( + uint16_t reloc_type, + MutableBufferView image, + BufferRegion reloc_region, + const std::vector& reloc_block_offsets, + const AddressTranslator& translator) + : reloc_type_(reloc_type), + image_(image), + reloc_region_(reloc_region), + reloc_block_offsets_(reloc_block_offsets), + target_offset_to_rva_(translator) {} + +RelocWriterWin32::~RelocWriterWin32() = default; + +void RelocWriterWin32::PutNext(Reference ref) { + DCHECK_GE(ref.location, reloc_region_.lo()); + DCHECK_LT(ref.location, reloc_region_.hi()); + auto block_it = std::upper_bound(reloc_block_offsets_.begin(), + reloc_block_offsets_.end(), ref.location); + --block_it; + rva_t rva_hi_bits = image_.read(*block_it).rva_hi; + rva_t target_rva = target_offset_to_rva_.Convert(ref.target); + rva_t rva_lo_bits = target_rva - rva_hi_bits; + DCHECK_EQ(rva_lo_bits & 0xFFF, rva_lo_bits); + image_.write(ref.location, + (rva_lo_bits & 0xFFF) | (reloc_type_ << 12)); +} + +} // namespace zucchini diff --git a/reloc_utils.h b/reloc_utils.h new file mode 100644 index 0000000..aac1efb --- /dev/null +++ b/reloc_utils.h @@ -0,0 +1,140 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_RELOC_UTILS_H_ +#define COMPONENTS_ZUCCHINI_RELOC_UTILS_H_ + +#include +#include + +#include + +#include "base/optional.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_source.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// Win32 PE relocation table stores a list of (type, RVA) pairs. The table is +// organized into "blocks" for RVAs with common high-order bits (12-31). 
Each +// block consists of a list (even length) of 2-byte "units". Each unit stores +// type (in bits 12-15) and low-order bits (0-11) of an RVA (in bits 0-11). In +// pseudo-struct: +// struct Block { +// uint32_t rva_hi; +// uint32_t block_size_in_bytes; // 8 + multiple of 4. +// struct { +// uint16_t rva_lo:12, type:4; // Little-endian. +// } units[(block_size_in_bytes - 8) / 2]; // Size must be even. +// } reloc_table[num_blocks]; // May have padding (type = 0). + +// Extracted Win32 reloc Unit data. +struct RelocUnitWin32 { + RelocUnitWin32(); + RelocUnitWin32(uint8_t type_in, offset_t location_in, rva_t target_rva_in); + friend bool operator==(const RelocUnitWin32& a, const RelocUnitWin32& b); + + uint8_t type; + offset_t location; + rva_t target_rva; +}; + +// A reader that parses Win32 PE relocation data and emits RelocUnitWin32 for +// each reloc unit that lies strictly inside |[lo, hi)|. +class RelocRvaReaderWin32 { + public: + enum : ptrdiff_t { kRelocUnitSize = sizeof(uint16_t) }; + + // Parses |image| at |reloc_region| to find beginning offsets of each reloc + // block. On success, writes the result to |reloc_block_offsets| and returns + // true. Otherwise leaves |reloc_block_offsets| in an undetermined state, and + // returns false. + static bool FindRelocBlocks(ConstBufferView image, + BufferRegion reloc_region, + std::vector* reloc_block_offsets); + + // |reloc_block_offsets| should be precomputed from FindRelocBlocks(). + RelocRvaReaderWin32(ConstBufferView image, + BufferRegion reloc_region, + const std::vector& reloc_block_offsets, + offset_t lo, + offset_t hi); + RelocRvaReaderWin32(RelocRvaReaderWin32&&); + ~RelocRvaReaderWin32(); + + // Successively visits and returns data for each reloc unit, or base::nullopt + // when all reloc units are found. Encapsulates block transition details.
+ base::Optional GetNext(); + + private: + // Assuming that |block_begin| points to the beginning of a reloc block, loads + // |rva_hi_bits_| and assigns |cur_reloc_units_| as the region containing the + // associated units, potentially truncated by |end_it_|. Returns true if reloc + // data are available for read, and false otherwise. + bool LoadRelocBlock(ConstBufferView::const_iterator block_begin); + + const ConstBufferView image_; + + // End iterator. + ConstBufferView::const_iterator end_it_; + + // Unit data of the current reloc block. + BufferSource cur_reloc_units_; + + // High-order bits (12-31) for all relocs of the current reloc block. + rva_t rva_hi_bits_; +}; + +// A reader for Win32 reloc References, implemented as a filtering and +// translation adaptor of RelocRvaReaderWin32. +class RelocReaderWin32 : public ReferenceReader { + public: + // Takes ownership of |reloc_rva_reader|. |offset_bound| specifies the + // exclusive upper bound of reloc target offsets, taking account of widths of + // targets (which are abs32 References). + RelocReaderWin32(RelocRvaReaderWin32&& reloc_rva_reader, + uint16_t reloc_type, + offset_t offset_bound, + const AddressTranslator& translator); + ~RelocReaderWin32() override; + + // ReferenceReader: + base::Optional GetNext() override; + + private: + RelocRvaReaderWin32 reloc_rva_reader_; + const uint16_t reloc_type_; // uint16_t to simplify shifting (<< 12). + const offset_t offset_bound_; + AddressTranslator::RvaToOffsetCache entry_rva_to_offset_; +}; + +// A writer for Win32 reloc References. This is simpler than the reader since: +// - No iteration is required. +// - High-order bits of reloc target RVAs are assumed to be handled elsewhere, +// so only low-order bits need to be written. 
+class RelocWriterWin32 : public ReferenceWriter { + public: + RelocWriterWin32(uint16_t reloc_type, + MutableBufferView image, + BufferRegion reloc_region, + const std::vector& reloc_block_offsets, + const AddressTranslator& translator); + ~RelocWriterWin32() override; + + // ReferenceWriter: + void PutNext(Reference ref) override; + + private: + const uint16_t reloc_type_; + MutableBufferView image_; + BufferRegion reloc_region_; + const std::vector& reloc_block_offsets_; + AddressTranslator::OffsetToRvaCache target_offset_to_rva_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_RELOC_UTILS_H_ diff --git a/reloc_utils_unittest.cc b/reloc_utils_unittest.cc new file mode 100644 index 0000000..e75264c --- /dev/null +++ b/reloc_utils_unittest.cc @@ -0,0 +1,273 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/reloc_utils.h" + +#include + +#include +#include +#include +#include +#include + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "base/test/gtest_util.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// Returns a vector that's the concatenation of two vectors of the same type. +// Elements are copied by value. +template +std::vector Cat(const std::vector& a, const std::vector& b) { + std::vector ret(a); + ret.insert(ret.end(), b.begin(), b.end()); + return ret; +} + +// Returns a subvector of a vector. Elements are copied by value.
+template +std::vector Sub(const std::vector& a, size_t lo, size_t hi) { + return std::vector(a.begin() + lo, a.begin() + hi); +} + +} // namespace + +class RelocUtilsWin32Test : public testing::Test { + protected: + using Units = std::vector; + + RelocUtilsWin32Test() {} + + // Resets all tester data, calls RelocRvaReaderWin32::FindRelocBlocks(), and + // returns its results. + bool Initialize(const std::vector& image_raw, + BufferRegion reloc_region) { + image_ = BufferSource(image_raw.data(), image_raw.size()); + reloc_region_ = reloc_region; + return RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_, + &reloc_block_offsets_); + } + + // Uses RelocRvaReaderWin32 to get all relocs, returned as Units. + Units EmitAll(offset_t lo, offset_t hi) { + RelocRvaReaderWin32 reader(image_, reloc_region_, reloc_block_offsets_, lo, + hi); + Units units; + for (auto unit = reader.GetNext(); unit.has_value(); + unit = reader.GetNext()) { + units.push_back(unit.value()); + } + return units; + } + + ConstBufferView image_; + BufferRegion reloc_region_; + std::vector reloc_block_offsets_; +}; + +TEST_F(RelocUtilsWin32Test, RvaReaderEmpty) { + { + std::vector image_raw = ParseHexString(""); + EXPECT_TRUE(Initialize(image_raw, {0U, 0U})); + EXPECT_EQ(std::vector(), reloc_block_offsets_); // Nothing. + EXPECT_EQ(Units(), EmitAll(0U, 0U)); + } + { + std::vector image_raw = ParseHexString("AA BB CC DD EE FF"); + EXPECT_TRUE(Initialize(image_raw, {2U, 0U})); + EXPECT_EQ(std::vector(), reloc_block_offsets_); // Nothing. + EXPECT_EQ(Units(), EmitAll(2U, 2U)); + } + { + std::vector image_raw = ParseHexString("00 C0 00 00 08 00 00 00"); + EXPECT_TRUE(Initialize(image_raw, {0U, image_raw.size()})); + EXPECT_EQ(std::vector({0U}), + reloc_block_offsets_); // Empty block. + EXPECT_EQ(Units(), EmitAll(0U, 8U)); + } +} + +TEST_F(RelocUtilsWin32Test, RvaReaderBad) { + std::string test_cases[] = { + "00 C0 00 00 07 00 00", // Header too small. 
+ "00 C0 00 00 08 00 00", // Header too small, lies about size. + "00 C0 00 00 0A 00 00 00 66 31", // Odd number of units. + "00 C0 00 00 0C 00 00 00 66 31 88 31 FF", // Trailing data. + }; + for (const std::string& test_case : test_cases) { + std::vector image_raw = ParseHexString(test_case); + EXPECT_FALSE(Initialize(image_raw, {0U, image_raw.size()})); + } +} + +TEST_F(RelocUtilsWin32Test, RvaReaderSingle) { + // Block 0: All type 0x3: {0xC166, 0xC288, 0xC342, (padding) 0xCFFF}. + std::vector image_raw = ParseHexString( + "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF " + "00 C0 00 00 10 00 00 00 66 31 88 32 42 33 FF 0F " + "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF"); + constexpr offset_t kBlock0 = 16U; + Units exp0 = {{3, kBlock0 + 8U, 0xC166U}, + {3, kBlock0 + 10U, 0xC288U}, + {3, kBlock0 + 12U, 0xC342U}, + {0, kBlock0 + 14U, 0xCFFFU}}; + + EXPECT_TRUE(Initialize(image_raw, {16U, 16U})); + EXPECT_EQ(exp0, EmitAll(kBlock0, kBlock0 + 16U)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 8U)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 9U)); + EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0, kBlock0 + 10U)); + EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0 + 8U, kBlock0 + 10U)); + EXPECT_EQ(Units(), EmitAll(kBlock0 + 9U, kBlock0 + 10U)); + EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 15U)); + EXPECT_EQ(Sub(exp0, 2, 3), EmitAll(kBlock0 + 11U, kBlock0 + 15U)); +} + +TEST_F(RelocUtilsWin32Test, RvaReaderMulti) { + // The sample image encodes 3 reloc blocks: + // Block 0: All type 0x3: {0xC166, 0xC288, 0xC342, (padding) 0xCFFF}. + // Block 1: All type 0x3: {0x12166, 0x12288}. + // Block 2: All type 0xA: {0x24000, 0x24010, 0x24020, 0x24028, 0x2403C, + // 0x24170}.
+ std::vector image_raw = ParseHexString( + "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF " + "00 C0 00 00 10 00 00 00 66 31 88 32 42 33 FF 0F " + "00 20 01 00 0C 00 00 00 66 31 88 32 " + "00 40 02 00 14 00 00 00 00 A0 10 A0 20 A0 28 A0 3C A0 70 A1 " + "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF"); + offset_t image_size = base::checked_cast(image_raw.size()); + constexpr offset_t kBlock0 = 16U; + constexpr offset_t kBlock1 = kBlock0 + 16U; + constexpr offset_t kBlock2 = kBlock1 + 12U; + constexpr offset_t kBlockEnd = kBlock2 + 20U; + Units exp0 = {{3, kBlock0 + 8U, 0xC166U}, + {3, kBlock0 + 10U, 0xC288U}, + {3, kBlock0 + 12U, 0xC342U}, + {0, kBlock0 + 14U, 0xCFFFU}}; + Units exp1 = {{3, kBlock0 + 24U, 0x12166U}, {3, kBlock0 + 26U, 0x12288U}}; + Units exp2 = {{10, kBlock0 + 36U, 0x24000U}, {10, kBlock0 + 38U, 0x24010U}, + {10, kBlock0 + 40U, 0x24020U}, {10, kBlock0 + 42U, 0x24028U}, + {10, kBlock0 + 44U, 0x2403CU}, {10, kBlock0 + 46U, 0x24170U}}; + + EXPECT_TRUE(Initialize(image_raw, {kBlock0, kBlockEnd - kBlock0})); + EXPECT_EQ(std::vector({kBlock0, kBlock1, kBlock2}), + reloc_block_offsets_); + + // Everything. + EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0, kBlockEnd)); + EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(0, image_size)); + // Entire blocks. + EXPECT_EQ(exp0, EmitAll(kBlock0, kBlock1)); + EXPECT_EQ(exp1, EmitAll(kBlock1, kBlock2)); + EXPECT_EQ(exp2, EmitAll(kBlock2, kBlockEnd)); + EXPECT_EQ(Units(), EmitAll(0, kBlock0)); + EXPECT_EQ(Units(), EmitAll(kBlockEnd, image_size)); + // Within blocks, clipped at boundaries. 
+ EXPECT_EQ(exp0, EmitAll(kBlock0 + 5U, kBlock1)); + EXPECT_EQ(exp0, EmitAll(kBlock0 + 8U, kBlock1)); + EXPECT_EQ(Sub(exp0, 1, 4), EmitAll(kBlock0 + 9U, kBlock1)); + EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 15U)); + EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 14U)); + EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0 + 8U, kBlock0 + 10U)); + EXPECT_EQ(Sub(exp1, 1, 2), EmitAll(kBlock1 + 10U, kBlock1 + 12U)); + EXPECT_EQ(Sub(exp2, 2, 4), EmitAll(kBlock2 + 12U, kBlock2 + 16U)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 8U)); + EXPECT_EQ(Units(), EmitAll(kBlock2 + 10U, kBlock2 + 11U)); + EXPECT_EQ(Units(), EmitAll(kBlock2 + 11U, kBlock2 + 12U)); + // Across blocks. + EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0 - 5U, kBlockEnd)); + EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0 + 6U, kBlockEnd)); + EXPECT_EQ(Cat(Cat(exp0, exp1), Sub(exp2, 0, 5)), + EmitAll(kBlock0 + 6U, kBlock2 + 18U)); + EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)), + EmitAll(kBlock0 + 12U, kBlock1 + 10U)); + EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)), + EmitAll(kBlock0 + 11U, kBlock1 + 10U)); + EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)), + EmitAll(kBlock0 + 12U, kBlock1 + 11U)); + EXPECT_EQ(Sub(exp1, 1, 2), EmitAll(kBlock1 + 10U, kBlock2 + 5U)); + EXPECT_EQ(Cat(Sub(exp1, 1, 2), exp2), EmitAll(kBlock1 + 10U, kBlockEnd + 5)); + EXPECT_EQ(Units(), EmitAll(kBlock0 + 15, kBlock1 + 9)); +} + +TEST_F(RelocUtilsWin32Test, ReadWrite) { + // Set up mock image: Size = 0x3000, .reloc at 0x600. RVA is 0x40000 + offset. + constexpr rva_t kBaseRva = 0x40000; + std::vector image_data(0x3000, 0xFF); + // 4 x86 relocs (xx 3x), 3 x64 relocs (xx Ax), 1 padding (xx 0X). 
+ std::vector reloc_data = ParseHexString( + "00 10 04 00 10 00 00 00 C0 32 18 A3 F8 A7 FF 0F " + "00 20 04 00 10 00 00 00 80 A0 65 31 F8 37 BC 3A"); + reloc_region_ = {0x600, reloc_data.size()}; + std::copy(reloc_data.begin(), reloc_data.end(), + image_data.begin() + reloc_region_.lo()); + image_ = {image_data.data(), image_data.size()}; + offset_t image_size = base::checked_cast(image_.size()); + + AddressTranslator translator; + translator.Initialize({{0, image_size, kBaseRva, image_size}}); + + // Precompute |reloc_block_offsets_|. + EXPECT_TRUE(RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_, + &reloc_block_offsets_)); + EXPECT_EQ(std::vector({0x600U, 0x610U}), reloc_block_offsets_); + + // Focus on x86. + constexpr uint16_t kRelocTypeX86 = 3; + constexpr offset_t kVAWidthX86 = 4; + + // Make RelocRvaReaderWin32. + RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_, + reloc_block_offsets_, 0, image_size); + offset_t offset_bound = image_size - kVAWidthX86 + 1; + + // Make RelocReaderWin32 that wraps |reloc_rva_reader|. + auto reader = std::make_unique( + std::move(reloc_rva_reader), kRelocTypeX86, offset_bound, translator); + + // Read all references and check. + std::vector refs; + for (base::Optional ref = reader->GetNext(); ref.has_value(); + ref = reader->GetNext()) { + refs.push_back(ref.value()); + } + std::vector exp_refs{ + {0x608, 0x12C0}, {0x61A, 0x2165}, {0x61C, 0x27F8}, {0x61E, 0x2ABC}}; + EXPECT_EQ(exp_refs, refs); + + // Write reference, extract bytes and check. 
+ MutableBufferView mutable_image(&image_data[0], image_data.size()); + auto writer = std::make_unique( + kRelocTypeX86, mutable_image, reloc_region_, reloc_block_offsets_, + translator); + + writer->PutNext({0x608, 0x1F83}); + std::vector exp_reloc_data1 = ParseHexString( + "00 10 04 00 10 00 00 00 83 3F 18 A3 F8 A7 FF 0F " + "00 20 04 00 10 00 00 00 80 A0 65 31 F8 37 BC 3A"); + EXPECT_EQ(exp_reloc_data1, + Sub(image_data, reloc_region_.lo(), reloc_region_.hi())); + + EXPECT_DCHECK_DEATH(writer->PutNext({0x608, 0x2000})); + + writer->PutNext({0x61C, 0x2950}); + std::vector exp_reloc_data2 = ParseHexString( + "00 10 04 00 10 00 00 00 83 3F 18 A3 F8 A7 FF 0F " + "00 20 04 00 10 00 00 00 80 A0 65 31 50 39 BC 3A"); + EXPECT_EQ(exp_reloc_data2, + Sub(image_data, reloc_region_.lo(), reloc_region_.hi())); +} + +} // namespace zucchini diff --git a/suffix_array.h b/suffix_array.h new file mode 100644 index 0000000..ea49a43 --- /dev/null +++ b/suffix_array.h @@ -0,0 +1,475 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_ +#define COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_ + +#include +#include +#include +#include + +#include "base/logging.h" +#include "base/macros.h" + +namespace zucchini { + +// A functor class that implements the naive suffix sorting algorithm that uses +// std::sort with lexicographical compare. This is only meant as reference of +// the interface. +class NaiveSuffixSort { + public: + // Type requirements: + // |InputRng| is an input random access range. + // |KeyType| is an unsigned integer type. + // |SAIt| is a random access iterator with mutable references. + template + // |str| is the input string on which suffix sort is applied. 
+ // Characters found in |str| must be in the range [0, |key_bound|) + // |suffix_array| is the beginning of the destination range, which is at least + // as large as |str|. + void operator()(const InputRng& str, + KeyType key_bound, + SAIt suffix_array) const { + using size_type = typename SAIt::value_type; + + size_type n = static_cast(std::end(str) - std::begin(str)); + + // |suffix_array| is first filled with ordered indices of |str|. + // Those indices are then sorted with lexicographical comparisons in |str|. + std::iota(suffix_array, suffix_array + n, 0); + std::sort(suffix_array, suffix_array + n, [&str](size_type i, size_type j) { + return std::lexicographical_compare(std::begin(str) + i, std::end(str), + std::begin(str) + j, std::end(str)); + }); + } +}; + +// A functor class that implements suffix array induced sorting (SA-IS) +// algorithm with linear time and memory complexity, +// see http://ieeexplore.ieee.org/abstract/document/5582081/ +class InducedSuffixSort { + public: + // Type requirements: + // |InputRng| is an input random access range. + // |KeyType| is an unsigned integer type. + // |SAIt| is a random access iterator with mutable values. + template + // |str| is the input string on which suffix sort is applied. + // Characters found in |str| must be in the range [0, |key_bound|) + // |suffix_array| is the beginning of the destination range, which is at least + // as large as |str|. + void operator()(const InputRng& str, + KeyType key_bound, + SAIt suffix_array) const { + using value_type = typename InputRng::value_type; + using size_type = typename SAIt::value_type; + + static_assert(std::is_unsigned::value, + "SA-IS only supports input string with unsigned values"); + static_assert(std::is_unsigned::value, "KeyType must be unsigned"); + + size_type n = static_cast(std::end(str) - std::begin(str)); + + Implementation::SuffixSort(std::begin(str), n, + key_bound, suffix_array); + } + + // Given string S of length n. 
We assume S is terminated by a unique sentinel + // $, which is considered as the smallest character. This sentinel does not + // exist in memory and is only treated implicitly, hence |n| does not count + // the sentinel in this implementation. We denote suf(S,i) the suffix formed + // by S[i..n). + + // A suffix suf(S,i) is said to be S-type or L-type, if suf(S,i) < suf(S,i+1) + // or suf(S,i) > suf(S,i+1), respectively. + enum SLType : bool { SType, LType }; + + // A character S[i] is said to be S-type or L-type if the suffix suf(S,i) is + // S-type or L-type, respectively. + + // A character S[i] is called LMS (leftmost S-type), if S[i] is S-type and + // S[i-1] is L-type. A suffix suf(S,i) is called LMS, if S[i] is an LMS + // character. + + // A substring S[i..j) is an LMS-substring if + // (1) S[i] is LMS, S[j] is LMS or the sentinel $, and S[i..j) has no other + // LMS characters, or + // (2) S[i..j) is the sentinel $. + + template + struct Implementation { + static_assert(std::is_unsigned::value, + "SizeType must be unsigned"); + static_assert(std::is_unsigned::value, "KeyType must be unsigned"); + using size_type = SizeType; + using key_type = KeyType; + + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + + // Partition every suffix based on SL-type. Returns the number of LMS + // suffixes. + template + static size_type BuildSLPartition( + StrIt str, + size_type length, + key_type key_bound, + std::vector::reverse_iterator sl_partition_it) { + // We will count LMS suffixes (S to L-type or last S-type). + size_type lms_count = 0; + + // |previous_type| is initialized to L-type to avoid counting an extra + // LMS suffix at the end + SLType previous_type = LType; + + // Initialized to dummy, impossible key. 
+ key_type previous_key = key_bound; + + // We're travelling backward to determine the partition, + // as if we prepend one character at a time to the string, ex: + // b$ is L-type because b > $. + // ab$ is S-type because a < b, implying ab$ < b$. + // bab$ is L-type because b > a, implying bab$ > ab$. + // bbab$ is L-type, because bab$ was also L-type, implying bbab$ > bab$. + for (auto str_it = std::reverse_iterator(str + length); + str_it != std::reverse_iterator(str); + ++str_it, ++sl_partition_it) { + key_type current_key = *str_it; + + if (current_key > previous_key || previous_key == key_bound) { + // S[i] > S[i + 1] or S[i] is last character. + if (previous_type == SType) + // suf(S,i) is L-type and suf(S,i + 1) is S-type, therefore, + // suf(S,i+1) was a LMS suffix. + ++lms_count; + + previous_type = LType; // For next round. + } else if (current_key < previous_key) { + // S[i] < S[i + 1] + previous_type = SType; // For next round. + } + // Else, S[i] == S[i + 1]: + // The next character that differs determines the SL-type, + // so we reuse the last seen type. + + *sl_partition_it = previous_type; + previous_key = current_key; // For next round. + } + + return lms_count; + } + + // Find indices of LMS suffixes and write result to |lms_indices|. 
+ static void FindLmsSuffixes(const std::vector& sl_partition, + iterator lms_indices) { + // |previous_type| is initialized to S-type to avoid counting an extra + // LMS suffix at the beginning + SLType previous_type = SType; + for (size_type i = 0; i < sl_partition.size(); ++i) { + if (sl_partition[i] == SType && previous_type == LType) + *lms_indices++ = i; + previous_type = sl_partition[i]; + } + } + + template + static std::vector MakeBucketCount(StrIt str, + size_type length, + key_type key_bound) { + // Occurrence of every unique character is counted in |buckets| + std::vector buckets(static_cast(key_bound)); + + for (auto it = str; it != str + length; ++it) + ++buckets[*it]; + return buckets; + } + + // Apply induced sort from |lms_indices| to |suffix_array| associated with + // the string |str|. + template + static void InducedSort(StrIt str, + size_type length, + const std::vector& sl_partition, + const std::vector& lms_indices, + const std::vector& buckets, + SAIt suffix_array) { + // All indices are first marked as unset with the illegal value |length|. + std::fill(suffix_array, suffix_array + length, length); + + // Used to mark bucket boundaries (head or end) as indices in str. + DCHECK(!buckets.empty()); + std::vector bucket_bounds(buckets.size()); + + // Step 1: Assign indices for LMS suffixes, populating the end of + // respective buckets but keeping relative order. + + // Find the end of each bucket and write it to |bucket_bounds|. + std::partial_sum(buckets.begin(), buckets.end(), bucket_bounds.begin()); + + // Process each |lms_indices| backward, and assign them to the end of + // their respective buckets, so relative order is preserved. 
+ for (auto it = lms_indices.crbegin(); it != lms_indices.crend(); ++it) { + key_type key = str[*it]; + suffix_array[--bucket_bounds[key]] = *it; + } + + // Step 2 + // Scan forward |suffix_array|; for each modified suf(S,i) for which + // suf(S,SA(i) - 1) is L-type, place suf(S,SA(i) - 1) to the current + // head of the corresponding bucket and forward the bucket head to the + // right. + + // Find the head of each bucket and write it to |bucket_bounds|. Since + // only LMS suffixes were inserted in |suffix_array| during Step 1, + // |bucket_bounds| does not contain the head of each bucket and needs to + // be updated. + bucket_bounds[0] = 0; + std::partial_sum(buckets.begin(), buckets.end() - 1, + bucket_bounds.begin() + 1); + + // From Step 1, the sentinel $, which we treat implicitly, would have + // been placed at the beginning of |suffix_array|, since $ is always + // considered as the smallest character. We then have to deal with the + // previous (last) suffix. + if (sl_partition[length - 1] == LType) { + key_type key = str[length - 1]; + suffix_array[bucket_bounds[key]++] = length - 1; + } + for (auto it = suffix_array; it != suffix_array + length; ++it) { + size_type suffix_index = *it; + + // While the original algorithm marks unset suffixes with -1, + // we found that marking them with |length| is also possible and more + // convenient because we are working with unsigned integers. + if (suffix_index != length && suffix_index > 0 && + sl_partition[--suffix_index] == LType) { + key_type key = str[suffix_index]; + suffix_array[bucket_bounds[key]++] = suffix_index; + } + } + + // Step 3 + // Scan backward |suffix_array|; for each modified suf(S, i) for which + // suf(S,SA(i) - 1) is S-type, place suf(S,SA(i) - 1) to the current + // end of the corresponding bucket and forward the bucket end to the + // left. + + // Find the end of each bucket and write it to |bucket_bounds|.
Since + // only L-type suffixes where inserted in |suffix_array| during Step 2, + // |bucket_bounds| does not contain the end of each bucket and needs to + // be updated. + std::partial_sum(buckets.begin(), buckets.end(), bucket_bounds.begin()); + + for (auto it = std::reverse_iterator(suffix_array + length); + it != std::reverse_iterator(suffix_array); ++it) { + size_type suffix_index = *it; + if (suffix_index != length && suffix_index > 0 && + sl_partition[--suffix_index] == SType) { + key_type key = str[suffix_index]; + suffix_array[--bucket_bounds[key]] = suffix_index; + } + } + // Deals with the last suffix, because of the sentinel. + if (sl_partition[length - 1] == SType) { + key_type key = str[length - 1]; + suffix_array[--bucket_bounds[key]] = length - 1; + } + } + + // Given a string S starting at |str| with length |length|, an array + // starting at |substring_array| containing lexicographically ordered LMS + // terminated substring indices of S and an SL-Type partition |sl_partition| + // of S, assigns a unique label to every unique LMS substring. The sorted + // labels for all LMS substrings are written to |lms_str|, while the indices + // of LMS suffixes are written to |lms_indices|. In addition, returns the + // total number of unique labels. + template + static size_type LabelLmsSubstrings(StrIt str, + size_type length, + const std::vector& sl_partition, + SAIt suffix_array, + iterator lms_indices, + iterator lms_str) { + // Labelling starts at 0. + size_type label = 0; + + // |previous_lms| is initialized to 0 to indicate it is unset. + // Note that suf(S,0) is never a LMS suffix. Substrings will be visited in + // lexicographical order. + size_type previous_lms = 0; + for (auto it = suffix_array; it != suffix_array + length; ++it) { + if (*it > 0 && sl_partition[*it] == SType && + sl_partition[*it - 1] == LType) { + // suf(S, *it) is a LMS suffix. + + size_type current_lms = *it; + if (previous_lms != 0) { + // There was a previous LMS suffix. 
Check if the current LMS + // substring is equal to the previous one. + SLType current_lms_type = SType; + SLType previous_lms_type = SType; + for (size_type k = 0;; ++k) { + // |current_lms_end| and |previous_lms_end| denote whether we have + // reached the end of the current and previous LMS substring, + // respectively + bool current_lms_end = false; + bool previous_lms_end = false; + + // Check for both previous and current substring ends. + // Note that it is more convenient to check if + // suf(S,current_lms + k) is an LMS suffix than to retrieve it + // from lms_indices. + if (current_lms + k >= length || + (current_lms_type == LType && + sl_partition[current_lms + k] == SType)) { + current_lms_end = true; + } + if (previous_lms + k >= length || + (previous_lms_type == LType && + sl_partition[previous_lms + k] == SType)) { + previous_lms_end = true; + } + + if (current_lms_end && previous_lms_end) { + break; // Previous and current substrings are identical. + } else if (current_lms_end != previous_lms_end || + str[current_lms + k] != str[previous_lms + k]) { + // Previous and current substrings differ, a new label is used. + ++label; + break; + } + + current_lms_type = sl_partition[current_lms + k]; + previous_lms_type = sl_partition[previous_lms + k]; + } + } + *lms_indices++ = *it; + *lms_str++ = label; + previous_lms = current_lms; + } + } + + return label + 1; + } + + // Implementation of the SA-IS algorithm. |str| must be a random access + // iterator pointing at the beginning of S with length |length|. The result + // is writtend in |suffix_array|, a random access iterator. 
+ template + static void SuffixSort(StrIt str, + size_type length, + key_type key_bound, + SAIt suffix_array) { + if (length == 1) + *suffix_array = 0; + if (length < 2) + return; + + std::vector sl_partition(length); + size_type lms_count = + BuildSLPartition(str, length, key_bound, sl_partition.rbegin()); + std::vector lms_indices(lms_count); + FindLmsSuffixes(sl_partition, lms_indices.begin()); + std::vector buckets = MakeBucketCount(str, length, key_bound); + + if (lms_indices.size() > 1) { + // Given |lms_indices| in the same order they appear in |str|, induce + // LMS substrings relative order and write result to |suffix_array|. + InducedSort(str, length, sl_partition, lms_indices, buckets, + suffix_array); + std::vector lms_str(lms_indices.size()); + + // Given LMS substrings in relative order found in |suffix_array|, + // map LMS substrings to unique labels to form a new string, |lms_str|. + size_type label_count = + LabelLmsSubstrings(str, length, sl_partition, suffix_array, + lms_indices.begin(), lms_str.begin()); + + if (label_count < lms_str.size()) { + // Reorder |lms_str| to have LMS suffixes in the same order they + // appear in |str|. + for (size_type i = 0; i < lms_indices.size(); ++i) + suffix_array[lms_indices[i]] = lms_str[i]; + + SLType previous_type = SType; + for (size_type i = 0, j = 0; i < sl_partition.size(); ++i) { + if (sl_partition[i] == SType && previous_type == LType) { + lms_str[j] = suffix_array[i]; + lms_indices[j++] = i; + } + previous_type = sl_partition[i]; + } + + // Recursively apply SuffixSort on |lms_str|, which is formed from + // labeled LMS suffixes in the same order they appear in |str|. + // Note that |KeyType| will be size_type because |lms_str| contains + // indices. |lms_str| is at most half the length of |str|. + Implementation::SuffixSort( + lms_str.begin(), static_cast(lms_str.size()), + label_count, suffix_array); + + // Map LMS labels back to indices in |str| and write result to + // |lms_indices|. 
We're using |suffix_array| as a temporary buffer. + for (size_type i = 0; i < lms_indices.size(); ++i) + suffix_array[i] = lms_indices[suffix_array[i]]; + std::copy_n(suffix_array, lms_indices.size(), lms_indices.begin()); + + // At this point, |lms_indices| contains sorted LMS suffixes of |str|. + } + } + // Given |lms_indices| where LMS suffixes are sorted, induce the full + // order of suffixes in |str|. + InducedSort(str, length, sl_partition, lms_indices, buckets, + suffix_array); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Implementation); + }; +}; + +// Generates a sorted suffix array for the input string |str| using the functor +// |Algorithm| which provides an interface equivalent to NaiveSuffixSort. +// Characters found in |str| are assumed to be in range [0, |key_bound|). +// Returns the suffix array as a vector. +// |StrRng| is an input random access range. +// |KeyType| is an unsigned integer type. +template +std::vector MakeSuffixArray(const StrRng& str, + KeyType key_bound) { + Algorithm sort; + std::vector suffix_array(str.end() - str.begin()); + sort(str, key_bound, suffix_array.begin()); + return suffix_array; +} + +// Type requirements: +// |SARng| is an input random access range. +// |StrIt1| is a random access iterator. +// |StrIt2| is a forward iterator. +template +// Lexicographical lower bound using binary search for +// [|str2_first|, |str2_last|) in the suffix array |suffix_array| of a string +// starting at |str1_first|. This does not necessarily return the index of +// the longest matching substring.
+auto SuffixLowerBound(const SARng& suffix_array, + StrIt1 str1_first, + StrIt2 str2_first, + StrIt2 str2_last) -> decltype(std::begin(suffix_array)) { + using size_type = typename SARng::value_type; + + size_t n = std::end(suffix_array) - std::begin(suffix_array); + auto it = std::lower_bound( + std::begin(suffix_array), std::end(suffix_array), str2_first, + [str1_first, str2_last, n](size_type a, StrIt2 b) { + return std::lexicographical_compare(str1_first + a, str1_first + n, b, + str2_last); + }); + return it; +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_ diff --git a/suffix_array_unittest.cc b/suffix_array_unittest.cc new file mode 100644 index 0000000..c6f8b02 --- /dev/null +++ b/suffix_array_unittest.cc @@ -0,0 +1,331 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/suffix_array.h" + +#include +#include + +#include +#include +#include +#include + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using SLType = InducedSuffixSort::SLType; + +} // namespace + +using ustring = std::basic_string; + +constexpr uint16_t kNumChar = 256; + +ustring MakeUnsignedString(const std::string& str) { + return {str.begin(), str.end()}; +} + +template +std::vector MakeVector(const std::initializer_list& ilist) { + return {ilist.begin(), ilist.end()}; +} + +void TestSlPartition(std::initializer_list expected_sl_partition, + std::initializer_list expected_lms_indices, + std::string str) { + using SaisImpl = InducedSuffixSort::Implementation; + + std::vector sl_partition(str.size()); + EXPECT_EQ(expected_lms_indices.size(), + SaisImpl::BuildSLPartition(str.begin(), str.size(), kNumChar, + sl_partition.rbegin())); + EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition); + + std::vector lms_indices(expected_lms_indices.size()); + 
SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin()); + EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices); +} + +TEST(InducedSuffixSortTest, BuildSLPartition) { + TestSlPartition({}, {}, ""); + TestSlPartition( + { + SLType::LType, + }, + {}, "a"); + TestSlPartition( + { + SLType::LType, SLType::LType, + }, + {}, "ba"); + TestSlPartition( + { + SLType::SType, SLType::LType, + }, + {}, "ab"); + TestSlPartition( + { + SLType::SType, SLType::SType, SLType::LType, + }, + {}, "aab"); + TestSlPartition( + { + SLType::LType, SLType::LType, SLType::LType, + }, + {}, "bba"); + TestSlPartition( + { + SLType::LType, SLType::SType, SLType::LType, + }, + {1}, "bab"); + TestSlPartition( + { + SLType::LType, SLType::SType, SLType::SType, SLType::LType, + }, + {1}, "baab"); + + TestSlPartition( + { + SLType::LType, // zucchini + SLType::LType, // ucchini + SLType::SType, // cchini + SLType::SType, // chini + SLType::SType, // hini + SLType::SType, // ini + SLType::LType, // ni + SLType::LType, // i + }, + {2}, "zucchini"); +} + +std::vector BucketCount(const std::initializer_list str, + uint16_t max_key) { + using SaisImpl = InducedSuffixSort::Implementation; + return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key); +} + +TEST(InducedSuffixSortTest, BucketCount) { + using vec = std::vector; + + EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4)); + EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4)); + EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4)); +} + +std::vector InducedSortSubstring(ustring str) { + using SaisImpl = InducedSuffixSort::Implementation; + std::vector sl_partition(str.size()); + size_t lms_count = SaisImpl::BuildSLPartition( + str.begin(), str.size(), kNumChar, sl_partition.rbegin()); + std::vector lms_indices(lms_count); + SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin()); + auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), kNumChar); + + std::vector suffix_array(str.size()); + 
SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets, + suffix_array.begin()); + + return suffix_array; +} + +TEST(InducedSuffixSortTest, InducedSortSubstring) { + using vec = std::vector; + + auto us = MakeUnsignedString; + + // L; a$ + EXPECT_EQ(vec({0}), InducedSortSubstring(us("a"))); + + // SL; ab$, b$ + EXPECT_EQ(vec({0, 1}), InducedSortSubstring(us("ab"))); + + // LL; a$, ba$ + EXPECT_EQ(vec({1, 0}), InducedSortSubstring(us("ba"))); + + // SLL; a$, aba$, ba$ + EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring(us("aba"))); + + // LSL; ab$, b$, ba + EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring(us("bab"))); + + // SSL; aab$, ab$, b$ + EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring(us("aab"))); + + // LSSL; aab$, ab$, b$, ba + EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring(us("baab"))); +} + +template +void TestSuffixSort(ustring test_str) { + std::vector suffix_array = + MakeSuffixArray(test_str, kNumChar); + EXPECT_EQ(test_str.size(), suffix_array.size()); + + // Expect that I[] is a permutation of [0, len]. + std::vector sorted_suffix(suffix_array.begin(), suffix_array.end()); + std::sort(sorted_suffix.begin(), sorted_suffix.end()); + for (size_t i = 0; i < test_str.size(); ++i) + EXPECT_EQ(i, sorted_suffix[i]); + + // Expect that all suffixes are strictly ordered. 
+ auto end = test_str.end(); + for (size_t i = 1; i < test_str.size(); ++i) { + auto suf1 = test_str.begin() + suffix_array[i - 1]; + auto suf2 = test_str.begin() + suffix_array[i]; + bool is_less = std::lexicographical_compare(suf1, end, suf2, end); + EXPECT_TRUE(is_less); + } +} + +constexpr const char* test_strs[] = { + "", + "a", + "aa", + "za", + "CACAO", + "aaaaa", + "banana", + "tobeornottobe", + "The quick brown fox jumps over the lazy dog.", + "elephantelephantelephantelephantelephant", + "walawalawashington", + "-------------------------", + "011010011001011010010110011010010", + "3141592653589793238462643383279502884197169399375105", + "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD", + "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba", + "0123456789876543210", + "9876543210123456789", + "aababcabcdabcdeabcdefabcdefg", + "asdhklgalksdjghalksdjghalksdjgh", +}; + +TEST(SuffixSortTest, NaiveSuffixSort) { + for (const std::string& test_str : test_strs) { + TestSuffixSort(MakeUnsignedString(test_str)); + } +} + +TEST(SuffixSortTest, InducedSuffixSortSort) { + for (const std::string& test_str : test_strs) { + TestSuffixSort(MakeUnsignedString(test_str)); + } +} + +// Test with sequence that has every character. 
+TEST(SuffixSortTest, AllChar) { + std::vector all_char(kNumChar); + std::iota(all_char.begin(), all_char.end(), 0); + + { + std::vector suffix_array = + MakeSuffixArray(all_char, kNumChar); + for (size_t i = 0; i < kNumChar; ++i) + EXPECT_EQ(i, suffix_array[i]); + } + + std::vector all_char_reverse(all_char.rbegin(), + all_char.rend()); + { + std::vector suffix_array = + MakeSuffixArray(all_char_reverse, kNumChar); + for (size_t i = 0; i < kNumChar; ++i) + EXPECT_EQ(kNumChar - i - 1, suffix_array[i]); + } +} + +void TestSuffixLowerBound(ustring base_str, ustring search_str) { + std::vector suffix_array = + MakeSuffixArray(base_str, kNumChar); + + auto pos = SuffixLowerBound(suffix_array, base_str.begin(), + search_str.begin(), search_str.end()); + + auto end = base_str.end(); + if (pos != suffix_array.begin()) { + // Previous suffix is less than |search_str|. + auto suf = base_str.begin() + pos[-1]; + bool is_less = std::lexicographical_compare(suf, end, search_str.begin(), + search_str.end()); + EXPECT_TRUE(is_less); + } + if (pos != suffix_array.end()) { + // Current suffix is greater than or equal to |search_str|. + auto suf = base_str.begin() + *pos; + bool is_less = std::lexicographical_compare(suf, end, search_str.begin(), + search_str.end()); + EXPECT_FALSE(is_less); + } +} + +TEST(SuffixArrayTest, LowerBound) { + auto us = MakeUnsignedString; + + TestSuffixLowerBound(us(""), us("")); + TestSuffixLowerBound(us(""), us("a")); + TestSuffixLowerBound(us("b"), us("")); + TestSuffixLowerBound(us("b"), us("a")); + TestSuffixLowerBound(us("b"), us("c")); + TestSuffixLowerBound(us("b"), us("bc")); + TestSuffixLowerBound(us("aa"), us("a")); + TestSuffixLowerBound(us("aa"), us("aa")); + + ustring sentence = us("the quick brown fox jumps over the lazy dog."); + // Entire string: exact and unique. + TestSuffixLowerBound(sentence, sentence); + // Empty string: exact and non-unique. + TestSuffixLowerBound(sentence, us("")); + // Exact and unique suffix matches.
+ TestSuffixLowerBound(sentence, us(".")); + TestSuffixLowerBound(sentence, us("the lazy dog.")); + // Exact and unique non-suffix matches. + TestSuffixLowerBound(sentence, us("quick")); + TestSuffixLowerBound(sentence, us("the quick")); + // Partial and unique matches. + TestSuffixLowerBound(sentence, us("fox jumps with the hosps")); + TestSuffixLowerBound(sentence, us("xyz")); + // Exact and non-unique match: take lexicographically first. + TestSuffixLowerBound(sentence, us("the")); + TestSuffixLowerBound(sentence, us(" ")); + // Partial and non-unique match. + // query < "the l"... < "the q"... + TestSuffixLowerBound(sentence, us("the apple")); + // "the l"... < query < "the q"... + TestSuffixLowerBound(sentence, us("the opera")); + // "the l"... < "the q"... < query + TestSuffixLowerBound(sentence, us("the zebra")); + // Prefix match dominates suffix match (unique). + TestSuffixLowerBound(sentence, us("over quick brown fox")); + // Empty matches. + TestSuffixLowerBound(sentence, us(",")); + TestSuffixLowerBound(sentence, us("1234")); + TestSuffixLowerBound(sentence, us("THE QUICK BROWN FOX")); + TestSuffixLowerBound(sentence, us("(the")); +} + +TEST(SuffixArrayTest, LowerBoundExact) { + for (const std::string& test_str : test_strs) { + ustring test_ustr = MakeUnsignedString(test_str); + + std::vector suffix_array = + MakeSuffixArray(test_ustr, kNumChar); + + for (size_t lo = 0; lo < test_str.size(); ++lo) { + for (size_t hi = lo + 1; hi <= test_str.size(); ++hi) { + ustring query(test_ustr.begin() + lo, test_ustr.begin() + hi); + ASSERT_EQ(query.size(), hi - lo); + auto pos = SuffixLowerBound(suffix_array, test_ustr.begin(), + query.begin(), query.end()); + EXPECT_TRUE( + std::equal(query.begin(), query.end(), test_ustr.begin() + *pos)); + } + } + } +} + +} // namespace zucchini diff --git a/target_pool.cc b/target_pool.cc new file mode 100644 index 0000000..0c1e0a5 --- /dev/null +++ b/target_pool.cc @@ -0,0 +1,84 @@ +// Copyright 2017 The Chromium Authors.
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/target_pool.h" + +#include +#include +#include + +#include "base/logging.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/equivalence_map.h" + +namespace zucchini { + +TargetPool::TargetPool() = default; + +TargetPool::TargetPool(std::vector&& targets) { + DCHECK(targets_.empty()); + DCHECK(std::is_sorted(targets.begin(), targets.end())); + targets_ = std::move(targets); +} + +TargetPool::TargetPool(TargetPool&&) = default; +TargetPool::TargetPool(const TargetPool&) = default; +TargetPool::~TargetPool() = default; + +void TargetPool::InsertTargets(const std::vector& targets) { + std::copy(targets.begin(), targets.end(), std::back_inserter(targets_)); + SortAndUniquify(&targets_); +} + +void TargetPool::InsertTargets(TargetSource* targets) { + for (auto target = targets->GetNext(); target.has_value(); + target = targets->GetNext()) { + targets_.push_back(*target); + } + // InsertTargets() can be called many times (number of reference types for the + // pool) in succession. Calling SortAndUniquify() every time enables deduping + // to occur more often. This prioritizes peak memory reduction over running + // time. + SortAndUniquify(&targets_); +} + +void TargetPool::InsertTargets(const std::vector& references) { + // This can be called many times, so it's better to let std::back_inserter() + // manage |targets_| resize, instead of manually reserving space. 
+ std::transform(references.begin(), references.end(), + std::back_inserter(targets_), + [](const Reference& ref) { return ref.target; }); + SortAndUniquify(&targets_); +} + +void TargetPool::InsertTargets(ReferenceReader&& references) { + for (auto ref = references.GetNext(); ref.has_value(); + ref = references.GetNext()) { + targets_.push_back(ref->target); + } + SortAndUniquify(&targets_); +} + +key_t TargetPool::KeyForOffset(offset_t offset) const { + auto pos = std::lower_bound(targets_.begin(), targets_.end(), offset); + DCHECK(pos != targets_.end() && *pos == offset); + return static_cast(pos - targets_.begin()); +} + +key_t TargetPool::KeyForNearestOffset(offset_t offset) const { + auto pos = std::lower_bound(targets_.begin(), targets_.end(), offset); + if (pos != targets_.begin()) { + // If distances are equal, prefer lower key. + if (pos == targets_.end() || *pos - offset >= offset - pos[-1]) + --pos; + } + return static_cast(pos - targets_.begin()); +} + +void TargetPool::FilterAndProject(const OffsetMapper& offset_mapper) { + offset_mapper.ForwardProjectAll(&targets_); + std::sort(targets_.begin(), targets_.end()); +} + +} // namespace zucchini diff --git a/target_pool.h b/target_pool.h new file mode 100644 index 0000000..b881b1e --- /dev/null +++ b/target_pool.h @@ -0,0 +1,77 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TARGET_POOL_H_ +#define COMPONENTS_ZUCCHINI_TARGET_POOL_H_ + +#include + +#include + +#include "components/zucchini/image_utils.h" +#include "components/zucchini/patch_reader.h" + +namespace zucchini { + +class OffsetMapper; +class TargetSource; + +// Ordered container of distinct targets that have the same semantics, along +// with a list of associated reference types, only used during patch generation. 
+class TargetPool { + public: + using const_iterator = std::vector::const_iterator; + + TargetPool(); + // Initializes the object with given sorted and unique |targets|. + explicit TargetPool(std::vector&& targets); + TargetPool(TargetPool&&); + TargetPool(const TargetPool&); + ~TargetPool(); + + // Insert new targets from various sources. These invalidate all previous key + // lookups. + // - From a list of targets, useful for adding extra targets in Zucchini-gen: + void InsertTargets(const std::vector& targets); + // - From TargetSource, useful for adding extra targets in Zucchini-apply: + void InsertTargets(TargetSource* targets); + // - From list of References, useful for listing targets in Zucchini-gen: + void InsertTargets(const std::vector& references); + // - From ReferenceReader, useful for listing targets in Zucchini-apply: + void InsertTargets(ReferenceReader&& references); + + // Adds |type| as a reference type associated with the pool of targets. + void AddType(TypeTag type) { types_.push_back(type); } + + // Returns a canonical key associated with a valid target at |offset|. + key_t KeyForOffset(offset_t offset) const; + + // Returns a canonical key associated with the target nearest to |offset|. + key_t KeyForNearestOffset(offset_t offset) const; + + // Returns the target for a |key|, which is assumed to be valid and held by + // this class. + offset_t OffsetForKey(key_t key) const { return targets_[key]; } + + // Uses |offset_mapper| to transform "old" |targets_| to "new" |targets_|, + // resulting in sorted and unique targets. + void FilterAndProject(const OffsetMapper& offset_mapper); + + // Accessors for testing. + const std::vector& targets() const { return targets_; } + const std::vector& types() const { return types_; } + + // Returns the number of targets. 
+ size_t size() const { return targets_.size(); } + const_iterator begin() const { return targets_.cbegin(); } + const_iterator end() const { return targets_.cend(); } + + private: + std::vector types_; // Enumerates type_tag for this pool. + std::vector targets_; // Targets for pool in ascending order. +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TARGET_POOL_H_ diff --git a/target_pool_unittest.cc b/target_pool_unittest.cc new file mode 100644 index 0000000..4c3efec --- /dev/null +++ b/target_pool_unittest.cc @@ -0,0 +1,64 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/target_pool.h" + +#include +#include +#include +#include + +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using OffsetVector = std::vector; + +} // namespace + +TEST(TargetPoolTest, InsertTargetsFromReferences) { + auto test_insert = [](std::vector&& references) -> OffsetVector { + TargetPool target_pool; + target_pool.InsertTargets(references); + // Return copy since |target_pool| goes out of scope. 
+ return target_pool.targets(); + }; + + EXPECT_EQ(OffsetVector(), test_insert({})); + EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 0}, {10, 1}})); + EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 1}, {10, 0}})); + EXPECT_EQ(OffsetVector({0, 1, 2}), test_insert({{0, 1}, {10, 0}, {20, 2}})); + EXPECT_EQ(OffsetVector({0}), test_insert({{0, 0}, {10, 0}})); + EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 0}, {10, 0}, {20, 1}})); +} + +TEST(TargetPoolTest, KeyOffset) { + auto test_key_offset = [](const std::string& nearest_offsets_key, + OffsetVector&& targets) { + TargetPool target_pool(std::move(targets)); + for (offset_t offset : target_pool.targets()) { + offset_t key = target_pool.KeyForOffset(offset); + EXPECT_LT(key, target_pool.size()); + EXPECT_EQ(offset, target_pool.OffsetForKey(key)); + } + for (offset_t offset = 0; offset < nearest_offsets_key.size(); ++offset) { + key_t key = target_pool.KeyForNearestOffset(offset); + EXPECT_EQ(key, static_cast(nearest_offsets_key[offset] - '0')); + } + }; + test_key_offset("0000000000000000", {}); + test_key_offset("0000000000000000", {0}); + test_key_offset("0000000000000000", {1}); + test_key_offset("0111111111111111", {0, 1}); + test_key_offset("0011111111111111", {0, 2}); + test_key_offset("0011111111111111", {1, 2}); + test_key_offset("0001111111111111", {1, 3}); + test_key_offset("0001112223334444", {1, 3, 7, 9, 13}); + test_key_offset("0000011112223333", {1, 7, 9, 13}); +} + +} // namespace zucchini diff --git a/targets_affinity.cc b/targets_affinity.cc new file mode 100644 index 0000000..11903a9 --- /dev/null +++ b/targets_affinity.cc @@ -0,0 +1,108 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/targets_affinity.h" + +#include + +#include "base/logging.h" +#include "components/zucchini/equivalence_map.h" + +namespace zucchini { + +namespace { + +constexpr uint32_t kNoLabel = 0; +} + +TargetsAffinity::TargetsAffinity() = default; +TargetsAffinity::~TargetsAffinity() = default; + +void TargetsAffinity::InferFromSimilarities( + const EquivalenceMap& equivalences, + const std::vector& old_targets, + const std::vector& new_targets) { + forward_association_.assign(old_targets.size(), {}); + backward_association_.assign(new_targets.size(), {}); + + if (old_targets.empty() || new_targets.empty()) + return; + + key_t new_key = 0; + for (auto candidate : equivalences) { // Sorted by |dst_offset|. + DCHECK_GT(candidate.similarity, 0.0); + while (new_key < new_targets.size() && + new_targets[new_key] < candidate.eq.dst_offset) { + ++new_key; + } + + // Visit each new target covered by |candidate.eq| and find / update its + // associated old target. + for (; new_key < new_targets.size() && + new_targets[new_key] < candidate.eq.dst_end(); + ++new_key) { + if (backward_association_[new_key].affinity >= candidate.similarity) + continue; + + DCHECK_GE(new_targets[new_key], candidate.eq.dst_offset); + offset_t old_target = new_targets[new_key] - candidate.eq.dst_offset + + candidate.eq.src_offset; + auto old_it = + std::lower_bound(old_targets.begin(), old_targets.end(), old_target); + // If new target can be mapped via |candidate.eq| to an old target, then + // attempt to associate them. Multiple new targets can compete for the + // same old target. The heuristic here makes selections to maximize + // |candidate.similarity|, and if a tie occurs, minimize new target offset + // (by first-come, first-served). + if (old_it != old_targets.end() && *old_it == old_target) { + key_t old_key = static_cast(old_it - old_targets.begin()); + if (candidate.similarity > forward_association_[old_key].affinity) { + // Reset other associations. 
+ if (forward_association_[old_key].affinity > 0.0) + backward_association_[forward_association_[old_key].other] = {}; + if (backward_association_[new_key].affinity > 0.0) + forward_association_[backward_association_[new_key].other] = {}; + // Assign new association. + forward_association_[old_key] = {new_key, candidate.similarity}; + backward_association_[new_key] = {old_key, candidate.similarity}; + } + } + } + } +} + +uint32_t TargetsAffinity::AssignLabels(double min_affinity, + std::vector* old_labels, + std::vector* new_labels) { + old_labels->assign(forward_association_.size(), kNoLabel); + new_labels->assign(backward_association_.size(), kNoLabel); + + uint32_t label = kNoLabel + 1; + for (key_t old_key = 0; old_key < forward_association_.size(); ++old_key) { + Association association = forward_association_[old_key]; + if (association.affinity >= min_affinity) { + (*old_labels)[old_key] = label; + DCHECK_EQ(0U, (*new_labels)[association.other]); + (*new_labels)[association.other] = label; + ++label; + } + } + return label; +} + +double TargetsAffinity::AffinityBetween(key_t old_key, key_t new_key) const { + DCHECK_LT(old_key, forward_association_.size()); + DCHECK_LT(new_key, backward_association_.size()); + if (forward_association_[old_key].affinity > 0.0 && + forward_association_[old_key].other == new_key) { + DCHECK_EQ(backward_association_[new_key].other, old_key); + DCHECK_EQ(forward_association_[old_key].affinity, + backward_association_[new_key].affinity); + return forward_association_[old_key].affinity; + } + return -std::max(forward_association_[old_key].affinity, + backward_association_[new_key].affinity); +} + +} // namespace zucchini diff --git a/targets_affinity.h b/targets_affinity.h new file mode 100644 index 0000000..3a154e7 --- /dev/null +++ b/targets_affinity.h @@ -0,0 +1,74 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_ +#define COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_ + +#include +#include + +#include + +#include "base/macros.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +class EquivalenceMap; + +// Computes and stores affinity between old and new targets for a single target +// pool. This is only used during patch generation. +class TargetsAffinity { + public: + TargetsAffinity(); + ~TargetsAffinity(); + + // Infers affinity between |old_targets| and |new_targets| using similarities + // described by |equivalence_map|, and updates internal state for retrieval of + // affinity scores. Both |old_targets| and |new_targets| are targets in the + // same pool and are sorted in ascending order. + void InferFromSimilarities(const EquivalenceMap& equivalence_map, + const std::vector& old_targets, + const std::vector& new_targets); + + // Assigns labels to targets based on associations previously inferred, using + // |min_affinity| to reject associations with weak |affinity|. Label 0 is + // assigned to unassociated targets. Labels for old targets are written to + // |old_labels| and labels for new targets are written to |new_labels|. + // Returns the exclusive upper bound on assigned labels (>= 1 since 0 is used). + uint32_t AssignLabels(double min_affinity, + std::vector* old_labels, + std::vector* new_labels); + + // Returns the affinity score between targets identified by |old_key| and + // |new_key|. Affinity > 0 means an association is likely, < 0 means + // incompatible association, and 0 means neither target has been associated. + double AffinityBetween(key_t old_key, key_t new_key) const; + + private: + struct Association { + key_t other = 0; + double affinity = 0.0; + }; + + // Forward and backward associations between old and new targets.
For each + // Association element, if |affinity == 0.0| then no association is defined + // (and |other| is meaningless). Otherwise |affinity > 0.0|, and the + // association between |old_labels[old_key]| and |new_labels[new_key]| is + // represented by: + // forward_association_[old_key].other == new_key; + // backward_association_[new_key].other == old_key; + // forward_association_[old_key].affinity == + // backward_association_[new_key].affinity; + // The two lists contain the same information, but having both enables quick + // lookup, given |old_key| or |new_key|. + std::vector forward_association_; + std::vector backward_association_; + + DISALLOW_COPY_AND_ASSIGN(TargetsAffinity); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_ diff --git a/targets_affinity_unittest.cc b/targets_affinity_unittest.cc new file mode 100644 index 0000000..86182f9 --- /dev/null +++ b/targets_affinity_unittest.cc @@ -0,0 +1,131 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/targets_affinity.h" + +#include +#include + +#include + +#include "components/zucchini/equivalence_map.h" +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(TargetsAffinityTest, AffinityBetween) { + using AffinityVector = std::vector>; + + // A common TargetsAffinity is used across independent tests. This is to + // reflect actual usage, in which common TargetsAffinity is used so that + // internal buffers get reused.
+ TargetsAffinity targets_affinity; + + auto test_affinity = [&targets_affinity]( + const EquivalenceMap& equivalence_map, + const std::vector& old_targets, + const std::vector& new_targets) { + targets_affinity.InferFromSimilarities(equivalence_map, old_targets, + new_targets); + AffinityVector affinities(old_targets.size()); + for (key_t i = 0; i < old_targets.size(); ++i) { + for (key_t j = 0; j < new_targets.size(); ++j) { + affinities[i].push_back(targets_affinity.AffinityBetween(i, j)); + } + } + return affinities; + }; + + EXPECT_EQ(AffinityVector({}), test_affinity(EquivalenceMap(), {}, {})); + EXPECT_EQ(AffinityVector({}), + test_affinity(EquivalenceMap({{{0, 0, 8}, 1.0}}), {}, {})); + + EXPECT_EQ(AffinityVector({{0.0, 0.0}, {0.0, 0.0}}), + test_affinity(EquivalenceMap(), {0, 10}, {0, 5})); + + EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 0, 1}, 1.0}}), {0, 10}, {0, 5})); + + EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 0, 2}, 1.0}}), {1, 10}, {1, 5})); + + EXPECT_EQ(AffinityVector({{0.0, 0.0}, {0.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 1, 2}, 1.0}}), {1, 10}, {1, 5})); + + EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 1, 2}, 1.0}}), {0, 10}, {1, 5})); + + EXPECT_EQ(AffinityVector({{2.0, -2.0}, {-2.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 0, 1}, 2.0}}), {0, 10}, {0, 5})); + + EXPECT_EQ( + AffinityVector({{1.0, -1.0}, {-1.0, 1.0}, {-1.0, -1.0}}), + test_affinity(EquivalenceMap({{{0, 0, 6}, 1.0}}), {0, 5, 10}, {0, 5})); + + EXPECT_EQ(AffinityVector({{-2.0, 2.0}, {1.0, -2.0}, {-1.0, -2.0}}), + test_affinity(EquivalenceMap({{{5, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}), + {0, 5, 10}, {0, 5})); + + EXPECT_EQ(AffinityVector({{-2.0, 2.0}, {0.0, -2.0}, {0.0, -2.0}}), + test_affinity(EquivalenceMap({{{0, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}), + {0, 5, 10}, {0, 5})); +} + +TEST(TargetsAffinityTest, AssignLabels) { + // A common 
TargetsAffinity is used across independent tests. This is to + // reflect actual usage, in which common TargetsAffinity is used so that + // internal buffers get reused. + TargetsAffinity targets_affinity; + + auto test_labels_assignment = + [&targets_affinity](const EquivalenceMap& equivalence_map, + const std::vector& old_targets, + const std::vector& new_targets, + double min_affinity, + const std::vector& expected_old_labels, + const std::vector& expected_new_labels) { + targets_affinity.InferFromSimilarities(equivalence_map, old_targets, + new_targets); + std::vector old_labels; + std::vector new_labels; + size_t bound = targets_affinity.AssignLabels(min_affinity, &old_labels, + &new_labels); + EXPECT_EQ(expected_old_labels, old_labels); + EXPECT_EQ(expected_new_labels, new_labels); + return bound; + }; + + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap(), {}, {}, 1.0, {}, {})); + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 8}, 1.0}}), {}, + {}, 1.0, {}, {})); + + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap(), {0, 10}, {0, 5}, 1.0, + {0, 0}, {0, 0})); + + EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}), + {0, 10}, {0, 5}, 1.0, {1, 0}, {1, 0})); + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 0.99}}), + {0, 10}, {0, 5}, 1.0, {0, 0}, {0, 0})); + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}), + {0, 10}, {0, 5}, 1.01, {0, 0}, {0, 0})); + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}), + {0, 10}, {0, 5}, 15.0, {0, 0}, {0, 0})); + EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 15.0}}), + {0, 10}, {0, 5}, 15.0, {1, 0}, {1, 0})); + + EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 1, 2}, 1.0}}), + {0, 10}, {1, 5}, 1.0, {1, 0}, {1, 0})); + EXPECT_EQ( + 3U, test_labels_assignment(EquivalenceMap({{{0, 0, 6}, 1.0}}), {0, 5, 10}, + {0, 5}, 1.0, {1, 2, 0}, {1, 2})); + EXPECT_EQ(3U, test_labels_assignment( + 
EquivalenceMap({{{5, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}), + {0, 5, 10}, {0, 5}, 1.0, {1, 2, 0}, {2, 1})); + EXPECT_EQ(2U, test_labels_assignment( + EquivalenceMap({{{0, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}), + {0, 5, 10}, {0, 5}, 1.0, {1, 0, 0}, {0, 1})); +} + +} // namespace zucchini diff --git a/test_disassembler.cc b/test_disassembler.cc new file mode 100644 index 0000000..8d59a93 --- /dev/null +++ b/test_disassembler.cc @@ -0,0 +1,58 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/test_disassembler.h" + +#include "components/zucchini/test_reference_reader.h" + +namespace zucchini { + +TestDisassembler::TestDisassembler(const ReferenceTypeTraits& traits1, + const std::vector& refs1, + const ReferenceTypeTraits& traits2, + const std::vector& refs2, + const ReferenceTypeTraits& traits3, + const std::vector& refs3) + : traits_{traits1, traits2, traits3}, refs_{refs1, refs2, refs3} {} + +TestDisassembler::~TestDisassembler() = default; + +ExecutableType TestDisassembler::GetExeType() const { + return kExeTypeUnknown; +} + +std::string TestDisassembler::GetExeTypeString() const { + return "(Unknown)"; +} + +std::vector TestDisassembler::MakeReferenceGroups() const { + return { + {traits_[0], &TestDisassembler::MakeReadRefs1, + &TestDisassembler::MakeWriteRefs1}, + {traits_[1], &TestDisassembler::MakeReadRefs2, + &TestDisassembler::MakeWriteRefs2}, + {traits_[2], &TestDisassembler::MakeReadRefs3, + &TestDisassembler::MakeWriteRefs3}, + }; +} + +bool TestDisassembler::Parse(ConstBufferView image) { + return true; +} + +std::unique_ptr TestDisassembler::MakeReadRefs(int type) { + return std::make_unique(refs_[type]); +} + +std::unique_ptr TestDisassembler::MakeWriteRefs( + MutableBufferView image) { + class NoOpWriter : public ReferenceWriter { + public: + // ReferenceWriter: + void PutNext(Reference) override {} + }; + return 
std::make_unique(); +} + +} // namespace zucchini diff --git a/test_disassembler.h b/test_disassembler.h new file mode 100644 index 0000000..427ed91 --- /dev/null +++ b/test_disassembler.h @@ -0,0 +1,78 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_ +#define COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_ + +#include +#include +#include + +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// A trivial Disassembler that reads injected references of 3 different types. +// This is only meant for testing and is not a full implementation of a +// disassembler. Reading reference ignores bounds, and writing references does +// nothing. +class TestDisassembler : public Disassembler { + public: + TestDisassembler(const ReferenceTypeTraits& traits1, + const std::vector& refs1, + const ReferenceTypeTraits& traits2, + const std::vector& refs2, + const ReferenceTypeTraits& traits3, + const std::vector& refs3); + ~TestDisassembler() override; + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector MakeReferenceGroups() const override; + + // Disassembler::ReaderFactory: + std::unique_ptr MakeReadRefs1(offset_t /*lower*/, + offset_t /*upper*/) { + return MakeReadRefs(0); + } + std::unique_ptr MakeReadRefs2(offset_t /*lower*/, + offset_t /*upper*/) { + return MakeReadRefs(1); + } + std::unique_ptr MakeReadRefs3(offset_t /*lower*/, + offset_t /*upper*/) { + return MakeReadRefs(2); + } + + // Disassembler::WriterFactory: + std::unique_ptr MakeWriteRefs1(MutableBufferView image) { + return MakeWriteRefs(image); + } + std::unique_ptr MakeWriteRefs2(MutableBufferView image) { + return MakeWriteRefs(image); + 
} + std::unique_ptr MakeWriteRefs3(MutableBufferView image) { + return MakeWriteRefs(image); + } + + private: + // Disassembler: + bool Parse(ConstBufferView image) override; + + std::unique_ptr MakeReadRefs(int type); + std::unique_ptr MakeWriteRefs(MutableBufferView image); + + ReferenceTypeTraits traits_[3]; + std::vector refs_[3]; + + DISALLOW_COPY_AND_ASSIGN(TestDisassembler); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_ diff --git a/test_reference_reader.cc b/test_reference_reader.cc new file mode 100644 index 0000000..5517fa0 --- /dev/null +++ b/test_reference_reader.cc @@ -0,0 +1,20 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/test_reference_reader.h" + +namespace zucchini { + +TestReferenceReader::TestReferenceReader(const std::vector& refs) + : references_(refs) {} + +TestReferenceReader::~TestReferenceReader() = default; + +base::Optional TestReferenceReader::GetNext() { + if (index_ == references_.size()) + return base::nullopt; + return references_[index_++]; +} + +} // namespace zucchini diff --git a/test_reference_reader.h b/test_reference_reader.h new file mode 100644 index 0000000..afae188 --- /dev/null +++ b/test_reference_reader.h @@ -0,0 +1,32 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_ +#define COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_ + +#include + +#include + +#include "base/optional.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// A trivial ReferenceReader that reads injected references. 
+class TestReferenceReader : public ReferenceReader { + public: + explicit TestReferenceReader(const std::vector& refs); + ~TestReferenceReader() override; + + base::Optional GetNext() override; + + private: + std::vector references_; + size_t index_ = 0; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_ diff --git a/test_utils.cc b/test_utils.cc new file mode 100644 index 0000000..91c8a39 --- /dev/null +++ b/test_utils.cc @@ -0,0 +1,26 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/test_utils.h" + +#include +#include + +#include "base/logging.h" + +namespace zucchini { + +std::vector ParseHexString(const std::string& hex_string) { + std::vector ret; + std::istringstream iss(hex_string); + iss >> std::hex; + uint32_t temp = 0; // Cannot be uint8_t: istringstream treats this as char! + while (iss >> temp) { + CHECK_LE(temp, 0xFFU); + ret.push_back(temp); + } + return ret; +} + +} // namespace zucchini diff --git a/test_utils.h b/test_utils.h new file mode 100644 index 0000000..7ed735d --- /dev/null +++ b/test_utils.h @@ -0,0 +1,20 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TEST_UTILS_H_ +#define COMPONENTS_ZUCCHINI_TEST_UTILS_H_ + +#include + +#include +#include + +namespace zucchini { + +// Parses space-separated list of byte hex values into list. 
+std::vector ParseHexString(const std::string& hex_string); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TEST_UTILS_H_ diff --git a/testdata/chrome64_1.exe.sha1 b/testdata/chrome64_1.exe.sha1 new file mode 100644 index 0000000..9b4f113 --- /dev/null +++ b/testdata/chrome64_1.exe.sha1 @@ -0,0 +1 @@ +4970ef6f342f6a0da9ae7a4ed462f93ef68f142c \ No newline at end of file diff --git a/testdata/chrome64_2.exe.sha1 b/testdata/chrome64_2.exe.sha1 new file mode 100644 index 0000000..e4a96a2 --- /dev/null +++ b/testdata/chrome64_2.exe.sha1 @@ -0,0 +1 @@ +c3a974589d50956a3c8c17572fee078b9276ad9b \ No newline at end of file diff --git a/testdata/patch_fuzzer/empty.zuc b/testdata/patch_fuzzer/empty.zuc new file mode 100644 index 0000000..34b2d66 Binary files /dev/null and b/testdata/patch_fuzzer/empty.zuc differ diff --git a/testdata/setup1.exe.sha1 b/testdata/setup1.exe.sha1 new file mode 100644 index 0000000..2304621 --- /dev/null +++ b/testdata/setup1.exe.sha1 @@ -0,0 +1 @@ +5d0e8fed8e9e091e184adb2e2e0e668def9cd2c5 \ No newline at end of file diff --git a/testdata/setup2.exe.sha1 b/testdata/setup2.exe.sha1 new file mode 100644 index 0000000..9fa4d0c --- /dev/null +++ b/testdata/setup2.exe.sha1 @@ -0,0 +1 @@ +12194273e8d509b6e81e4a6b63621081e1426028 \ No newline at end of file diff --git a/type_win_pe.h b/type_win_pe.h new file mode 100644 index 0000000..d385ca7 --- /dev/null +++ b/type_win_pe.h @@ -0,0 +1,188 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_ +#define COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_ + +#include +#include + +namespace zucchini { + +// Structures and constants taken from WINNT.h and following identical layout. +// This is used for parsing of Portable Executable (PE) file format. +namespace pe { +// Supported by MSVC, g++, and clang++. Ensures no gaps in packing. 
+#pragma pack(push, 1) + +// IMAGE_NUMBEROF_DIRECTORY_ENTRIES +constexpr size_t kImageNumberOfDirectoryEntries = 16; + +// IMAGE_FILE_BASE_RELOCATION_TABLE +constexpr size_t kIndexOfBaseRelocationTable = 5; + +constexpr uint32_t kImageScnMemExecute = 0x20000000; // IMAGE_SCN_MEM_EXECUTE +constexpr uint32_t kImageScnMemRead = 0x40000000; // IMAGE_SCN_MEM_READ + +// IMAGE_DOS_HEADER +struct ImageDOSHeader { + uint16_t e_magic; // 0x00 + uint16_t e_cblp; + uint16_t e_cp; + uint16_t e_crlc; + uint16_t e_cparhdr; + uint16_t e_minalloc; + uint16_t e_maxalloc; + uint16_t e_ss; + uint16_t e_sp; // 0x10 + uint16_t e_csum; + uint16_t e_ip; + uint16_t e_cs; + uint16_t e_lfarlc; + uint16_t e_ovno; + uint16_t e_res[4]; + uint16_t e_oemid; // 0x24 + uint16_t e_oeminfo; + uint16_t e_res2[10]; + uint32_t e_lfanew; // 0x3C +}; +static_assert(sizeof(ImageDOSHeader) == 0x40, + "DOS header size should be 0x40 bytes"); + +// IMAGE_SECTION_HEADER +struct ImageSectionHeader { + char name[8]; + uint32_t virtual_size; + uint32_t virtual_address; + uint32_t size_of_raw_data; + uint32_t file_offset_of_raw_data; + uint32_t pointer_to_relocations; // Always zero in an image. + uint32_t pointer_to_line_numbers; // Always zero in an image. + uint16_t number_of_relocations; // Always zero in an image. + uint16_t number_of_line_numbers; // Always zero in an image. 
+ uint32_t characteristics; +}; +static_assert(sizeof(ImageSectionHeader) == 0x28, + "Section header size should be 0x28 bytes"); + +// IMAGE_DATA_DIRECTORY +struct ImageDataDirectory { + uint32_t virtual_address; + uint32_t size; +}; +static_assert(sizeof(ImageDataDirectory) == 0x08, + "Data directory size should be 0x08 bytes"); + +// IMAGE_FILE_HEADER +struct ImageFileHeader { + uint16_t machine; + uint16_t number_of_sections; + uint32_t time_date_stamp; + uint32_t pointer_to_symbol_table; + uint32_t number_of_symbols; + uint16_t size_of_optional_header; + uint16_t characteristics; +}; +static_assert(sizeof(ImageFileHeader) == 0x14, + "File header size should be 0x14 bytes"); + +// IMAGE_OPTIONAL_HEADER +struct ImageOptionalHeader { + uint16_t magic; // 0x00: 0x10B + uint8_t major_linker_version; + uint8_t minor_linker_version; + uint32_t size_of_code; + uint32_t size_of_initialized_data; + uint32_t size_of_uninitialized_data; + uint32_t address_of_entry_point; // 0x10 + uint32_t base_of_code; + uint32_t base_of_data; + + uint32_t image_base; + uint32_t section_alignment; // 0x20 + uint32_t file_alignment; + uint16_t major_operating_system_version; + uint16_t minor_operating_system_version; + uint16_t major_image_version; + uint16_t minor_image_version; + uint16_t major_subsystem_version; // 0x30 + uint16_t minor_subsystem_version; + uint32_t win32_version_value; + uint32_t size_of_image; + uint32_t size_of_headers; + uint32_t check_sum; // 0x40 + uint16_t subsystem; + uint16_t dll_characteristics; + uint32_t size_of_stack_reserve; + uint32_t size_of_stack_commit; + uint32_t size_of_heap_reserve; // 0x50 + uint32_t size_of_heap_commit; + uint32_t loader_flags; + uint32_t number_of_rva_and_sizes; + ImageDataDirectory data_directory[kImageNumberOfDirectoryEntries]; // 0x60 + /* 0xE0 */ +}; +static_assert(sizeof(ImageOptionalHeader) == 0xE0, + "Optional header (32) size should be 0xE0 bytes"); + +// IMAGE_OPTIONAL_HEADER64 +struct ImageOptionalHeader64 { + uint16_t 
magic; // 0x00: 0x20B + uint8_t major_linker_version; + uint8_t minor_linker_version; + uint32_t size_of_code; + uint32_t size_of_initialized_data; + uint32_t size_of_uninitialized_data; + uint32_t address_of_entry_point; // 0x10 + uint32_t base_of_code; + + uint64_t image_base; + uint32_t section_alignment; // 0x20 + uint32_t file_alignment; + uint16_t major_operating_system_version; + uint16_t minor_operating_system_version; + uint16_t major_image_version; + uint16_t minor_image_version; + uint16_t major_subsystem_version; // 0x30 + uint16_t minor_subsystem_version; + uint32_t win32_version_value; + uint32_t size_of_image; + uint32_t size_of_headers; + uint32_t check_sum; // 0x40 + uint16_t subsystem; + uint16_t dll_characteristics; + uint64_t size_of_stack_reserve; + uint64_t size_of_stack_commit; // 0x50 + uint64_t size_of_heap_reserve; + uint64_t size_of_heap_commit; // 0x60 + uint32_t loader_flags; + uint32_t number_of_rva_and_sizes; + ImageDataDirectory data_directory[kImageNumberOfDirectoryEntries]; // 0x70 + /* 0xF0 */ +}; +static_assert(sizeof(ImageOptionalHeader64) == 0xF0, + "Optional header (64) size should be 0xF0 bytes"); + +struct RelocHeader { + uint32_t rva_hi; + uint32_t size; +}; +static_assert(sizeof(RelocHeader) == 8, "RelocHeader size should be 8 bytes"); + +#pragma pack(pop) + +} // namespace pe + +// Constants and offsets gleaned from WINNT.h and various articles on the +// format of Windows PE executables. + +constexpr char const* kTextSectionName = ".text"; + +// Bitfield with characteristics usually associated with code sections. +const uint32_t kCodeCharacteristics = + pe::kImageScnMemExecute | pe::kImageScnMemRead; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_ diff --git a/typed_value.h b/typed_value.h new file mode 100644 index 0000000..868397c --- /dev/null +++ b/typed_value.h @@ -0,0 +1,57 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPED_VALUE_H_ +#define COMPONENTS_ZUCCHINI_TYPED_VALUE_H_ + +#include + +namespace zucchini { + +// Strong typed values, with compare and convert functions for underlying data. +// Typically one would use strongly typed enums for this. However, for Zucchini, +// the number of bytes is not fixed, and must be represented as an integer for +// iteration. +// |Tag| is a type tag used to uniquely identify TypedValue. +// |T| is an integral type used to hold values. +// Example: +// struct Foo : TypedValue { +// using Foo::TypedValue::TypedValue; // inheriting constructor. +// }; +// Foo will be used to hold values of type |int|, but with a distinct type from +// any other TypedValue. +template +class TypedValue { + public: + constexpr TypedValue() = default; + explicit constexpr TypedValue(const T& value) : value_(value) {} + + explicit operator T() const { return value_; } + const T value() const { return value_; } + + friend bool operator==(const TypedValue& a, const TypedValue& b) { + return a.value_ == b.value_; + } + friend bool operator!=(const TypedValue& a, const TypedValue& b) { + return !(a == b); + } + friend bool operator<(const TypedValue& a, const TypedValue& b) { + return a.value_ < b.value_; + } + friend bool operator>(const TypedValue& a, const TypedValue& b) { + return b < a; + } + + private: + T value_ = {}; +}; + +template +std::ostream& operator<<(std::ostream& os, const TypedValue& tag) { + return os << tag.value(); +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPED_VALUE_H_ diff --git a/typed_value_unittest.cc b/typed_value_unittest.cc new file mode 100644 index 0000000..bc0d4f1 --- /dev/null +++ b/typed_value_unittest.cc @@ -0,0 +1,40 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/typed_value.h" + +#include + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +struct ValueA : TypedValue { + using ValueA::TypedValue::TypedValue; +}; + +struct ValueB : TypedValue { + using ValueB::TypedValue::TypedValue; +}; + +TEST(TypedIdTest, Value) { + EXPECT_EQ(42, ValueA(42).value()); + EXPECT_EQ(42, static_cast(ValueA(42))); // explicit cast +} + +TEST(TypedIdTest, Comparison) { + EXPECT_TRUE(ValueA(0) == ValueA(0)); + EXPECT_FALSE(ValueA(0) == ValueA(42)); + EXPECT_FALSE(ValueA(0) != ValueA(0)); + EXPECT_TRUE(ValueA(0) != ValueA(42)); +} + +TEST(TypedIdTest, StrongType) { + static_assert(!std::is_convertible::value, + "ValueA should not be convertible to ValueB"); + static_assert(!std::is_convertible::value, + "ValueB should not be convertible to ValueA"); +} + +} // namespace zucchini diff --git a/zucchini.h b/zucchini.h new file mode 100644 index 0000000..9100709 --- /dev/null +++ b/zucchini.h @@ -0,0 +1,54 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_H_ + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/patch_writer.h" + +// Definitions, structures, and interfaces for the Zucchini library. + +namespace zucchini { + +namespace status { + +// Zucchini status code, which can also be used as process exit code. Therefore +// success is explicitly 0. 
+enum Code { + kStatusSuccess = 0, + kStatusInvalidParam = 1, + kStatusFileReadError = 2, + kStatusFileWriteError = 3, + kStatusPatchReadError = 4, + kStatusPatchWriteError = 5, + kStatusInvalidOldImage = 6, + kStatusInvalidNewImage = 7, + kStatusFatal = 8, +}; + +} // namespace status + +// Generates ensemble patch from |old_image| to |new_image|, and writes it to +// |patch_writer|. +status::Code GenerateEnsemble(ConstBufferView old_image, + ConstBufferView new_image, + EnsemblePatchWriter* patch_writer); + +// Generates raw patch from |old_image| to |new_image|, and writes it to +// |patch_writer|. +status::Code GenerateRaw(ConstBufferView old_image, + ConstBufferView new_image, + EnsemblePatchWriter* patch_writer); + +// Applies |patch_reader| to |old_image| to build |new_image|, which refers to +// preallocated memory of sufficient size. +status::Code Apply(ConstBufferView old_image, + const EnsemblePatchReader& patch_reader, + MutableBufferView new_image); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_H_ diff --git a/zucchini_apply.cc b/zucchini_apply.cc new file mode 100644 index 0000000..1532874 --- /dev/null +++ b/zucchini_apply.cc @@ -0,0 +1,202 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/zucchini_apply.h" + +#include +#include +#include +#include + +#include "base/logging.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/equivalence_map.h" +#include "components/zucchini/image_index.h" + +namespace zucchini { + +bool ApplyEquivalenceAndExtraData(ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image) { + EquivalenceSource equiv_source = patch_reader.GetEquivalenceSource(); + ExtraDataSource extra_data_source = patch_reader.GetExtraDataSource(); + MutableBufferView::iterator dst_it = new_image.begin(); + + for (auto equivalence = equiv_source.GetNext(); equivalence.has_value(); + equivalence = equiv_source.GetNext()) { + // TODO(etiennep): Guard against out of range errors and return false + // instead. + MutableBufferView::iterator next_dst_it = + new_image.begin() + equivalence->dst_offset; + CHECK(next_dst_it >= dst_it); + offset_t gap = static_cast(next_dst_it - dst_it); + base::Optional extra_data = extra_data_source.GetNext(gap); + if (!extra_data) { + LOG(ERROR) << "Error reading extra_data"; + return false; + } + dst_it = std::copy(extra_data->begin(), extra_data->end(), dst_it); + CHECK_EQ(dst_it, next_dst_it); + dst_it = std::copy_n(old_image.begin() + equivalence->src_offset, + equivalence->length, dst_it); + CHECK_EQ(dst_it, next_dst_it + equivalence->length); + } + offset_t gap = static_cast(new_image.end() - dst_it); + base::Optional extra_data = extra_data_source.GetNext(gap); + if (!extra_data) { + LOG(ERROR) << "Error reading extra_data"; + return false; + } + std::copy(extra_data->begin(), extra_data->end(), dst_it); + if (!equiv_source.Done() || !extra_data_source.Done()) { + LOG(ERROR) << "Found trailing equivalence and extra_data"; + return false; + } + return true; +} + +bool ApplyRawDelta(const PatchElementReader& patch_reader, + MutableBufferView new_image) { + 
EquivalenceSource equiv_source = patch_reader.GetEquivalenceSource(); + RawDeltaSource raw_delta_source = patch_reader.GetRawDeltaSource(); + // Traverse |equiv_source| and |raw_delta_source| in lockstep. + auto equivalence = equiv_source.GetNext(); + offset_t base_copy_offset = 0; + for (auto delta = raw_delta_source.GetNext(); delta.has_value(); + delta = raw_delta_source.GetNext()) { + while (equivalence.has_value() && + base_copy_offset + equivalence->length <= delta->copy_offset) { + base_copy_offset += equivalence->length; + equivalence = equiv_source.GetNext(); + } + if (!equivalence.has_value()) { + LOG(ERROR) << "Error reading equivalences"; + return false; + } + CHECK_GE(delta->copy_offset, base_copy_offset); + CHECK_LT(delta->copy_offset, base_copy_offset + equivalence->length); + + // Invert byte diff. + new_image[equivalence->dst_offset - base_copy_offset + + delta->copy_offset] += delta->diff; + } + if (!raw_delta_source.Done()) { + LOG(ERROR) << "Found trailing raw_delta"; + return false; + } + return true; +} + +bool ApplyReferencesCorrection(ExecutableType exe_type, + ConstBufferView old_image, + const PatchElementReader& patch, + MutableBufferView new_image) { + auto old_disasm = MakeDisassemblerOfType(old_image, exe_type); + auto new_disasm = + MakeDisassemblerOfType(ConstBufferView(new_image), exe_type); + if (!old_disasm || !new_disasm) { + LOG(ERROR) << "Failed to create Disassembler"; + return false; + } + + ReferenceDeltaSource ref_delta_source = patch.GetReferenceDeltaSource(); + std::map> pool_groups; + for (const auto& ref_group : old_disasm->MakeReferenceGroups()) + pool_groups[ref_group.pool_tag()].push_back(ref_group); + + OffsetMapper offset_mapper(patch.GetEquivalenceSource()); + + std::vector new_groups = new_disasm->MakeReferenceGroups(); + for (const auto& pool_and_sub_groups : pool_groups) { + PoolTag pool_tag = pool_and_sub_groups.first; + const std::vector& sub_groups = pool_and_sub_groups.second; + + TargetPool targets; + // 
Load "old" targets, then filter and map them to "new" targets. + for (ReferenceGroup group : sub_groups) + targets.InsertTargets(std::move(*group.GetReader(old_disasm.get()))); + targets.FilterAndProject(offset_mapper); + + // Load extra targets from patch. + TargetSource target_source = patch.GetExtraTargetSource(pool_tag); + targets.InsertTargets(&target_source); + if (!target_source.Done()) { + LOG(ERROR) << "Found trailing extra_targets"; + return false; + } + + // Correct all new references, and write results to |new_disasm|. + for (ReferenceGroup group : sub_groups) { + std::unique_ptr ref_writer = + new_groups[group.type_tag().value()].GetWriter(new_image, + new_disasm.get()); + + EquivalenceSource equiv_source = patch.GetEquivalenceSource(); + for (auto equivalence = equiv_source.GetNext(); equivalence.has_value(); + equivalence = equiv_source.GetNext()) { + std::unique_ptr ref_gen = group.GetReader( + equivalence->src_offset, equivalence->src_end(), old_disasm.get()); + for (auto ref = ref_gen->GetNext(); ref.has_value(); + ref = ref_gen->GetNext()) { + DCHECK_GE(ref->location, equivalence->src_offset); + DCHECK_LT(ref->location, equivalence->src_end()); + + offset_t projected_target = offset_mapper.ForwardProject(ref->target); + offset_t expected_key = targets.KeyForNearestOffset(projected_target); + auto delta = ref_delta_source.GetNext(); + if (!delta.has_value()) { + LOG(ERROR) << "Error reading reference_delta"; + return false; + } + ref->target = targets.OffsetForKey(expected_key + delta.value()); + ref->location = + ref->location - equivalence->src_offset + equivalence->dst_offset; + ref_writer->PutNext(*ref); + } + } + } + } + if (!ref_delta_source.Done()) { + LOG(ERROR) << "Found trailing ref_delta_source"; + return false; + } + return true; +} + +bool ApplyElement(ExecutableType exe_type, + ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image) { + return ApplyEquivalenceAndExtraData(old_image, 
patch_reader, new_image) && + ApplyRawDelta(patch_reader, new_image) && + ApplyReferencesCorrection(exe_type, old_image, patch_reader, + new_image); +} + +/******** Exported Functions ********/ + +status::Code Apply(ConstBufferView old_image, + const EnsemblePatchReader& patch_reader, + MutableBufferView new_image) { + if (!patch_reader.CheckOldFile(old_image)) { + LOG(ERROR) << "Invalid old_image."; + return status::kStatusInvalidOldImage; + } + + for (const auto& element_patch : patch_reader.elements()) { + ElementMatch match = element_patch.element_match(); + if (!ApplyElement(match.exe_type(), old_image[match.old_element.region()], + element_patch, new_image[match.new_element.region()])) + return status::kStatusFatal; + } + + if (!patch_reader.CheckNewFile(ConstBufferView(new_image))) { + LOG(ERROR) << "Invalid new_image."; + return status::kStatusInvalidNewImage; + } + return status::kStatusSuccess; +} + +} // namespace zucchini diff --git a/zucchini_apply.h b/zucchini_apply.h new file mode 100644 index 0000000..559812e --- /dev/null +++ b/zucchini_apply.h @@ -0,0 +1,43 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_ + +#include + +#include "components/zucchini/image_utils.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/zucchini.h" + +namespace zucchini { + +// Reads equivalences from |patch_reader| to form preliminary |new_image|, +// copying regions from |old_image| and writing extra data from |patch_reader|. +bool ApplyEquivalenceAndExtraData(ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image); + +// Reads raw delta from |patch_reader| and applies corrections to |new_image|. 
+bool ApplyRawDelta(const PatchElementReader& patch_reader, + MutableBufferView new_image); + +// Corrects references in |new_image| by projecting references from |old_image| +// and applying corrections from |patch_reader|. Both |old_image| and +// |new_image| are matching elements associated with |exe_type|. +bool ApplyReferencesCorrection(ExecutableType exe_type, + ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image); + +// Applies patch element with type |exe_type| from |patch_reader| on |old_image| +// to produce |new_image|. +bool ApplyElement(ExecutableType exe_type, + ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_ diff --git a/zucchini_apply_unittest.cc b/zucchini_apply_unittest.cc new file mode 100644 index 0000000..7e26b7b --- /dev/null +++ b/zucchini_apply_unittest.cc @@ -0,0 +1,22 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/zucchini_apply.h" + +#include + +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using OffsetVector = std::vector; + +} // namespace + +// TODO(huangs): Add more tests. + +} // namespace zucchini diff --git a/zucchini_commands.cc b/zucchini_commands.cc new file mode 100644 index 0000000..60b87cb --- /dev/null +++ b/zucchini_commands.cc @@ -0,0 +1,176 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/zucchini_commands.h" + +#include +#include + +#include +#include +#include + +#include "base/command_line.h" +#include "base/files/file.h" +#include "base/files/file_path.h" +#include "base/files/memory_mapped_file.h" +#include "base/logging.h" +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/crc32.h" +#include "components/zucchini/io_utils.h" +#include "components/zucchini/mapped_file.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/zucchini_integration.h" +#include "components/zucchini/zucchini_tools.h" + +namespace { + +/******** Command-line Switches ********/ + +constexpr char kSwitchDump[] = "dump"; +constexpr char kSwitchRaw[] = "raw"; + +} // namespace + +zucchini::status::Code MainGen(MainParams params) { + CHECK_EQ(3U, params.file_paths.size()); + + // TODO(huangs): Move implementation to zucchini_integration.cc. + using base::File; + File old_file(params.file_paths[0], File::FLAG_OPEN | File::FLAG_READ); + zucchini::MappedFileReader old_image(std::move(old_file)); + if (old_image.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << old_image.error(); + return zucchini::status::kStatusFileReadError; + } + File new_file(params.file_paths[1], File::FLAG_OPEN | File::FLAG_READ); + zucchini::MappedFileReader new_image(std::move(new_file)); + if (new_image.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[1].value() << ": " + << new_image.error(); + return zucchini::status::kStatusFileReadError; + } + zucchini::EnsemblePatchWriter patch_writer(old_image.region(), + new_image.region()); + + auto generate = params.command_line.HasSwitch(kSwitchRaw) + ? 
zucchini::GenerateRaw + : zucchini::GenerateEnsemble; + zucchini::status::Code result = + generate(old_image.region(), new_image.region(), &patch_writer); + if (result != zucchini::status::kStatusSuccess) { + params.out << "Fatal error encountered when generating patch." << std::endl; + return result; + } + + // By default, delete patch on destruction, to avoid having lingering files in + // case of a failure. On Windows deletion can be done by the OS. + File patch_file(params.file_paths[2], File::FLAG_CREATE_ALWAYS | + File::FLAG_READ | File::FLAG_WRITE | + File::FLAG_SHARE_DELETE | + File::FLAG_CAN_DELETE_ON_CLOSE); + zucchini::MappedFileWriter patch(params.file_paths[2], std::move(patch_file), + patch_writer.SerializedSize()); + if (patch.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[2].value() << ": " + << patch.error(); + return zucchini::status::kStatusFileWriteError; + } + + if (!patch_writer.SerializeInto(patch.region())) + return zucchini::status::kStatusPatchWriteError; + + // Successfully created patch. Explicitly request file to be kept. 
+ if (!patch.Keep()) + return zucchini::status::kStatusFileWriteError; + return zucchini::status::kStatusSuccess; +} + +zucchini::status::Code MainApply(MainParams params) { + CHECK_EQ(3U, params.file_paths.size()); + return zucchini::Apply(params.file_paths[0], params.file_paths[1], + params.file_paths[2]); +} + +zucchini::status::Code MainRead(MainParams params) { + CHECK_EQ(1U, params.file_paths.size()); + base::File input_file(params.file_paths[0], + base::File::FLAG_OPEN | base::File::FLAG_READ); + zucchini::MappedFileReader input(std::move(input_file)); + if (input.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << input.error(); + return zucchini::status::kStatusFileReadError; + } + + bool do_dump = params.command_line.HasSwitch(kSwitchDump); + zucchini::status::Code status = zucchini::ReadReferences( + {input.data(), input.length()}, do_dump, params.out); + if (status != zucchini::status::kStatusSuccess) + params.err << "Fatal error found when dumping references." << std::endl; + return status; +} + +zucchini::status::Code MainDetect(MainParams params) { + CHECK_EQ(1U, params.file_paths.size()); + base::File input_file(params.file_paths[0], + base::File::FLAG_OPEN | base::File::FLAG_READ); + zucchini::MappedFileReader input(std::move(input_file)); + if (input.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << input.error(); + return zucchini::status::kStatusFileReadError; + } + + std::vector sub_image_list; + zucchini::status::Code result = zucchini::DetectAll( + {input.data(), input.length()}, params.out, &sub_image_list); + if (result != zucchini::status::kStatusSuccess) + params.err << "Fatal error found when detecting executables." 
<< std::endl; + return result; +} + +zucchini::status::Code MainMatch(MainParams params) { + CHECK_EQ(2U, params.file_paths.size()); + using base::File; + File old_file(params.file_paths[0], File::FLAG_OPEN | File::FLAG_READ); + zucchini::MappedFileReader old_image(std::move(old_file)); + if (old_image.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << old_image.error(); + return zucchini::status::kStatusFileReadError; + } + File new_file(params.file_paths[1], File::FLAG_OPEN | File::FLAG_READ); + zucchini::MappedFileReader new_image(std::move(new_file)); + if (old_image.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[1].value() << ": " + << new_image.error(); + return zucchini::status::kStatusFileReadError; + } + zucchini::status::Code status = + zucchini::MatchAll({old_image.data(), old_image.length()}, + {new_image.data(), new_image.length()}, params.out); + if (status != zucchini::status::kStatusSuccess) + params.err << "Fatal error found when matching executables." << std::endl; + return status; +} + +zucchini::status::Code MainCrc32(MainParams params) { + CHECK_EQ(1U, params.file_paths.size()); + base::File image_file(params.file_paths[0], + base::File::FLAG_OPEN | base::File::FLAG_READ); + zucchini::MappedFileReader image(std::move(image_file)); + if (image.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << image.error(); + return zucchini::status::kStatusFileReadError; + } + + uint32_t crc = + zucchini::CalculateCrc32(image.data(), image.data() + image.length()); + params.out << "CRC32: " << zucchini::AsHex<8>(crc) << std::endl; + return zucchini::status::kStatusSuccess; +} diff --git a/zucchini_commands.h b/zucchini_commands.h new file mode 100644 index 0000000..cef18dc --- /dev/null +++ b/zucchini_commands.h @@ -0,0 +1,51 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_ + +#include +#include + +#include "base/files/file_path.h" +#include "components/zucchini/zucchini.h" + +// Zucchini commands and tools that can be invoked from command-line. + +namespace base { + +class CommandLine; + +} // namespace base + +// Aggregated parameter for Main*() functions, to simplify interface. +struct MainParams { + const base::CommandLine& command_line; + const std::vector& file_paths; + std::ostream& out; + std::ostream& err; +}; + +// Signature of a Zucchini Command Function. +using CommandFunction = zucchini::status::Code (*)(MainParams); + +// Command Function: Patch generation. +zucchini::status::Code MainGen(MainParams params); + +// Command Function: Patch application. +zucchini::status::Code MainApply(MainParams params); + +// Command Function: Read and dump references from an executable. +zucchini::status::Code MainRead(MainParams params); + +// Command Function: Scan an archive file and detect executables. +zucchini::status::Code MainDetect(MainParams params); + +// Command Function: Scan two archive files and match detected executables. +zucchini::status::Code MainMatch(MainParams params); + +// Command Function: Compute CRC-32 of a file. +zucchini::status::Code MainCrc32(MainParams params); + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_ diff --git a/zucchini_exe_version.rc.version b/zucchini_exe_version.rc.version new file mode 100644 index 0000000..9d46a4b --- /dev/null +++ b/zucchini_exe_version.rc.version @@ -0,0 +1,46 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION @MAJOR@,@MINOR@,@BUILD@,@PATCH@ + PRODUCTVERSION @MAJOR@,@MINOR@,@BUILD@,@PATCH@ + FILEFLAGSMASK 0x17L +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x4L + FILETYPE 0x1L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "CompanyName", "@COMPANY_FULLNAME@" + VALUE "FileDescription", "Zucchini" + VALUE "FileVersion", "@MAJOR@.@MINOR@.@BUILD@.@PATCH@" + VALUE "InternalName", "zucchini" + VALUE "LegalCopyright", "@COPYRIGHT@" + VALUE "ProductName", "Zucchini" + VALUE "ProductVersion", "@MAJOR@.@MINOR@.@BUILD@.@PATCH@" + VALUE "CompanyShortName", "@COMPANY_SHORTNAME@" + VALUE "ProductShortName", "Zucchini" + VALUE "LastChange", "@LASTCHANGE@" + VALUE "Official Build", "@OFFICIAL_BUILD@" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END diff --git a/zucchini_gen.cc b/zucchini_gen.cc new file mode 100644 index 0000000..4be0b8b --- /dev/null +++ b/zucchini_gen.cc @@ -0,0 +1,430 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/zucchini_gen.h" + +#include +#include + +#include +#include +#include +#include + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/encoded_view.h" +#include "components/zucchini/ensemble_matcher.h" +#include "components/zucchini/equivalence_map.h" +#include "components/zucchini/heuristic_ensemble_matcher.h" +#include "components/zucchini/image_index.h" +#include "components/zucchini/label_manager.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/suffix_array.h" +#include "components/zucchini/targets_affinity.h" + +namespace zucchini { + +namespace { + +// Parameters for patch generation. +constexpr double kMinEquivalenceSimilarity = 12.0; +constexpr double kMinLabelAffinity = 64.0; +constexpr size_t kNumIterations = 2; + +} // namespace + +std::vector FindExtraTargets(const TargetPool& projected_old_targets, + const TargetPool& new_targets) { + std::vector extra_targets; + std::set_difference( + new_targets.begin(), new_targets.end(), projected_old_targets.begin(), + projected_old_targets.end(), std::back_inserter(extra_targets)); + return extra_targets; +} + +EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index, + const ImageIndex& new_image_index) { + // Label matching (between "old" and "new") can guide EquivalenceMap + // construction; but EquivalenceMap induces Label matching. This apparent + // "chicken-and-egg" problem is solved by multiple iterations alternating 2 + // steps: + // - Association of targets based on previous EquivalenceMap. Note that the + // EquivalenceMap is empty on first iteration, so this is a no-op. + // - Construction of refined EquivalenceMap based on new targets associations. + size_t pool_count = old_image_index.PoolCount(); + // |target_affinities| is outside the loop to reduce allocation. 
+ std::vector target_affinities(pool_count); + + EquivalenceMap equivalence_map; + for (size_t i = 0; i < kNumIterations; ++i) { + EncodedView old_view(old_image_index); + EncodedView new_view(new_image_index); + + // Associate targets from "old" to "new" image based on |equivalence_map| + // for each reference pool. + for (const auto& old_pool_tag_and_targets : + old_image_index.target_pools()) { + PoolTag pool_tag = old_pool_tag_and_targets.first; + target_affinities[pool_tag.value()].InferFromSimilarities( + equivalence_map, old_pool_tag_and_targets.second.targets(), + new_image_index.pool(pool_tag).targets()); + + // Creates labels for strongly associated targets. + std::vector old_labels; + std::vector new_labels; + size_t label_bound = target_affinities[pool_tag.value()].AssignLabels( + kMinLabelAffinity, &old_labels, &new_labels); + old_view.SetLabels(pool_tag, std::move(old_labels), label_bound); + new_view.SetLabels(pool_tag, std::move(new_labels), label_bound); + } + // Build equivalence map, where references in "old" and "new" that share + // common semantics (i.e., their respective targets were associated earlier + // on) are considered equivalent. + equivalence_map.Build( + MakeSuffixArray(old_view, old_view.Cardinality()), + old_view, new_view, target_affinities, kMinEquivalenceSimilarity); + } + + return equivalence_map; +} + +bool GenerateEquivalencesAndExtraData(ConstBufferView new_image, + const EquivalenceMap& equivalence_map, + PatchElementWriter* patch_writer) { + // Make 2 passes through |equivalence_map| to reduce write churn. + // Pass 1: Write all equivalences. + EquivalenceSink equivalences_sink; + for (const EquivalenceCandidate& candidate : equivalence_map) + equivalences_sink.PutNext(candidate.eq); + patch_writer->SetEquivalenceSink(std::move(equivalences_sink)); + + // Pass 2: Write data in gaps in |new_image| before / between after + // |equivalence_map| as "extra data". 
+ ExtraDataSink extra_data_sink; + offset_t dst_offset = 0; + for (const EquivalenceCandidate& candidate : equivalence_map) { + extra_data_sink.PutNext( + new_image[{dst_offset, candidate.eq.dst_offset - dst_offset}]); + dst_offset = candidate.eq.dst_end(); + DCHECK_LE(dst_offset, new_image.size()); + } + extra_data_sink.PutNext( + new_image[{dst_offset, new_image.size() - dst_offset}]); + patch_writer->SetExtraDataSink(std::move(extra_data_sink)); + return true; +} + +bool GenerateRawDelta(ConstBufferView old_image, + ConstBufferView new_image, + const EquivalenceMap& equivalence_map, + const ImageIndex& new_image_index, + PatchElementWriter* patch_writer) { + RawDeltaSink raw_delta_sink; + + // Visit |equivalence_map| blocks in |new_image| order. Find and emit all + // bytewise differences. + offset_t base_copy_offset = 0; + for (const EquivalenceCandidate& candidate : equivalence_map) { + Equivalence equivalence = candidate.eq; + // For each bytewise delta from |old_image| to |new_image|, compute "copy + // offset" and pass it along with delta to the sink. + for (offset_t i = 0; i < equivalence.length; ++i) { + if (new_image_index.IsReference(equivalence.dst_offset + i)) + continue; // Skip references since they're handled elsewhere. 
+ + int8_t diff = new_image[equivalence.dst_offset + i] - + old_image[equivalence.src_offset + i]; + if (diff) + raw_delta_sink.PutNext({base_copy_offset + i, diff}); + } + base_copy_offset += equivalence.length; + } + patch_writer->SetRawDeltaSink(std::move(raw_delta_sink)); + return true; +} + +bool GenerateReferencesDelta(const ReferenceSet& src_refs, + const ReferenceSet& dst_refs, + const TargetPool& projected_target_pool, + const OffsetMapper& offset_mapper, + const EquivalenceMap& equivalence_map, + ReferenceDeltaSink* reference_delta_sink) { + size_t ref_width = src_refs.width(); + auto dst_ref = dst_refs.begin(); + + // For each equivalence, for each covered |dst_ref| and the matching + // |src_ref|, emit the delta between the respective target labels. Note: By + // construction, each reference location (with |ref_width|) lies either + // completely inside an equivalence or completely outside. We perform + // "straddle checks" throughout to verify this assertion. + for (const auto& candidate : equivalence_map) { + const Equivalence equiv = candidate.eq; + // Increment |dst_ref| until it catches up to |equiv|. + while (dst_ref != dst_refs.end() && dst_ref->location < equiv.dst_offset) + ++dst_ref; + if (dst_ref == dst_refs.end()) + break; + if (dst_ref->location >= equiv.dst_end()) + continue; + // Straddle check. + DCHECK_LE(dst_ref->location + ref_width, equiv.dst_end()); + + offset_t src_loc = + equiv.src_offset + (dst_ref->location - equiv.dst_offset); + auto src_ref = std::lower_bound( + src_refs.begin(), src_refs.end(), src_loc, + [](const IndirectReference& a, offset_t b) { return a.location < b; }); + for (; dst_ref != dst_refs.end() && + dst_ref->location + ref_width <= equiv.dst_end(); + ++dst_ref, ++src_ref) { + // Local offset of |src_ref| should match that of |dst_ref|. 
+ DCHECK_EQ(src_ref->location - equiv.src_offset, + dst_ref->location - equiv.dst_offset); + offset_t old_offset = + src_refs.target_pool().OffsetForKey(src_ref->target_key); + offset_t new_estimated_offset = offset_mapper.ForwardProject(old_offset); + offset_t new_estimated_key = + projected_target_pool.KeyForNearestOffset(new_estimated_offset); + offset_t new_offset = + dst_refs.target_pool().OffsetForKey(dst_ref->target_key); + offset_t new_key = projected_target_pool.KeyForOffset(new_offset); + + reference_delta_sink->PutNext( + static_cast(new_key - new_estimated_key)); + } + if (dst_ref == dst_refs.end()) + break; // Done. + // Straddle check. + DCHECK_GE(dst_ref->location, equiv.dst_end()); + } + return true; +} + +bool GenerateExtraTargets(const std::vector& extra_targets, + PoolTag pool_tag, + PatchElementWriter* patch_writer) { + TargetSink target_sink; + for (offset_t target : extra_targets) + target_sink.PutNext(target); + patch_writer->SetTargetSink(pool_tag, std::move(target_sink)); + return true; +} + +bool GenerateRawElement(const std::vector& old_sa, + ConstBufferView old_image, + ConstBufferView new_image, + PatchElementWriter* patch_writer) { + ImageIndex old_image_index(old_image); + ImageIndex new_image_index(new_image); + + EquivalenceMap equivalences; + equivalences.Build(old_sa, EncodedView(old_image_index), + EncodedView(new_image_index), {}, + kMinEquivalenceSimilarity); + + patch_writer->SetReferenceDeltaSink({}); + return GenerateEquivalencesAndExtraData(new_image, equivalences, + patch_writer) && + GenerateRawDelta(old_image, new_image, equivalences, new_image_index, + patch_writer); +} + +bool GenerateExecutableElement(ExecutableType exe_type, + ConstBufferView old_image, + ConstBufferView new_image, + PatchElementWriter* patch_writer) { + // Initialize Disassemblers. 
+ std::unique_ptr old_disasm = + MakeDisassemblerOfType(old_image, exe_type); + std::unique_ptr new_disasm = + MakeDisassemblerOfType(new_image, exe_type); + if (!old_disasm || !new_disasm) { + LOG(ERROR) << "Failed to create Disassembler."; + return false; + } + DCHECK_EQ(old_disasm->GetExeType(), new_disasm->GetExeType()); + + // Initialize ImageIndexes. + ImageIndex old_image_index(old_image); + ImageIndex new_image_index(new_image); + if (!old_image_index.Initialize(old_disasm.get()) || + !new_image_index.Initialize(new_disasm.get())) { + LOG(ERROR) << "Failed to create ImageIndex: Overlapping references found?"; + return false; + } + DCHECK_EQ(old_image_index.PoolCount(), new_image_index.PoolCount()); + + EquivalenceMap equivalences = + CreateEquivalenceMap(old_image_index, new_image_index); + OffsetMapper offset_mapper(equivalences); + + ReferenceDeltaSink reference_delta_sink; + for (const auto& old_targets : old_image_index.target_pools()) { + PoolTag pool_tag = old_targets.first; + TargetPool projected_old_targets = old_targets.second; + projected_old_targets.FilterAndProject(offset_mapper); + std::vector extra_target = + FindExtraTargets(projected_old_targets, new_image_index.pool(pool_tag)); + projected_old_targets.InsertTargets(extra_target); + + if (!GenerateExtraTargets(extra_target, pool_tag, patch_writer)) + return false; + for (TypeTag type_tag : old_targets.second.types()) { + if (!GenerateReferencesDelta(old_image_index.refs(type_tag), + new_image_index.refs(type_tag), + projected_old_targets, offset_mapper, + equivalences, &reference_delta_sink)) { + return false; + } + } + } + patch_writer->SetReferenceDeltaSink(std::move(reference_delta_sink)); + + return GenerateEquivalencesAndExtraData(new_image, equivalences, + patch_writer) && + GenerateRawDelta(old_image, new_image, equivalences, new_image_index, + patch_writer); +} + +/******** Exported Functions ********/ + +status::Code GenerateEnsemble(ConstBufferView old_image, + ConstBufferView 
new_image, + EnsemblePatchWriter* patch_writer) { + std::unique_ptr matcher = + std::make_unique(nullptr); + if (!matcher->RunMatch(old_image, new_image)) { + LOG(INFO) << "RunMatch() failed, generating raw patch."; + return GenerateRaw(old_image, new_image, patch_writer); + } + + const std::vector& matches = matcher->matches(); + LOG(INFO) << "Matching: Found " << matches.size() + << " nontrivial matches and " << matcher->num_identical() + << " identical matches."; + size_t num_elements = matches.size(); + if (num_elements == 0) { + LOG(INFO) << "No nontrival matches, generating raw patch."; + return GenerateRaw(old_image, new_image, patch_writer); + } + + PatchType patch_type = PatchType::kRawPatch; + if (num_elements == 1 && matches[0].old_element.size == old_image.size() && + matches[0].new_element.size == new_image.size()) { + // If |old_image| matches |new_image| entirely then we have single patch. + LOG(INFO) << "Old and new files are executables, " + << "generating single-file patch."; + patch_type = PatchType::kSinglePatch; + } else { + LOG(INFO) << "Generating ensemble patch."; + patch_type = PatchType::kEnsemblePatch; + } + + // "Gaps" are |new_image| bytes not covered by new_elements in |matches|. + // These are treated as raw data, and patched against the entire |old_image|. + + // |patch_element_map| (keyed by "new" offsets) stores PatchElementWriter + // results so elements and "gap" results can be computed separately (to reduce + // peak memory usage), and later, properly serialized to |patch_writer| + // ordered by "new" offset. + std::map patch_element_map; + + // Variables to track element patching successes. + std::vector covered_new_regions; + size_t covered_new_bytes = 0; + + // Process elements first, since non-fatal failures may turn some into gaps. 
+ for (const ElementMatch& match : matches) { + BufferRegion new_region = match.new_element.region(); + LOG(INFO) << "--- Match [" << new_region.lo() << "," << new_region.hi() + << ")"; + + auto it_and_success = patch_element_map.emplace( + base::checked_cast(new_region.lo()), match); + DCHECK(it_and_success.second); + PatchElementWriter& patch_element = it_and_success.first->second; + + ConstBufferView old_sub_image = old_image[match.old_element.region()]; + ConstBufferView new_sub_image = new_image[new_region]; + if (GenerateExecutableElement(match.exe_type(), old_sub_image, + new_sub_image, &patch_element)) { + covered_new_regions.push_back(new_region); + covered_new_bytes += new_region.size; + } else { + LOG(INFO) << "Fall back to raw patching."; + patch_element_map.erase(it_and_success.first); + } + } + + if (covered_new_bytes == 0) + patch_type = PatchType::kRawPatch; + + if (covered_new_bytes < new_image.size()) { + // Process all "gaps", which are patched against the entire "old" image. To + // compute equivalence maps, "gaps" share a common suffix array + // |old_sa_raw|, whose lifetime is kept separated from elements' suffix + // arrays to reduce peak memory. + Element entire_old_element(old_image.local_region(), kExeTypeNoOp); + ImageIndex old_image_index(old_image); + EncodedView old_view_raw(old_image_index); + std::vector old_sa_raw = + MakeSuffixArray(old_view_raw, size_t(256)); + + offset_t gap_lo = 0; + // Add sentinel that points to end of "new" file, to simplify gap iteration. 
+ covered_new_regions.emplace_back(BufferRegion{new_image.size(), 0}); + + for (const BufferRegion& covered : covered_new_regions) { + offset_t gap_hi = base::checked_cast(covered.lo()); + DCHECK_GE(gap_hi, gap_lo); + offset_t gap_size = gap_hi - gap_lo; + if (gap_size > 0) { + LOG(INFO) << "--- Gap [" << gap_lo << "," << gap_hi << ")"; + + ElementMatch gap_match{{entire_old_element, kExeTypeNoOp}, + {{gap_lo, gap_size}, kExeTypeNoOp}}; + auto it_and_success = patch_element_map.emplace(gap_lo, gap_match); + DCHECK(it_and_success.second); + PatchElementWriter& patch_element = it_and_success.first->second; + + ConstBufferView new_sub_image = new_image[{gap_lo, gap_size}]; + if (!GenerateRawElement(old_sa_raw, old_image, new_sub_image, + &patch_element)) { + return status::kStatusFatal; + } + } + gap_lo = base::checked_cast(covered.hi()); + } + } + + patch_writer->SetPatchType(patch_type); + // Write all PatchElementWriter sorted by "new" offset. + for (auto& new_lo_and_patch_element : patch_element_map) + patch_writer->AddElement(std::move(new_lo_and_patch_element.second)); + + return status::kStatusSuccess; +} + +status::Code GenerateRaw(ConstBufferView old_image, + ConstBufferView new_image, + EnsemblePatchWriter* patch_writer) { + patch_writer->SetPatchType(PatchType::kRawPatch); + + ImageIndex old_image_index(old_image); + EncodedView old_view(old_image_index); + std::vector old_sa = + MakeSuffixArray(old_view, old_view.Cardinality()); + + PatchElementWriter patch_element( + {Element(old_image.local_region()), Element(new_image.local_region())}); + if (!GenerateRawElement(old_sa, old_image, new_image, &patch_element)) + return status::kStatusFatal; + patch_writer->AddElement(std::move(patch_element)); + return status::kStatusSuccess; +} + +} // namespace zucchini diff --git a/zucchini_gen.h b/zucchini_gen.h new file mode 100644 index 0000000..a0f3630 --- /dev/null +++ b/zucchini_gen.h @@ -0,0 +1,84 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_ + +#include + +#include "base/optional.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/zucchini.h" + +namespace zucchini { + +class EquivalenceMap; +class OffsetMapper; +class ImageIndex; +class PatchElementWriter; +class ReferenceDeltaSink; +class ReferenceSet; +class TargetPool; + +// Extract all targets in |new_targets| with no associated target in +// |projected_old_targets| and returns these targets in a new vector. +std::vector FindExtraTargets(const TargetPool& projected_old_targets, + const TargetPool& new_targets); + +// Creates an EquivalenceMap from "old" image to "new" image and returns the +// result. The params |*_image_index|: +// - Provide "old" and "new" raw image data and references. +// - Mediate Label matching, which links references between "old" and "new", and +// guides EquivalenceMap construction. +EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index, + const ImageIndex& new_image_index); + +// Writes equivalences from |equivalence_map|, and extra data from |new_image| +// found in gaps between equivalences to |patch_writer|. +bool GenerateEquivalencesAndExtraData(ConstBufferView new_image, + const EquivalenceMap& equivalence_map, + PatchElementWriter* patch_writer); + +// Writes raw delta between |old_image| and |new_image| matched by +// |equivalence_map| to |patch_writer|, using |new_image_index| to ignore +// reference bytes. +bool GenerateRawDelta(ConstBufferView old_image, + ConstBufferView new_image, + const EquivalenceMap& equivalence_map, + const ImageIndex& new_image_index, + PatchElementWriter* patch_writer); + +// Writes reference delta between references from |old_refs| and from +// |new_refs| to |patch_writer|. 
|projected_target_pool| contains projected +// targets from old to new image for references pool associated with |new_refs|. +bool GenerateReferencesDelta(const ReferenceSet& src_refs, + const ReferenceSet& dst_refs, + const TargetPool& projected_target_pool, + const OffsetMapper& offset_mapper, + const EquivalenceMap& equivalence_map, + ReferenceDeltaSink* reference_delta_sink); + +// Writes |extra_targets| associated with |pool_tag| to |patch_writer|. +bool GenerateExtraTargets(const std::vector& extra_targets, + PoolTag pool_tag, + PatchElementWriter* patch_writer); + +// Generates raw patch element data between |old_image| and |new_image|, and +// writes them to |patch_writer|. |old_sa| is the suffix array for |old_image|. +bool GenerateRawElement(const std::vector& old_sa, + ConstBufferView old_image, + ConstBufferView new_image, + PatchElementWriter* patch_writer); + +// Generates patch element of type |exe_type| from |old_image| to |new_image|, +// and writes it to |patch_writer|. +bool GenerateExecutableElement(ExecutableType exe_type, + ConstBufferView old_image, + ConstBufferView new_image, + PatchElementWriter* patch_writer); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_ diff --git a/zucchini_gen_unittest.cc b/zucchini_gen_unittest.cc new file mode 100644 index 0000000..29e84d6 --- /dev/null +++ b/zucchini_gen_unittest.cc @@ -0,0 +1,176 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/zucchini_gen.h" + +#include + +#include +#include + +#include "components/zucchini/equivalence_map.h" +#include "components/zucchini/image_index.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/label_manager.h" +#include "components/zucchini/test_disassembler.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using OffsetVector = std::vector; + +// In normal usage, 0.0 is an unrealistic similarity value for an +// EquivalenceCandidate. Since similarity doesn't affect results for various +// unit tests in this file, we use this dummy value for simplicity. +constexpr double kDummySim = 0.0; + +// Helper function wrapping GenerateReferencesDelta(). +std::vector GenerateReferencesDeltaTest( + std::vector&& old_references, + std::vector&& new_references, + std::vector&& exp_old_targets, + std::vector&& exp_projected_old_targets, + EquivalenceMap&& equivalence_map) { + ReferenceDeltaSink reference_delta_sink; + + TargetPool old_targets; + old_targets.InsertTargets(old_references); + ReferenceSet old_refs({1, TypeTag(0), PoolTag(0)}, old_targets); + old_refs.InitReferences(old_references); + EXPECT_EQ(exp_old_targets, old_targets.targets()); + + TargetPool new_targets; + new_targets.InsertTargets(new_references); + ReferenceSet new_refs({1, TypeTag(0), PoolTag(0)}, new_targets); + new_refs.InitReferences(new_references); + + OffsetMapper offset_mapper(equivalence_map); + TargetPool projected_old_targets = old_targets; + projected_old_targets.FilterAndProject(offset_mapper); + + std::vector extra_target = + FindExtraTargets(projected_old_targets, new_targets); + projected_old_targets.InsertTargets(extra_target); + EXPECT_EQ(exp_projected_old_targets, projected_old_targets.targets()); + + GenerateReferencesDelta(old_refs, new_refs, projected_old_targets, + offset_mapper, equivalence_map, + &reference_delta_sink); + + // Serialize |reference_delta_sink| to patch format, 
and read it back as + // std::vector. + std::vector buffer(reference_delta_sink.SerializedSize()); + BufferSink sink(buffer.data(), buffer.size()); + reference_delta_sink.SerializeInto(&sink); + + BufferSource source(buffer.data(), buffer.size()); + ReferenceDeltaSource reference_delta_source; + EXPECT_TRUE(reference_delta_source.Initialize(&source)); + std::vector delta_vec; + for (auto delta = reference_delta_source.GetNext(); delta.has_value(); + delta = reference_delta_source.GetNext()) { + delta_vec.push_back(*delta); + } + EXPECT_TRUE(reference_delta_source.Done()); + return delta_vec; +} + +} // namespace + +TEST(ZucchiniGenTest, FindExtraTargets) { + EXPECT_EQ(OffsetVector(), FindExtraTargets({}, {})); + EXPECT_EQ(OffsetVector(), FindExtraTargets(TargetPool({3}), {})); + EXPECT_EQ(OffsetVector(), FindExtraTargets(TargetPool({3}), TargetPool({3}))); + EXPECT_EQ(OffsetVector({4}), + FindExtraTargets(TargetPool({3}), TargetPool({4}))); + EXPECT_EQ(OffsetVector({4}), + FindExtraTargets(TargetPool({3}), TargetPool({3, 4}))); + EXPECT_EQ(OffsetVector({4}), + FindExtraTargets(TargetPool({2, 3}), TargetPool({3, 4}))); + EXPECT_EQ(OffsetVector({3, 5}), + FindExtraTargets(TargetPool({2, 4}), TargetPool({3, 5}))); +} + +TEST(ZucchiniGenTest, GenerateReferencesDelta) { + // No equivalences. + EXPECT_EQ(std::vector(), + GenerateReferencesDeltaTest({}, {}, {}, {}, EquivalenceMap())); + EXPECT_EQ(std::vector(), + GenerateReferencesDeltaTest({{10, 0}}, {{20, 0}}, {0}, {0}, + EquivalenceMap())); + + // Simple cases with one equivalence. + EXPECT_EQ( + std::vector({0}), // {0 - 0}. + GenerateReferencesDeltaTest( + {{10, 3}}, {{20, 3}}, {3}, {3}, + EquivalenceMap({{{3, 3, 1}, kDummySim}, {{10, 20, 4}, kDummySim}}))); + EXPECT_EQ( + std::vector({-1}), // {0 - 1}. + GenerateReferencesDeltaTest( + {{10, 3}}, {{20, 3}}, {3}, {3, 4}, + EquivalenceMap({{{3, 4, 1}, kDummySim}, {{10, 20, 4}, kDummySim}}))); + EXPECT_EQ( + std::vector({1}), // {1 - 0}. 
+ GenerateReferencesDeltaTest( + {{10, 3}}, {{20, 3}}, {3}, {2, 3}, + EquivalenceMap({{{3, 2, 1}, kDummySim}, {{10, 20, 4}, kDummySim}}))); + EXPECT_EQ(std::vector({1, -1}), // {1 - 0, 0 - 1}. + GenerateReferencesDeltaTest( + {{10, 3}, {11, 4}}, {{20, 3}, {21, 4}}, {3, 4}, {2, 3, 4, 5}, + EquivalenceMap({{{3, 2, 1}, kDummySim}, + {{4, 5, 1}, kDummySim}, + {{10, 20, 4}, kDummySim}}))); + + EXPECT_EQ( + std::vector({0, 0}), // {1 - 1, 2 - 2}. + GenerateReferencesDeltaTest( + {{10, 3}, {11, 4}, {12, 5}, {13, 6}}, + {{20, 3}, {21, 4}, {22, 5}, {23, 6}}, {3, 4, 5, 6}, {3, 4, 5, 6}, + EquivalenceMap({{{3, 3, 4}, kDummySim}, {{11, 21, 2}, kDummySim}}))); + + // Multiple equivalences. + EXPECT_EQ(std::vector({-1, 1}), // {0 - 1, 1 - 0}. + GenerateReferencesDeltaTest( + {{10, 0}, {12, 1}}, {{10, 0}, {12, 1}}, {0, 1}, {0, 1}, + EquivalenceMap({{{0, 0, 2}, kDummySim}, + {{12, 10, 2}, kDummySim}, + {{10, 12, 2}, kDummySim}}))); + EXPECT_EQ( + std::vector({0, 0}), // {0 - 0, 1 - 1}. + GenerateReferencesDeltaTest( + {{0, 0}, {2, 2}}, {{0, 0}, {2, 2}}, {0, 2}, {0, 2}, + EquivalenceMap({{{2, 0, 2}, kDummySim}, {{0, 2, 2}, kDummySim}}))); + + EXPECT_EQ(std::vector({-2, 2}), // {0 - 2, 2 - 0}. + GenerateReferencesDeltaTest( + {{10, 0}, {12, 1}, {14, 2}}, {{10, 0}, {12, 1}, {14, 2}}, + {0, 1, 2}, {0, 1, 2}, + EquivalenceMap({{{0, 0, 3}, kDummySim}, + {{14, 10, 2}, kDummySim}, + {{10, 14, 2}, kDummySim}}))); + + EXPECT_EQ(std::vector({-2, 2}), // {0 - 2, 2 - 0}. + GenerateReferencesDeltaTest( + {{11, 0}, {14, 1}, {17, 2}}, {{11, 0}, {14, 1}, {17, 2}}, + {0, 1, 2}, {0, 1, 2}, + EquivalenceMap({{{0, 0, 3}, kDummySim}, + {{16, 10, 3}, kDummySim}, + {{10, 16, 3}, kDummySim}}))); + + EXPECT_EQ( + std::vector({-2, 2}), // {0 - 2, 2 - 0}. 
+ GenerateReferencesDeltaTest({{10, 0}, {14, 2}, {16, 1}}, + {{10, 0}, {14, 2}}, {0, 1, 2}, {0, 1, 2}, + EquivalenceMap({{{0, 0, 3}, kDummySim}, + {{14, 10, 2}, kDummySim}, + {{12, 12, 2}, kDummySim}, + {{10, 14, 2}, kDummySim}}))); +} + +// TODO(huangs): Add more tests. + +} // namespace zucchini diff --git a/zucchini_integration.cc b/zucchini_integration.cc new file mode 100644 index 0000000..3ca4601 --- /dev/null +++ b/zucchini_integration.cc @@ -0,0 +1,122 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/zucchini_integration.h" + +#include + +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/mapped_file.h" +#include "components/zucchini/patch_reader.h" + +namespace zucchini { + +namespace { + +struct FileNames { + FileNames() : is_dummy(true) { + // Use fake names. If |is_dummy| is true these files are only used for error + // output. 
+ old_name = old_name.AppendASCII("old_name"); + patch_name = patch_name.AppendASCII("patch_name"); + new_name = new_name.AppendASCII("new_name"); + } + + FileNames(const base::FilePath& old_name, + const base::FilePath& patch_name, + const base::FilePath& new_name) + : old_name(old_name), + patch_name(patch_name), + new_name(new_name), + is_dummy(false) {} + + base::FilePath old_name; + base::FilePath patch_name; + base::FilePath new_name; + const bool is_dummy; +}; + +status::Code ApplyCommon(base::File&& old_file_handle, + base::File&& patch_file_handle, + base::File&& new_file_handle, + const FileNames& names) { + MappedFileReader patch_file(std::move(patch_file_handle)); + if (patch_file.HasError()) { + LOG(ERROR) << "Error with file " << names.patch_name.value() << ": " + << patch_file.error(); + return status::kStatusFileReadError; + } + + auto patch_reader = + zucchini::EnsemblePatchReader::Create(patch_file.region()); + if (!patch_reader.has_value()) { + LOG(ERROR) << "Error reading patch header."; + return status::kStatusPatchReadError; + } + + MappedFileReader old_file(std::move(old_file_handle)); + if (old_file.HasError()) { + LOG(ERROR) << "Error with file " << names.old_name.value() << ": " + << old_file.error(); + return status::kStatusFileReadError; + } + if (!patch_reader->CheckOldFile(old_file.region())) { + LOG(ERROR) << "Invalid old_file."; + return status::kStatusInvalidOldImage; + } + + zucchini::PatchHeader header = patch_reader->header(); + // By default, delete output on destruction, to avoid having lingering files + // in case of a failure. On Windows deletion can be done by the OS. 
+ base::FilePath file_path; + if (!names.is_dummy) + file_path = base::FilePath(names.new_name); + + MappedFileWriter new_file(file_path, std::move(new_file_handle), + header.new_size); + if (new_file.HasError()) { + LOG(ERROR) << "Error with file " << names.new_name.value() << ": " + << new_file.error(); + return status::kStatusFileWriteError; + } + + zucchini::status::Code result = + zucchini::Apply(old_file.region(), *patch_reader, new_file.region()); + if (result != status::kStatusSuccess) { + LOG(ERROR) << "Fatal error encountered while applying patch."; + return result; + } + + // Successfully patch |new_file|. Explicitly request file to be kept. + if (!new_file.Keep()) + return status::kStatusFileWriteError; + return status::kStatusSuccess; +} + +} // namespace + +status::Code Apply(base::File old_file_handle, + base::File patch_file_handle, + base::File new_file_handle) { + const FileNames file_names = FileNames(); + return ApplyCommon(std::move(old_file_handle), std::move(patch_file_handle), + std::move(new_file_handle), file_names); +} + +status::Code Apply(const base::FilePath& old_path, + const base::FilePath& patch_path, + const base::FilePath& new_path) { + using base::File; + File old_file(old_path, File::FLAG_OPEN | File::FLAG_READ); + File patch_file(patch_path, File::FLAG_OPEN | File::FLAG_READ); + File new_file(new_path, File::FLAG_CREATE_ALWAYS | File::FLAG_READ | + File::FLAG_WRITE | File::FLAG_SHARE_DELETE | + File::FLAG_CAN_DELETE_ON_CLOSE); + const FileNames file_names(old_path, patch_path, new_path); + return ApplyCommon(std::move(old_file), std::move(patch_file), + std::move(new_file), file_names); +} + +} // namespace zucchini diff --git a/zucchini_integration.h b/zucchini_integration.h new file mode 100644 index 0000000..7c3fc40 --- /dev/null +++ b/zucchini_integration.h @@ -0,0 +1,34 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_ + +#include "base/files/file.h" +#include "base/files/file_path.h" +#include "components/zucchini/zucchini.h" + +namespace zucchini { + +// Applies the patch in |patch_file| to the bytes in |old_file| and writes the +// result to |new_file|. Since this uses memory mapped files, crashes are +// expected in case of I/O errors. On Windows |new_file| is kept iff returned +// code is kStatusSuccess, and is deleted otherwise. For UNIX systems the +// caller needs to do cleanup since it has ownership of the base::File params +// and Zucchini has no knowledge of which base::FilePath to delete. +status::Code Apply(base::File&& old_file, + base::File&& patch_file, + base::File&& new_file); + +// Applies the patch in |patch_path| to the bytes in |old_path| and writes the +// result to |new_path|. Since this uses memory mapped files, crashes are +// expected in case of I/O errors. |new_path| is kept iff returned code is +// kStatusSuccess, and is deleted otherwise. +status::Code Apply(const base::FilePath& old_path, + const base::FilePath& patch_path, + const base::FilePath& new_path); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_ diff --git a/zucchini_main.cc b/zucchini_main.cc new file mode 100644 index 0000000..adff154 --- /dev/null +++ b/zucchini_main.cc @@ -0,0 +1,54 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +#include "base/command_line.h" +#include "base/logging.h" +#include "base/process/memory.h" +#include "build/build_config.h" +#include "components/zucchini/main_utils.h" + +#if defined(OS_WIN) +#include "base/win/process_startup_helper.h" +#endif // defined(OS_WIN) + +namespace { + +void InitLogging() { + logging::LoggingSettings settings; + settings.logging_dest = logging::LOG_TO_SYSTEM_DEBUG_LOG; + settings.log_file = nullptr; + settings.lock_log = logging::DONT_LOCK_LOG_FILE; + settings.delete_old = logging::APPEND_TO_OLD_LOG_FILE; + bool logging_res = logging::InitLogging(settings); + CHECK(logging_res); +} + +void InitErrorHandling(const base::CommandLine& command_line) { + base::EnableTerminationOnHeapCorruption(); + base::EnableTerminationOnOutOfMemory(); +#if defined(OS_WIN) + base::win::RegisterInvalidParamHandler(); + base::win::SetupCRT(command_line); +#endif // defined(OS_WIN) +} + +} // namespace + +int main(int argc, const char* argv[]) { + // Initialize infrastructure from base. + base::CommandLine::Init(argc, argv); + const base::CommandLine& command_line = + *base::CommandLine::ForCurrentProcess(); + InitLogging(); + InitErrorHandling(command_line); + zucchini::status::Code status = + RunZucchiniCommand(command_line, std::cout, std::cerr); + if (!(status == zucchini::status::kStatusSuccess || + status == zucchini::status::kStatusInvalidParam)) { + std::cerr << "Failed with code " << static_cast(status) << std::endl; + } + return static_cast(status); +} diff --git a/zucchini_tools.cc b/zucchini_tools.cc new file mode 100644 index 0000000..784e355 --- /dev/null +++ b/zucchini_tools.cc @@ -0,0 +1,126 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/zucchini_tools.h" + +#include +#include + +#include +#include +#include +#include + +#include "base/bind.h" +#include "base/strings/stringprintf.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/ensemble_matcher.h" +#include "components/zucchini/heuristic_ensemble_matcher.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +status::Code ReadReferences(ConstBufferView image, + bool do_dump, + std::ostream& out) { + std::unique_ptr disasm = MakeDisassemblerWithoutFallback(image); + if (!disasm) { + out << "Input file not recognized as executable." << std::endl; + return status::kStatusInvalidOldImage; + } + + std::vector targets; + for (const auto& group : disasm->MakeReferenceGroups()) { + targets.clear(); + auto refs = group.GetReader(disasm.get()); + for (auto ref = refs->GetNext(); ref.has_value(); ref = refs->GetNext()) + targets.push_back(ref->target); + + size_t num_locations = targets.size(); + std::sort(targets.begin(), targets.end()); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + size_t num_targets = targets.size(); + + out << "Type " << int(group.type_tag().value()); + out << ": Pool=" << static_cast(group.pool_tag().value()); + out << ", width=" << group.width(); + out << ", #locations=" << num_locations; + out << ", #targets=" << num_targets; + if (num_targets > 0) { + double ratio = static_cast(num_locations) / num_targets; + out << " (ratio=" << base::StringPrintf("%.4f", ratio) << ")"; + } + out << std::endl; + + if (do_dump) { + auto refs = group.GetReader(disasm.get()); + + for (auto ref = refs->GetNext(); ref; ref = refs->GetNext()) { + out << " " << AsHex<8>(ref->location); + out << " " << AsHex<8>(ref->target) << std::endl; + } + } + } + + return status::kStatusSuccess; +} + +status::Code DetectAll(ConstBufferView image, + std::ostream& out, + std::vector* sub_image_list) { + 
DCHECK_NE(sub_image_list, nullptr); + sub_image_list->clear(); + + const size_t size = image.size(); + size_t last_out_pos = 0; + size_t total_bytes_found = 0; + + auto print_range = [&out](size_t pos, size_t size, const std::string& msg) { + out << "-- " << AsHex<8, size_t>(pos) << " +" << AsHex<8, size_t>(size) + << ": " << msg << std::endl; + }; + + ElementFinder finder(image, + base::BindRepeating(DetectElementFromDisassembler)); + for (auto element = finder.GetNext(); element.has_value(); + element = finder.GetNext()) { + ConstBufferView sub_image = image[element->region()]; + sub_image_list->push_back(sub_image); + size_t pos = sub_image.begin() - image.begin(); + size_t prog_size = sub_image.size(); + if (last_out_pos < pos) + print_range(last_out_pos, pos - last_out_pos, "?"); + auto disasm = MakeDisassemblerOfType(sub_image, element->exe_type); + print_range(pos, prog_size, disasm->GetExeTypeString()); + total_bytes_found += prog_size; + last_out_pos = pos + prog_size; + } + if (last_out_pos < size) + print_range(last_out_pos, size - last_out_pos, "?"); + out << std::endl; + + // Print summary, using decimal instead of hexadecimal. + out << "Detected " << total_bytes_found << "/" << size << " bytes => "; + double percent = total_bytes_found * 100.0 / size; + out << base::StringPrintf("%.2f", percent) << "%." << std::endl; + + return status::kStatusSuccess; +} + +status::Code MatchAll(ConstBufferView old_image, + ConstBufferView new_image, + std::ostream& out) { + HeuristicEnsembleMatcher matcher(&out); + if (!matcher.RunMatch(old_image, new_image)) { + out << "RunMatch() failed."; + return status::kStatusFatal; + } + out << "Found " << matcher.matches().size() << " nontrivial matches and " + << matcher.num_identical() << " identical matches." 
<< std::endl; + + return status::kStatusSuccess; +} + +} // namespace zucchini diff --git a/zucchini_tools.h b/zucchini_tools.h new file mode 100644 index 0000000..6268745 --- /dev/null +++ b/zucchini_tools.h @@ -0,0 +1,38 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_ + +#include +#include + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/zucchini.h" + +namespace zucchini { + +// The functions below are called to print diagnosis information, so outputs are +// printed using std::ostream instead of LOG(). + +// Prints stats on references found in |image|. If |do_dump| is true, then +// prints all references (locations and targets). +status::Code ReadReferences(ConstBufferView image, + bool do_dump, + std::ostream& out); + +// Prints regions and types of all detected executables in |image|. Appends +// detected subregions to |sub_image_list|. +status::Code DetectAll(ConstBufferView image, + std::ostream& out, + std::vector* sub_image_list); + +// Prints all matched regions from |old_image| to |new_image|. +status::Code MatchAll(ConstBufferView old_image, + ConstBufferView new_image, + std::ostream& out); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_ -- cgit v1.2.3