diff options
author | Tianjie <xunchang@google.com> | 2021-08-18 01:06:25 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-08-18 01:06:25 +0000 |
commit | 45f3b91b74fbe1cfefb7576eaa3be167f91c9cd0 (patch) | |
tree | 53b4f7bd5a8f457ab0358cd7bc320c4778291445 | |
parent | dbee7e9d98c23beed6a03bb38fb007ba586c0e51 (diff) | |
parent | 08450315c3a4cc6ed94d0db406b53b98e1501322 (diff) | |
download | zucchini-45f3b91b74fbe1cfefb7576eaa3be167f91c9cd0.tar.gz |
Merge remote-tracking branch 'remotes/aosp/upstream-main' into clean am: fd73dddb6b am: 08450315c3
Original change: https://android-review.googlesource.com/c/platform/external/zucchini/+/1799387
Change-Id: I5f19b64147d90ca949afeb394d9cd6e456adb1ee
160 files changed, 28100 insertions, 0 deletions
diff --git a/BUILD.gn b/BUILD.gn new file mode 100644 index 0000000..54b06ab --- /dev/null +++ b/BUILD.gn @@ -0,0 +1,242 @@ +# Copyright 2017 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//build/buildflag_header.gni") +import("//chrome/process_version_rc_template.gni") +import("//testing/test.gni") + +buildflag_header("buildflags") { + header = "buildflags.h" + + # Disable DEX on Windows Official Builds. + _enable_dex = !(is_win && is_official_build) + _enable_elf = true + _enable_win = true + + # Disable ZTF (Zucchini Text Format) on official builds it is for testing only. + _enable_ztf = !is_official_build + flags = [ + "ENABLE_DEX=$_enable_dex", + "ENABLE_ELF=$_enable_elf", + "ENABLE_WIN=$_enable_win", + "ENABLE_ZTF=$_enable_ztf", + ] +} + +static_library("zucchini_lib") { + sources = [ + "abs32_utils.cc", + "abs32_utils.h", + "address_translator.cc", + "address_translator.h", + "algorithm.h", + "arm_utils.cc", + "arm_utils.h", + "binary_data_histogram.cc", + "binary_data_histogram.h", + "buffer_sink.cc", + "buffer_sink.h", + "buffer_source.cc", + "buffer_source.h", + "buffer_view.h", + "crc32.cc", + "crc32.h", + "disassembler.cc", + "disassembler.h", + "disassembler_dex.cc", + "disassembler_dex.h", + "disassembler_elf.cc", + "disassembler_elf.h", + "disassembler_no_op.cc", + "disassembler_no_op.h", + "disassembler_win32.cc", + "disassembler_win32.h", + "disassembler_ztf.cc", + "disassembler_ztf.h", + "element_detection.cc", + "element_detection.h", + "encoded_view.cc", + "encoded_view.h", + "ensemble_matcher.cc", + "ensemble_matcher.h", + "equivalence_map.cc", + "equivalence_map.h", + "heuristic_ensemble_matcher.cc", + "heuristic_ensemble_matcher.h", + "image_index.cc", + "image_index.h", + "image_utils.h", + "imposed_ensemble_matcher.cc", + "imposed_ensemble_matcher.h", + "io_utils.cc", + "io_utils.h", + "patch_reader.cc", + "patch_reader.h", + 
"patch_utils.h", + "patch_writer.cc", + "patch_writer.h", + "reference_bytes_mixer.cc", + "reference_bytes_mixer.h", + "reference_set.cc", + "reference_set.h", + "rel32_finder.cc", + "rel32_finder.h", + "rel32_utils.cc", + "rel32_utils.h", + "reloc_elf.cc", + "reloc_elf.h", + "reloc_win32.cc", + "reloc_win32.h", + "suffix_array.h", + "target_pool.cc", + "target_pool.h", + "targets_affinity.cc", + "targets_affinity.h", + "type_dex.h", + "type_elf.h", + "type_win_pe.h", + "typed_value.h", + "zucchini.h", + "zucchini_apply.cc", + "zucchini_apply.h", + "zucchini_gen.cc", + "zucchini_gen.h", + "zucchini_tools.cc", + "zucchini_tools.h", + ] + + deps = [ + ":buildflags", + "//base", + ] +} + +static_library("zucchini_io") { + sources = [ + "mapped_file.cc", + "mapped_file.h", + "zucchini_integration.cc", + "zucchini_integration.h", + ] + + deps = [ + ":zucchini_lib", + "//base", + ] +} + +executable("zucchini") { + sources = [ + "main_utils.cc", + "main_utils.h", + "zucchini_commands.cc", + "zucchini_commands.h", + "zucchini_main.cc", + ] + + deps = [ + ":zucchini_io", + ":zucchini_lib", + "//base", + ] + + if (is_win) { + deps += [ ":zucchini_exe_version" ] + } +} + +if (is_win) { + process_version_rc_template("zucchini_exe_version") { + template_file = "zucchini_exe_version.rc.version" + output = "$target_gen_dir/zucchini_exe_version.rc" + } +} + +test("zucchini_unittests") { + sources = [ + "abs32_utils_unittest.cc", + "address_translator_unittest.cc", + "algorithm_unittest.cc", + "arm_utils_unittest.cc", + "binary_data_histogram_unittest.cc", + "buffer_sink_unittest.cc", + "buffer_source_unittest.cc", + "buffer_view_unittest.cc", + "crc32_unittest.cc", + "disassembler_dex_unittest.cc", + "disassembler_elf_unittest.cc", + "disassembler_ztf_unittest.cc", + "element_detection_unittest.cc", + "encoded_view_unittest.cc", + "equivalence_map_unittest.cc", + "image_index_unittest.cc", + "image_utils_unittest.cc", + "imposed_ensemble_matcher_unittest.cc", + 
"io_utils_unittest.cc", + "mapped_file_unittest.cc", + "patch_read_write_unittest.cc", + "patch_utils_unittest.cc", + "reference_set_unittest.cc", + "rel32_finder_unittest.cc", + "rel32_utils_unittest.cc", + "reloc_elf_unittest.cc", + "reloc_win32_unittest.cc", + "suffix_array_unittest.cc", + "target_pool_unittest.cc", + "targets_affinity_unittest.cc", + "test_disassembler.cc", + "test_disassembler.h", + "test_reference_reader.cc", + "test_reference_reader.h", + "test_utils.cc", + "test_utils.h", + "typed_value_unittest.cc", + "zucchini_apply_unittest.cc", + "zucchini_gen_unittest.cc", + ] + + deps = [ + ":zucchini_io", + ":zucchini_lib", + "//base", + "//base/test:run_all_unittests", + "//base/test:test_support", + "//testing/gtest", + ] +} + +test("zucchini_integration_test") { + sources = [ "integration_test.cc" ] + + deps = [ + ":zucchini_lib", + "//base", + "//base/test:run_all_unittests", + "//base/test:test_support", + "//testing/gtest", + ] + + data = [ "testdata" ] +} + +# Group to build and depend on all the Zucchini related fuzzers. +group("zucchini_fuzzers") { + testonly = true + deps = [ + "//components/zucchini/fuzzers:zucchini_disassembler_dex_fuzzer", + "//components/zucchini/fuzzers:zucchini_disassembler_win32_fuzzer", + "//components/zucchini/fuzzers:zucchini_patch_fuzzer", + ] + + # Ensure protoc is available. + # Disabled on Windows due to crbug/844826. 
+ if (current_toolchain == host_toolchain && !is_win) { + deps += [ + "//components/zucchini/fuzzers:zucchini_apply_fuzzer", + "//components/zucchini/fuzzers:zucchini_imposed_ensemble_matcher_fuzzer", + "//components/zucchini/fuzzers:zucchini_raw_gen_fuzzer", + "//components/zucchini/fuzzers:zucchini_ztf_gen_fuzzer", + ] + } +} diff --git a/DIR_METADATA b/DIR_METADATA new file mode 100644 index 0000000..03fc466 --- /dev/null +++ b/DIR_METADATA @@ -0,0 +1,3 @@ +monorail { + component: "Internals>Installer>Diff" +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..d3fd0a1 --- /dev/null +++ b/README.md @@ -0,0 +1,280 @@ + +## Basic Definitions for Patching + +**Binary**: Executable image and data. Binaries may persist in an archive +(e.g., chrome.7z), and need to be periodically updated. Formats for binaries +include {PE files EXE / DLL, ELF, DEX}. Architectures for binaries include +{x86, x64, ARM, AArch64, Dalvik}. A binary is also referred to as an executable +or an image file. + +**Patching**: Sending a "new" file to clients who have an "old" file by +computing and transmitting a "patch" that can be used to transform "old" into +"new". Patches are compressed for transmission. A key performance metric is +patch size, which refers to the size of compressed patch file. For our +experiments we use 7z. + +**Patch generation**: Computation of a "patch" from "old" and "new". This can be +expensive (e.g., ~15-20 min for Chrome, using 1 GB of RAM), but since patch +generation is a run-once step on the server-side when releasing "new" binaries, +the expense is not too critical. + +**Patch application**: Transformation from "old" binaries to "new", using a +(downloaded) "patch". This is executed on client side on updates, so resource +constraints (e.g., time, RAM, disk space) are more stringent. Also, fault- +tolerance is important. 
This is usually achieved by an update system by having +a fallback method of directly downloading "new" in case of patching failure. + +**Offset**: Position relative to the start of a file. + +**Local offset**: An offset relative to the start of a region of a file. + +**Element**: A region in a file with associated executable type, represented by +the tuple (exe_type, offset, length). Every Element in new file is associated +with an Element in old file and patched independently. + +**Reference**: A directed connection between two offsets in a binary. For +example, consider jump instructions in x86: + + 00401000: E9 3D 00 00 00 jmp 00401042 + +Here, the 4 bytes `[3D 00 00 00]` starting at address `00401001` point to +address `00401042` in memory. This forms a reference from `offset(00401001)` +(length 4) to `offset(00401042)`, where `offset(addr)` indicates the disk +offset corresponding to `addr`. A reference has a location, length (implicitly +determined by reference type), body, and target. + +**Location**: The starting offset of bytes that store a reference. In the +preceding example, `offset(00401001)` is a location. Each location is the +beginning of a reference body. + +**Body**: The span of bytes that encodes reference data, i.e., +[location, location + length) = +[location, location + 1, ..., location + length - 1]. +In the preceding example, `length = 4`, so the reference body is +`[00401001, 00401001 + 4) = [00401001, 00401002, 00401003, 00401004]`. +All reference bodies in an image must not overlap, and often regions boundaries +are required to not straddle a reference body. + +**Target**: The offset that's the destination of a reference. In the preceding +example, `offset(00401042)` is the target. Different references can share common +targets. For example, in + + 00401000: E9 3D 00 00 00 jmp 00401042 + 00401005: EB 3B jmp 00401042 + +we have two references with different locations and bodies, but same target +of `00401042`. 
+ +Because the bytes that encode a reference depend on its target, and potentially +on its location, they are more likely to get modified from an old version of a +binary to a newer version. This is why "naive" patching does not do well on +binaries. + +**Target Key**: An alternative representation of a Target for a fixed pool, as its +index in the sorted list of Target offsets. Keys are useful since: + * Their numerical index are smaller than offsets, allowing more efficient + storage of target correction data in patch. + * They simplify association from Targets to Labels. + +**Disassembler**: Architecture specific data and operations, used to extract and +correct references in a binary. + +**Type of reference**: The type of a reference determines the binary +representation used to encode its target. This affects how references are parsed +and written by a disassembler. There can be many types of references in the same +binary. + +A reference is represented by the tuple (disassembler, location, target, type). +This tuple contains sufficient information to write the reference in a binary. + +**Pool of targets**: Collection of targets that is assumed to have some semantic +relationship. Each reference type belong to exactly one reference pool. Targets +for references in the same pool are shared. + +For example, the following describes two pools defined for Dalvik Executable +format (DEX). Both pools spawn multiple types of references. + +1. Index in string table. + - From bytecode to string index using 16 bits. + - From bytecode to string index using 32 bits. + - From field item to string index using 32 bits. +2. Address in code. + - Relative 16 bits pointer. + - Relative 32 bits pointer. + +Boundaries between different pools can be ambiguous. Having all targets belong +to the same pool can reduce redundancy, but will use more memory and might +cause larger corrections to happen, so this is a trade-off that can be resolved +with benchmarks. 
+ +**Abs32 references**: References whose targets are adjusted by the OS during +program load. In an image, a **relocation table** typically provides locations +of abs32 references. At each abs32 location, the stored bytes then encode +semantic information about the target (e.g., as RVA). + +**Rel32 references**: References embedded within machine code, in which targets +are encoded as some delta relative to the reference's location. Typical examples +of rel32 references are branching instructions and instruction pointer-relative +memory access. + +**Equivalence**: A (src_offset, dst_offset, length) tuple describing a region of +"old" binary, at an offset of |src_offset|, that is similar to a region of "new" +binary, at an offset of |dst_offset|. + +**Raw delta unit**: Describes a raw modification to apply on the new image, as a +pair (copy_offset, diff), where copy_offset describes the position in new file +as an offset in the data that was copied from the old file, and diff is the +bytewise difference to apply. + +**Associated Targets**: A target in "old" binary is associated with a target in +"new" binary if both targets: +1. are part of similar regions from the same equivalence, and +2. have the same local offset (relative to respective start regions), and +3. are not part of any larger region from a different equivalence. +Not all targets are necessarily associated with another target. + +**Target Affinity**: Level of confidence in the association between two targets. +The affinity between targets that are potentially associated is measured based +on surrounding content, as well as reference type. + +**Label**: An integer assigned for each Target in "old" and "new" binary as part +of generating a patch, and used to alias targets when searching for similar +regions that will form equivalences. Labels are assigned such that +associated targets in old and new binaries share the same Label. Unmatched +Targets have a Label of 0. 
For example, given + * "Old" targets = [0x1111, 0x3333, 0x5555, 0x7777], + * "New" targets = [0x2222, 0x4444, 0x6666, 0x8888], +to represent matchings 0x1111 <=> 0x6666, 0x3333 <=> 0x2222, we'd assign + * Label 1 to 0x1111 (in "old") and 0x6666 (in "new"), + * Label 2 to 0x3333 (in "old") and 0x2222 (in "new"). + Represented as arrays indexed over Target Keys, we'd have: + * "Old" labels = [1, 2, 0 ,0], + * "New" labels = [2, 0, 1, 0]. + +**Encoded Image**: The result of projecting the content of an image to scalar +values that describe content on a higher level of abstraction, masking away +undesirable noise in raw content. Notably, the projection encodes references +based on their associated label. + +## Interfaces + +zucchini_lib: Core Zucchini library that operate on buffers to generate and +apply patches. + +zucchini_io: Wrapper on zucchini_lib that handles file I/O, using memory-mapped +I/O to interface with zucchini_lib. + +zucchini: Stand-alone executable that parses command-line arguments, and passes +the results to zucchini_io. Also implements various helper flows. + +## Zucchini Ensemble Patch Format + +### Types + +**int8**: 8-bit unsigned int. + +**uint32**: 32-bit unsigned int, little-endian. + +**int32**: 32-bit signed int, little-endian. + +**Varints**: This is a generic variable-length encoding for integer quantities +that strips away leading (most-significant) null bytes. +The Varints format is borrowed from protocol-buffers, see +[documentation](https://developers.google.com/protocol-buffers/docs/encoding#varints) +for more info. + +**varuint32**: A uint32 encoded using Varints format. + +**varint32**: A int32 encoded using Varints format. + +### File Layout + +Name | Format | Description +--- | --- | --- +header | PatchHeader | The header. +elements_count | uint32 | Number of patch units. +elements | PatchElement[elements_count] | List of all patch elements. + +Position of elements in new file is ascending. 
+ +### Structures + +**PatchHeader** + +Name | Format | Description +--- | --- | --- +magic | uint32 = kMagic | Magic value. +old_size | uint32 | Size of old file in bytes. +old_crc | uint32 | CRC32 of old file. +new_size | uint32 | Size of new file in bytes. +new_crc | uint32 | CRC32 of new file. + +**kMagic** == `'Z' | ('u' << 8) | ('c' << 16)` + +**PatchElement** +Contains all the information required to produce a single element in new file. + +Name | Format | Description +--- | --- | --- +header | PatchElementHeader | The header. +equivalences | EquivalenceList | List of equivalences. +raw_deltas | RawDeltaList | List of raw deltas. +reference_deltas | ReferenceDeltaList | List of reference deltas. +pool_count | uint32 | Number of pools. +extra_targets | ExtraTargetList[pool_count] | Lists of extra targets. + +**PatchElementHeader** +Describes a correspondence between an element in old and in new files. Some +redundancy arise from storing |new_offset|, but it is necessary to make +PatchElement self contained. + +Name | Format | Description +--- | --- | --- +old_offset | uint32 | Starting offset of the element in old file. +old_length | uint32 | Length of the element in old file. +new_offset | uint32 | Starting offset of the element in new file. +new_length | uint32 | Length of the element in new file. +exe_type | uint32 | Executable type for this unit, see `enum ExecutableType`. + +**EquivalenceList** +Encodes a list of equivalences, where dst offsets (in new image) are ascending. + +Name | Format | Description +--- | --- | --- +src_skip | Buffer<varint32> | Src offset for each equivalence, delta encoded. +dst_skip | Buffer<varuint32> | Dst offset for each equivalence, delta encoded. +copy_count | Buffer<varuint32> | Length for each equivalence. + +**RawDeltaList** +Encodes a list of raw delta units, with ascending copy offsets. 
+ +Name | Format | Description +--- | --- | --- +raw_delta_skip | Buffer<varuint32> | Copy offset for each delta unit, delta encoded and biased by -1. +raw_delta_diff | Buffer<int8> | Bytewise difference for each delta unit. + +**ReferenceDeltaList** +Encodes a list of reference deltas, in the order they appear in the new +image file. A reference delta is a signed integer representing a jump through a +list of targets. + +Name | Format | Description +--- | --- | --- +reference_delta | Buffer<varuint32> | Vector of reference deltas. + +**ExtraTargetList** +Encodes a list of additional targets in the new image file, in ascending +order. + +Name | Format | Description +--- | --- | --- +pool_tag | uint8_t | Unique identifier for this pool of targets. +extra_targets | Buffer<varuint32> | Additional targets, delta encoded and biased by -1. + +**Buffer<T>** +A generic vector of data. + +Name | Format | Description +--- | --- | --- +size |uint32 | Size of content in bytes. +content |T[] | List of integers. diff --git a/abs32_utils.cc b/abs32_utils.cc new file mode 100644 index 0000000..ad1c85e --- /dev/null +++ b/abs32_utils.cc @@ -0,0 +1,211 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/abs32_utils.h" + +#include <algorithm> +#include <type_traits> +#include <utility> + +#include "base/check_op.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +namespace { + +// Templated helper for AbsoluteAddress::Read(). +template <typename T> +bool ReadAbs(ConstBufferView image, offset_t offset, uint64_t* value) { + static_assert(std::is_unsigned<T>::value, "Value type must be unsigned."); + if (!image.can_access<T>(offset)) + return false; + *value = static_cast<uint64_t>(image.read<T>(offset)); + return true; +} + +// Templated helper for AbsoluteAddress::Write(). 
+template <typename T> +bool WriteAbs(offset_t offset, T value, MutableBufferView* image) { + static_assert(std::is_unsigned<T>::value, "Value type must be unsigned."); + if (!image->can_access<T>(offset)) + return false; + image->write<T>(offset, value); + return true; +} + +} // namespace + +/******** AbsoluteAddress ********/ + +AbsoluteAddress::AbsoluteAddress(Bitness bitness, uint64_t image_base) + : bitness_(bitness), image_base_(image_base), value_(image_base) { + CHECK(bitness_ == kBit64 || image_base_ < 0x100000000ULL); +} + +AbsoluteAddress::AbsoluteAddress(AbsoluteAddress&&) = default; + +AbsoluteAddress::~AbsoluteAddress() = default; + +bool AbsoluteAddress::FromRva(rva_t rva) { + if (rva >= kRvaBound) + return false; + uint64_t value = image_base_ + rva; + // Check overflow, which manifests as |value| "wrapping around", resulting in + // |value| less than |image_base_| (preprocessing needed for 32-bit). + if (((bitness_ == kBit32) ? (value & 0xFFFFFFFFU) : value) < image_base_) + return false; + value_ = value; + return true; +} + +rva_t AbsoluteAddress::ToRva() const { + if (value_ < image_base_) + return kInvalidRva; + uint64_t raw_rva = value_ - image_base_; + if (raw_rva >= kRvaBound) + return kInvalidRva; + return static_cast<rva_t>(raw_rva); +} + +bool AbsoluteAddress::Read(offset_t offset, const ConstBufferView& image) { + // Read raw data; |value_| is not guaranteed to represent a valid RVA. 
+ if (bitness_ == kBit32) + return ReadAbs<uint32_t>(image, offset, &value_); + DCHECK_EQ(kBit64, bitness_); + return ReadAbs<uint64_t>(image, offset, &value_); +} + +bool AbsoluteAddress::Write(offset_t offset, MutableBufferView* image) { + if (bitness_ == kBit32) + return WriteAbs<uint32_t>(offset, static_cast<uint32_t>(value_), image); + DCHECK_EQ(kBit64, bitness_); + return WriteAbs<uint64_t>(offset, value_, image); +} + +/******** Abs32RvaExtractorWin32 ********/ + +Abs32RvaExtractorWin32::Abs32RvaExtractorWin32( + ConstBufferView image, + AbsoluteAddress&& addr, + const std::vector<offset_t>& abs32_locations, + offset_t lo, + offset_t hi) + : image_(image), addr_(std::move(addr)) { + CHECK_LE(lo, hi); + auto find_and_check = [this](const std::vector<offset_t>& locations, + offset_t offset) { + auto it = std::lower_bound(locations.begin(), locations.end(), offset); + // Ensure that |offset| does not straddle a reference body. + CHECK(it == locations.begin() || offset - *(it - 1) >= addr_.width()); + return it; + }; + cur_abs32_ = find_and_check(abs32_locations, lo); + end_abs32_ = find_and_check(abs32_locations, hi); +} + +Abs32RvaExtractorWin32::Abs32RvaExtractorWin32(Abs32RvaExtractorWin32&&) = + default; + +Abs32RvaExtractorWin32::~Abs32RvaExtractorWin32() = default; + +absl::optional<Abs32RvaExtractorWin32::Unit> Abs32RvaExtractorWin32::GetNext() { + while (cur_abs32_ < end_abs32_) { + offset_t location = *(cur_abs32_++); + if (!addr_.Read(location, image_)) + continue; + rva_t target_rva = addr_.ToRva(); + if (target_rva == kInvalidRva) + continue; + return Unit{location, target_rva}; + } + return absl::nullopt; +} + +/******** Abs32ReaderWin32 ********/ + +Abs32ReaderWin32::Abs32ReaderWin32(Abs32RvaExtractorWin32&& abs32_rva_extractor, + const AddressTranslator& translator) + : abs32_rva_extractor_(std::move(abs32_rva_extractor)), + target_rva_to_offset_(translator) {} + +Abs32ReaderWin32::~Abs32ReaderWin32() = default; + +absl::optional<Reference> 
Abs32ReaderWin32::GetNext() { + for (auto unit = abs32_rva_extractor_.GetNext(); unit.has_value(); + unit = abs32_rva_extractor_.GetNext()) { + offset_t location = unit->location; + offset_t unsafe_target = target_rva_to_offset_.Convert(unit->target_rva); + if (unsafe_target != kInvalidOffset) + return Reference{location, unsafe_target}; + } + return absl::nullopt; +} + +/******** Abs32WriterWin32 ********/ + +Abs32WriterWin32::Abs32WriterWin32(MutableBufferView image, + AbsoluteAddress&& addr, + const AddressTranslator& translator) + : image_(image), + addr_(std::move(addr)), + target_offset_to_rva_(translator) {} + +Abs32WriterWin32::~Abs32WriterWin32() = default; + +void Abs32WriterWin32::PutNext(Reference ref) { + rva_t target_rva = target_offset_to_rva_.Convert(ref.target); + if (target_rva != kInvalidRva) { + addr_.FromRva(target_rva); + addr_.Write(ref.location, &image_); + } +} + +/******** Exported Functions ********/ + +size_t RemoveUntranslatableAbs32(ConstBufferView image, + AbsoluteAddress&& addr, + const AddressTranslator& translator, + std::vector<offset_t>* locations) { + AddressTranslator::RvaToOffsetCache target_rva_checker(translator); + Abs32RvaExtractorWin32 extractor(image, std::move(addr), *locations, 0, + image.size()); + Abs32ReaderWin32 reader(std::move(extractor), translator); + std::vector<offset_t>::iterator write_it = locations->begin(); + // |reader| reads |locations| while |write_it| modifies it. However, there's + // no conflict since read occurs before write, and can skip ahead. 
+ for (auto ref = reader.GetNext(); ref.has_value(); ref = reader.GetNext()) + *(write_it++) = ref->location; + DCHECK(write_it <= locations->end()); + size_t num_removed = locations->end() - write_it; + locations->erase(write_it, locations->end()); + return num_removed; +} + +size_t RemoveOverlappingAbs32Locations(uint32_t width, + std::vector<offset_t>* locations) { + if (locations->size() <= 1) + return 0; + + auto slow = locations->begin(); + auto fast = locations->begin() + 1; + for (;;) { + // Find next good location. + while (fast != locations->end() && *fast - *slow < width) + ++fast; + // Advance |slow|. For the last iteration this becomes the new sentinel. + ++slow; + if (fast == locations->end()) + break; + // Compactify good locations (potentially overwrite bad locations). + if (slow != fast) + *slow = *fast; + ++fast; + } + size_t num_removed = locations->end() - slow; + locations->erase(slow, locations->end()); + return num_removed; +} + +} // namespace zucchini diff --git a/abs32_utils.h b/abs32_utils.h new file mode 100644 index 0000000..07503b5 --- /dev/null +++ b/abs32_utils.h @@ -0,0 +1,142 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ABS32_UTILS_H_ +#define COMPONENTS_ZUCCHINI_ABS32_UTILS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +// A class to represent an abs32 address (32-bit or 64-bit). Accessors are +// provided to translate from / to RVA, and to read / write the represented +// abs32 address from / to an image. 
+class AbsoluteAddress { + public: + AbsoluteAddress(Bitness bitness, uint64_t image_base); + AbsoluteAddress(AbsoluteAddress&&); + ~AbsoluteAddress(); + + // Attempts to translate |rva| to an abs32 address. On success, assigns + // |value_| to the result and returns true. On failure (invalid |rva| or + // overflow), returns false. + bool FromRva(rva_t rva); + + // Returns the RVA for |value_|, or |kInvalidRva| if the represented value + // address does not correspond to a valid RVA. + rva_t ToRva() const; + + // Attempts to read the abs32 address at |image[offset]| into |value_|. On + // success, updates |value_| and returns true. On failure (invalid |offset|), + // returns false. + bool Read(offset_t offset, const ConstBufferView& image); + + // Attempts to write |value_| to to |(*image)[offset]|. On success, performs + // the write and returns true. On failure (invalid |offset|), returns false. + bool Write(offset_t offset, MutableBufferView* image); + + uint32_t width() const { return WidthOf(bitness_); } + + // Exposing |value_| for testing. + uint64_t* mutable_value() { return &value_; } + + private: + const Bitness bitness_; + const uint64_t image_base_; // Accommodates 32-bit and 64-bit. + uint64_t value_; // Accommodates 32-bit and 64-bit. +}; + +// A class to extract Win32 abs32 references from |abs32_locations| within +// |image_| bounded by |[lo, hi)|. GetNext() is used to successively return +// data as Units, which are locations and (potentially out-of-bound) RVAs. +// |addr| determines the bitness of abs32 values stored, and mediates all reads. +class Abs32RvaExtractorWin32 { + public: + struct Unit { + offset_t location; + rva_t target_rva; + }; + + // Requires |lo| <= |hi|, and they must not straddle a reference body (with + // length |addr.width()|) in |abs32_locations|. 
+ Abs32RvaExtractorWin32(ConstBufferView image, + AbsoluteAddress&& addr, + const std::vector<offset_t>& abs32_locations, + offset_t lo, + offset_t hi); + Abs32RvaExtractorWin32(Abs32RvaExtractorWin32&&); + ~Abs32RvaExtractorWin32(); + + // Visits given abs32 locations, rejects invalid locations and non-existent + // RVAs, and returns reference as Unit, or absl::nullopt on completion. + absl::optional<Unit> GetNext(); + + private: + ConstBufferView image_; + AbsoluteAddress addr_; + std::vector<offset_t>::const_iterator cur_abs32_; + std::vector<offset_t>::const_iterator end_abs32_; +}; + +// A reader for Win32 abs32 references that filters and translates results from +// |abs32_rva_extractor_|. +class Abs32ReaderWin32 : public ReferenceReader { + public: + Abs32ReaderWin32(Abs32RvaExtractorWin32&& abs32_rva_extractor, + const AddressTranslator& translator); + Abs32ReaderWin32(const Abs32ReaderWin32&) = delete; + const Abs32ReaderWin32& operator=(const Abs32ReaderWin32&) = delete; + ~Abs32ReaderWin32() override; + + // ReferenceReader: + absl::optional<Reference> GetNext() override; + + private: + Abs32RvaExtractorWin32 abs32_rva_extractor_; + AddressTranslator::RvaToOffsetCache target_rva_to_offset_; +}; + +// A writer for Win32 abs32 references. |addr| determines the bitness of the +// abs32 values stored, and mediates all writes. +class Abs32WriterWin32 : public ReferenceWriter { + public: + Abs32WriterWin32(MutableBufferView image, + AbsoluteAddress&& addr, + const AddressTranslator& translator); + Abs32WriterWin32(const Abs32WriterWin32&) = delete; + const Abs32WriterWin32& operator=(const Abs32WriterWin32&) = delete; + ~Abs32WriterWin32() override; + + // ReferenceWriter: + void PutNext(Reference ref) override; + + private: + MutableBufferView image_; + AbsoluteAddress addr_; + AddressTranslator::OffsetToRvaCache target_offset_to_rva_; +}; + +// Given a list of abs32 |locations|, removes all elements whose targets cannot +// be translated. 
Returns the number of elements removed. +size_t RemoveUntranslatableAbs32(ConstBufferView image, + AbsoluteAddress&& addr, + const AddressTranslator& translator, + std::vector<offset_t>* locations); + +// Given a sorted list of abs32 |locations|, removes all elements whose body +// (with |width| given) overlaps with the body of a previous element. +size_t RemoveOverlappingAbs32Locations(uint32_t width, + std::vector<offset_t>* locations); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ABS32_UTILS_H_ diff --git a/abs32_utils_unittest.cc b/abs32_utils_unittest.cc new file mode 100644 index 0000000..ddbb685 --- /dev/null +++ b/abs32_utils_unittest.cc @@ -0,0 +1,543 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/abs32_utils.h" + +#include <stdint.h> + +#include <algorithm> +#include <string> +#include <utility> + +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// A trivial AddressTranslator that applies constant shift. +class TestAddressTranslator : public AddressTranslator { + public: + TestAddressTranslator(size_t image_size, rva_t rva_begin) { + DCHECK_GE(rva_begin, 0U); + CHECK_EQ(AddressTranslator::kSuccess, + Initialize({{0, base::checked_cast<offset_t>(image_size), + rva_begin, base::checked_cast<rva_t>(image_size)}})); + } +}; + +// Helper to translate address |value| to RVA. May return |kInvalidRva|. 
+rva_t AddrValueToRva(uint64_t value, AbsoluteAddress* addr) { + *addr->mutable_value() = value; + return addr->ToRva(); +} + +} // namespace + +TEST(Abs32UtilsTest, AbsoluteAddress32) { + std::vector<uint8_t> data32 = ParseHexString( + "00 00 32 00 21 43 65 4A 00 00 00 00 FF FF FF FF FF FF 31 00"); + ConstBufferView image32(data32.data(), data32.size()); + MutableBufferView mutable_image32(data32.data(), data32.size()); + + AbsoluteAddress addr32(kBit32, 0x00320000U); + EXPECT_TRUE(addr32.Read(0x0U, image32)); + EXPECT_EQ(0x00000000U, addr32.ToRva()); + EXPECT_TRUE(addr32.Read(0x4U, image32)); + EXPECT_EQ(0x4A334321U, addr32.ToRva()); + EXPECT_TRUE(addr32.Read(0x8U, image32)); + EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Underflow. + EXPECT_TRUE(addr32.Read(0xCU, image32)); + EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Translated RVA would be too large. + EXPECT_TRUE(addr32.Read(0x10U, image32)); + EXPECT_EQ(kInvalidRva, addr32.ToRva()); // Underflow (boundary case). + + EXPECT_FALSE(addr32.Read(0x11U, image32)); + EXPECT_FALSE(addr32.Read(0x14U, image32)); + EXPECT_FALSE(addr32.Read(0x100000U, image32)); + EXPECT_FALSE(addr32.Read(0x80000000U, image32)); + EXPECT_FALSE(addr32.Read(0xFFFFFFFFU, image32)); + + EXPECT_TRUE(addr32.FromRva(0x11223344U)); + EXPECT_TRUE(addr32.Write(0x2U, &mutable_image32)); + EXPECT_TRUE(addr32.Write(0x10U, &mutable_image32)); + std::vector<uint8_t> expected_data32 = ParseHexString( + "00 00 44 33 54 11 65 4A 00 00 00 00 FF FF FF FF 44 33 54 11"); + EXPECT_EQ(expected_data32, data32); + EXPECT_FALSE(addr32.Write(0x11U, &mutable_image32)); + EXPECT_FALSE(addr32.Write(0xFFFFFFFFU, &mutable_image32)); + EXPECT_EQ(expected_data32, data32); +} + +TEST(Abs32UtilsTest, AbsoluteAddress32Overflow) { + AbsoluteAddress addr32(kBit32, 0xC0000000U); + EXPECT_TRUE(addr32.FromRva(0x00000000U)); + EXPECT_TRUE(addr32.FromRva(0x11223344U)); + EXPECT_TRUE(addr32.FromRva(0x3FFFFFFFU)); + EXPECT_FALSE(addr32.FromRva(0x40000000U)); + 
EXPECT_FALSE(addr32.FromRva(0x40000001U));
+ EXPECT_FALSE(addr32.FromRva(0x80000000U));
+ EXPECT_FALSE(addr32.FromRva(0xFFFFFFFFU));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC0000000U, &addr32));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFU, &addr32));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x00000000U, &addr32));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFU, &addr32));
+}
+
+TEST(Abs32UtilsTest, AbsoluteAddress64) {
+ std::vector<uint8_t> data64 = ParseHexString(
+ "00 00 00 00 64 00 00 00 21 43 65 4A 64 00 00 00 "
+ "00 00 00 00 00 00 00 00 FF FF FF FF FF FF FF FF "
+ "00 00 00 00 64 00 00 80 FF FF FF FF 63 00 00 00");
+ ConstBufferView image64(data64.data(), data64.size());
+ MutableBufferView mutable_image64(data64.data(), data64.size());
+
+ AbsoluteAddress addr64(kBit64, 0x0000006400000000ULL);
+ EXPECT_TRUE(addr64.Read(0x0U, image64));
+ EXPECT_EQ(0x00000000U, addr64.ToRva());
+ EXPECT_TRUE(addr64.Read(0x8U, image64));
+ EXPECT_EQ(0x4A654321U, addr64.ToRva());
+ EXPECT_TRUE(addr64.Read(0x10U, image64)); // Succeeds, in spite of value.
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Underflow.
+ EXPECT_TRUE(addr64.Read(0x18U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Translated RVA too large.
+ EXPECT_TRUE(addr64.Read(0x20U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Translated RVA too large.
+ EXPECT_TRUE(addr64.Read(0x28U, image64));
+ EXPECT_EQ(kInvalidRva, addr64.ToRva()); // Underflow.
+
+ EXPECT_FALSE(addr64.Read(0x29U, image64)); // Extends outside.
+ EXPECT_FALSE(addr64.Read(0x30U, image64)); // Entirely outside (note: hex).
+ EXPECT_FALSE(addr64.Read(0x100000U, image64));
+ EXPECT_FALSE(addr64.Read(0x80000000U, image64));
+ EXPECT_FALSE(addr64.Read(0xFFFFFFFFU, image64));
+
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.Write(0x13U, &mutable_image64));
+ EXPECT_TRUE(addr64.Write(0x20U, &mutable_image64));
+ std::vector<uint8_t> expected_data64 = ParseHexString(
+ "00 00 00 00 64 00 00 00 21 43 65 4A 64 00 00 00 "
+ "00 00 00 44 33 22 11 64 00 00 00 FF FF FF FF FF "
+ "44 33 22 11 64 00 00 00 FF FF FF FF 63 00 00 00");
+ EXPECT_EQ(expected_data64, data64);
+ EXPECT_FALSE(addr64.Write(0x29U, &mutable_image64));
+ EXPECT_FALSE(addr64.Write(0x30U, &mutable_image64));
+ EXPECT_FALSE(addr64.Write(0xFFFFFFFFU, &mutable_image64));
+ EXPECT_EQ(expected_data64, data64);
+
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+}
+
+TEST(Abs32UtilsTest, AbsoluteAddress64Overflow) {
+ {
+ // Counterpart to AbsoluteAddress32Overflow test.
+ AbsoluteAddress addr64(kBit64, 0xFFFFFFFFC0000000ULL);
+ EXPECT_TRUE(addr64.FromRva(0x00000000U));
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFU));
+ EXPECT_FALSE(addr64.FromRva(0x40000000U));
+ EXPECT_FALSE(addr64.FromRva(0x40000001U));
+ EXPECT_FALSE(addr64.FromRva(0x80000000U));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xFFFFFFFFC0000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFFBFFFFFFFU, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x0000000000000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFF00000000U, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFFFFFFFFFU, &addr64));
+ }
+ {
+ // Pseudo-counterpart to AbsoluteAddress32Overflow test: Some now pass.
+ AbsoluteAddress addr64(kBit64, 0xC0000000U);
+ EXPECT_TRUE(addr64.FromRva(0x00000000U));
+ EXPECT_TRUE(addr64.FromRva(0x11223344U));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFU));
+ EXPECT_TRUE(addr64.FromRva(0x40000000U));
+ EXPECT_TRUE(addr64.FromRva(0x40000001U));
+ EXPECT_FALSE(addr64.FromRva(0x80000000U));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFU));
+
+ // ToRva() still fails, though.
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC0000000U, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFU, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x00000000U, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xFFFFFFFFU, &addr64));
+ }
+ {
+ AbsoluteAddress addr64(kBit64, 0xC000000000000000ULL);
+ EXPECT_TRUE(addr64.FromRva(0x00000000ULL));
+ EXPECT_TRUE(addr64.FromRva(0x11223344ULL));
+ EXPECT_TRUE(addr64.FromRva(0x3FFFFFFFULL));
+ EXPECT_TRUE(addr64.FromRva(0x40000000ULL));
+ EXPECT_TRUE(addr64.FromRva(0x40000001ULL));
+ EXPECT_FALSE(addr64.FromRva(0x80000000ULL));
+ EXPECT_FALSE(addr64.FromRva(0xFFFFFFFFULL));
+
+ EXPECT_EQ(0x00000000U, AddrValueToRva(0xC000000000000000ULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xBFFFFFFFFFFFFFFFULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0x0000000000000000ULL, &addr64));
+ EXPECT_EQ(0x3FFFFFFFU, AddrValueToRva(0xC00000003FFFFFFFULL, &addr64));
+ EXPECT_EQ(kInvalidRva, AddrValueToRva(0xFFFFFFFFFFFFFFFFULL, &addr64));
+ }
+}
+
+TEST(Abs32UtilsTest, Win32Read32) {
+ constexpr uint32_t kImageBase = 0xA0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ struct {
+ std::vector<uint8_t> data32;
+ std::vector<offset_t> abs32_locations; // Assumption: Sorted.
+ offset_t lo; // Assumption: In range, does not straddle |abs32_location|.
+ offset_t hi; // Assumption: Also >= |lo|.
+ std::vector<Reference> expected_refs;
+ } test_cases[] = {
+ // Targets at beginning and end.
+ {ParseHexString("FF FF FF FF 0F 00 C0 A0 00 00 C0 A0 FF FF FF FF"), + {0x4U, 0x8U}, + 0x0U, + 0x10U, + {{0x4U, 0xFU}, {0x8U, 0x0U}}}, + // Targets at beginning and end are out of bound: Rejected. + {ParseHexString("FF FF FF FF 10 00 C0 A0 FF FF BF A0 FF FF FF FF"), + {0x4U, 0x8U}, + 0x0U, + 0x10U, + std::vector<Reference>()}, + // Same with more extreme target values: Rejected. + {ParseHexString("FF FF FF FF FF FF FF FF 00 00 00 00 FF FF FF FF"), + {0x4U, 0x8U}, + 0x0U, + 0x10U, + std::vector<Reference>()}, + // Locations at beginning and end, plus invalid locations. + {ParseHexString("08 00 C0 A0 FF FF FF FF FF FF FF FF 04 00 C0 A0"), + {0x0U, 0xCU, 0x10U, 0x1000U, 0x80000000U, 0xFFFFFFFFU}, + 0x0U, + 0x10U, + {{0x0U, 0x8U}, {0xCU, 0x4U}}}, + // Odd size, location, target. + {ParseHexString("FF FF FF 09 00 C0 A0 FF FF FF FF FF FF FF FF FF " + "FF FF FF"), + {0x3U}, + 0x0U, + 0x13U, + {{0x3U, 0x9U}}}, + // No location given. + {ParseHexString("FF FF FF FF 0C 00 C0 A0 00 00 C0 A0 FF FF FF FF"), + std::vector<offset_t>(), 0x0U, 0x10U, std::vector<Reference>()}, + // Simple alternation. + {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x0U, + 0x20U, + {{0x0U, 0x4U}, {0x8U, 0xCU}, {0x10U, 0x14U}, {0x18U, 0x1CU}}}, + // Same, with locations limited by |lo| and |hi|. By assumption these must + // not cut accross Reference body. + {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x04U, + 0x17U, + {{0x8U, 0xCU}, {0x10U, 0x14U}}}, + // Same, with very limiting |lo| and |hi|. + {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x0CU, + 0x10U, + std::vector<Reference>()}, + // Same, |lo| == |hi|. 
+ {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x14U, + 0x14U, + std::vector<Reference>()}, + // Same, |lo| and |hi| at end. + {ParseHexString("04 00 C0 A0 FF FF FF FF 0C 00 C0 A0 FF FF FF FF " + "14 00 C0 A0 FF FF FF FF 1C 00 C0 A0 FF FF FF FF"), + {0x0U, 0x8U, 0x10U, 0x18U}, + 0x20U, + 0x20U, + std::vector<Reference>()}, + // Mix. Note that targets can overlap. + {ParseHexString("FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF " + "06 00 C0 A0 2C 00 C0 A0 FF FF C0 A0 2B 00 C0 A0 " + "FF 06 00 C0 A0 00 00 C0 A0 FF FF FF FF FF FF FF"), + {0x10U, 0x14U, 0x18U, 0x1CU, 0x21U, 0x25U, 0xAAAAU}, + 0x07U, + 0x25U, + {{0x10U, 0x6U}, {0x14U, 0x2CU}, {0x1CU, 0x2BU}, {0x21, 0x6U}}}, + }; + + for (const auto& test_case : test_cases) { + ConstBufferView image32(test_case.data32.data(), test_case.data32.size()); + Abs32RvaExtractorWin32 extractor(image32, {kBit32, kImageBase}, + test_case.abs32_locations, test_case.lo, + test_case.hi); + + TestAddressTranslator translator(test_case.data32.size(), kRvaBegin); + Abs32ReaderWin32 reader(std::move(extractor), translator); + + // Loop over |expected_ref| to check element-by-element. + absl::optional<Reference> ref; + for (const auto& expected_ref : test_case.expected_refs) { + ref = reader.GetNext(); + EXPECT_TRUE(ref.has_value()); + EXPECT_EQ(expected_ref, ref.value()); + } + // Check that nothing is left. + ref = reader.GetNext(); + EXPECT_FALSE(ref.has_value()); + } +} + +TEST(Abs32UtilsTest, Win32Read64) { + constexpr uint64_t kImageBase = 0x31415926A0000000U; + constexpr uint32_t kRvaBegin = 0x00C00000U; + // For simplicity, just test mixed case. 
+ std::vector<uint8_t> data64 = ParseHexString( + "FF FF FF FF FF FF FF FF 00 00 C0 A0 26 59 41 31 " + "06 00 C0 A0 26 59 41 31 02 00 C0 A0 26 59 41 31 " + "FF FF FF BF 26 59 41 31 FF FF FF FF FF FF FF FF " + "02 00 C0 A0 26 59 41 31 07 00 C0 A0 26 59 41 31"); + std::vector<offset_t> abs32_locations = {0x8U, 0x10U, 0x18U, 0x20U, + 0x28U, 0x30U, 0x38U, 0x40U}; + offset_t lo = 0x10U; + offset_t hi = 0x38U; + std::vector<Reference> expected_refs = { + {0x10U, 0x06U}, {0x18U, 0x02U}, {0x30U, 0x02U}}; + + ConstBufferView image64(data64.data(), data64.size()); + Abs32RvaExtractorWin32 extractor(image64, {kBit64, kImageBase}, + abs32_locations, lo, hi); + TestAddressTranslator translator(data64.size(), kRvaBegin); + Abs32ReaderWin32 reader(std::move(extractor), translator); + + std::vector<Reference> refs; + absl::optional<Reference> ref; + for (ref = reader.GetNext(); ref.has_value(); ref = reader.GetNext()) + refs.push_back(ref.value()); + EXPECT_EQ(expected_refs, refs); +} + +TEST(Abs32UtilsTest, Win32ReadFail) { + // Make |bitness| a state to reduce repetition. + Bitness bitness = kBit32; + + constexpr uint32_t kImageBase = 0xA0000000U; // Shared for 32-bit and 64-bit. + std::vector<uint8_t> data(32U, 0xFFU); + ConstBufferView image(data.data(), data.size()); + + auto try_make = [&](std::vector<offset_t>&& abs32_locations, offset_t lo, + offset_t hi) { + Abs32RvaExtractorWin32 extractor(image, {bitness, kImageBase}, + abs32_locations, lo, hi); + extractor.GetNext(); // Dummy call so |extractor| gets used. + }; + + // 32-bit tests. + bitness = kBit32; + try_make({8U, 24U}, 0U, 32U); + EXPECT_DEATH(try_make({4U, 24U}, 32U, 0U), ""); // |lo| > |hi|. + try_make({8U, 24U}, 0U, 12U); + try_make({8U, 24U}, 0U, 28U); + try_make({8U, 24U}, 8U, 32U); + try_make({8U, 24U}, 24U, 32U); + EXPECT_DEATH(try_make({8U, 24U}, 0U, 11U), ""); // |hi| straddles. + EXPECT_DEATH(try_make({8U, 24U}, 26U, 32U), ""); // |lo| straddles. + try_make({8U, 24U}, 12U, 24U); + + // 64-bit tests. 
+ bitness = kBit64; + try_make({6U, 22U}, 0U, 32U); + // |lo| > |hi|. + EXPECT_DEATH(try_make(std::vector<offset_t>(), 32U, 31U), ""); + try_make({6U, 22U}, 0U, 14U); + try_make({6U, 22U}, 0U, 30U); + try_make({6U, 22U}, 6U, 32U); + try_make({6U, 22U}, 22U, 32U); + EXPECT_DEATH(try_make({6U, 22U}, 0U, 29U), ""); // |hi| straddles. + EXPECT_DEATH(try_make({6U, 22U}, 7U, 32U), ""); // |lo| straddles. + try_make({6U, 22U}, 14U, 20U); + try_make({16U}, 16U, 24U); + EXPECT_DEATH(try_make({16U}, 18U, 18U), ""); // |lo|, |hi| straddle. +} + +TEST(Abs32UtilsTest, Win32Write32) { + constexpr uint32_t kImageBase = 0xA0000000U; + constexpr uint32_t kRvaBegin = 0x00C00000U; + std::vector<uint8_t> data32(0x30, 0xFFU); + MutableBufferView image32(data32.data(), data32.size()); + AbsoluteAddress addr(kBit32, kImageBase); + TestAddressTranslator translator(data32.size(), kRvaBegin); + Abs32WriterWin32 writer(image32, std::move(addr), translator); + + // Successful writes. + writer.PutNext({0x02U, 0x10U}); + writer.PutNext({0x0BU, 0x21U}); + writer.PutNext({0x16U, 0x10U}); + writer.PutNext({0x2CU, 0x00U}); + + // Invalid data: For simplicity, Abs32WriterWin32 simply ignores bad writes. + // Invalid location. + writer.PutNext({0x2DU, 0x20U}); + writer.PutNext({0x80000000U, 0x20U}); + writer.PutNext({0xFFFFFFFFU, 0x20U}); + // Invalid target. 
+ writer.PutNext({0x1CU, 0x00001111U});
+ writer.PutNext({0x10U, 0xFFFFFF00U});
+
+ std::vector<uint8_t> expected_data32 = ParseHexString(
+ "FF FF 10 00 C0 A0 FF FF FF FF FF 21 00 C0 A0 FF "
+ "FF FF FF FF FF FF 10 00 C0 A0 FF FF FF FF FF FF "
+ "FF FF FF FF FF FF FF FF FF FF FF FF 00 00 C0 A0");
+ EXPECT_EQ(expected_data32, data32);
+}
+
+TEST(Abs32UtilsTest, Win32Write64) {
+ constexpr uint64_t kImageBase = 0x31415926A0000000U;
+ constexpr uint32_t kRvaBegin = 0x00C00000U;
+ std::vector<uint8_t> data64(0x30, 0xFFU);
+ MutableBufferView image32(data64.data(), data64.size());
+ AbsoluteAddress addr(kBit64, kImageBase);
+ TestAddressTranslator translator(data64.size(), kRvaBegin);
+ Abs32WriterWin32 writer(image32, std::move(addr), translator);
+
+ // Successful writes.
+ writer.PutNext({0x02U, 0x10U});
+ writer.PutNext({0x0BU, 0x21U});
+ writer.PutNext({0x16U, 0x10U});
+ writer.PutNext({0x28U, 0x00U});
+
+ // Invalid data: For simplicity, Abs32WriterWin32 simply ignores bad writes.
+ // Invalid location.
+ writer.PutNext({0x29U, 0x20U});
+ writer.PutNext({0x80000000U, 0x20U});
+ writer.PutNext({0xFFFFFFFFU, 0x20U});
+ // Invalid target.
+ writer.PutNext({0x1CU, 0x00001111U});
+ writer.PutNext({0x10U, 0xFFFFFF00U});
+
+ std::vector<uint8_t> expected_data64 = ParseHexString(
+ "FF FF 10 00 C0 A0 26 59 41 31 FF 21 00 C0 A0 26 "
+ "59 41 31 FF FF FF 10 00 C0 A0 26 59 41 31 FF FF "
+ "FF FF FF FF FF FF FF FF 00 00 C0 A0 26 59 41 31");
+ EXPECT_EQ(expected_data64, data64);
+}
+
+TEST(Abs32UtilsTest, RemoveUntranslatableAbs32) {
+ Bitness kBitness = kBit32;
+ uint64_t kImageBase = 0x2BCD0000;
+
+ // Valid RVAs: [0x00001A00, 0x00001A28) and [0x00003A00, 0x00004000).
+ // Valid AVAs: [0x2BCD1A00, 0x2BCD1A28) and [0x2BCD3A00, 0x2BCD4000).
+ // Notice that the second section has dangling RVAs.
+ AddressTranslator translator; + ASSERT_EQ(AddressTranslator::kSuccess, + translator.Initialize( + {{0x04, +0x28, 0x1A00, +0x28}, {0x30, +0x30, 0x3A00, +0x600}})); + + std::vector<uint8_t> data = ParseHexString( + "FF FF FF FF 0B 3A CD 2B 00 00 00 04 3A CD 2B 00 " + "FC 3F CD 2B 14 1A CD 2B 44 00 00 00 CC 00 00 00 " + "00 00 55 00 00 00 1E 1A CD 2B 00 99 FF FF FF FF " + "10 3A CD 2B 22 00 00 00 00 00 00 11 00 00 00 00 " + "66 00 00 00 28 1A CD 2B 00 00 CD 2B 27 1A CD 2B " + "FF 39 CD 2B 00 00 00 00 18 1A CD 2B 00 00 00 00 " + "FF FF FF FF FF FF FF FF"); + MutableBufferView image(data.data(), data.size()); + + const offset_t kAbs1 = 0x04; // a:2BCD3A0B = r:3A0B = o:3B + const offset_t kAbs2 = 0x0B; // a:2BCD3A04 = r:3A04 = o:34 + const offset_t kAbs3 = 0x10; // a:2BCD3FFF = r:3FFF (dangling) + const offset_t kAbs4 = 0x14; // a:2BCD1A14 = r:1A14 = o:18 + const offset_t kAbs5 = 0x26; // a:2BCD1A1E = r:1A1E = o:22 + const offset_t kAbs6 = 0x30; // a:2BCD3A10 = r:3A10 = 0x40 + const offset_t kAbs7 = 0x44; // a:2BCD1A28 = r:1A28 (bad: sentinel) + const offset_t kAbs8 = 0x48; // a:2BCD0000 = r:0000 (bad: not covered) + const offset_t kAbs9 = 0x4C; // a:2BCD1A27 = r:1A27 = 0x2B + const offset_t kAbsA = 0x50; // a:2BCD39FF (bad: not covered) + const offset_t kAbsB = 0x54; // a:00000000 (bad: underflow) + const offset_t kAbsC = 0x58; // a:2BCD1A18 = r:1A18 = 0x1C + + std::vector<offset_t> locations = {kAbs1, kAbs2, kAbs3, kAbs4, kAbs5, kAbs6, + kAbs7, kAbs8, kAbs9, kAbsA, kAbsB, kAbsC}; + std::vector<offset_t> exp_locations = {kAbs1, kAbs2, kAbs3, kAbs4, + kAbs5, kAbs6, kAbs9, kAbsC}; + size_t exp_num_removed = locations.size() - exp_locations.size(); + size_t num_removed = RemoveUntranslatableAbs32(image, {kBitness, kImageBase}, + translator, &locations); + EXPECT_EQ(exp_num_removed, num_removed); + EXPECT_EQ(exp_locations, locations); +} + +TEST(Abs32UtilsTest, RemoveOverlappingAbs32Locations) { + // Make |width| a state to reduce repetition. 
+ uint32_t width = WidthOf(kBit32); + + auto run_test = [&width](const std::vector<offset_t>& expected_locations, + std::vector<offset_t>&& locations) { + ASSERT_TRUE(std::is_sorted(locations.begin(), locations.end())); + size_t expected_removals = locations.size() - expected_locations.size(); + size_t removals = RemoveOverlappingAbs32Locations(width, &locations); + EXPECT_EQ(expected_removals, removals); + EXPECT_EQ(expected_locations, locations); + }; + + // 32-bit tests. + width = WidthOf(kBit32); + run_test(std::vector<offset_t>(), std::vector<offset_t>()); + run_test({4U}, {4U}); + run_test({4U, 10U}, {4U, 10U}); + run_test({4U, 8U}, {4U, 8U}); + run_test({4U}, {4U, 7U}); + run_test({4U}, {4U, 4U}); + run_test({4U, 8U}, {4U, 7U, 8U}); + run_test({4U, 10U}, {4U, 7U, 10U}); + run_test({4U, 9U}, {4U, 9U, 10U}); + run_test({3U}, {3U, 5U, 6U}); + run_test({3U, 7U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U}); + run_test({3U, 7U, 11U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U}); + run_test({4U, 8U, 12U}, {4U, 6U, 8U, 10U, 12U}); + run_test({4U, 8U, 12U, 16U}, {4U, 8U, 12U, 16U}); + run_test({4U, 8U, 12U}, {4U, 8U, 9U, 12U}); + run_test({4U}, {4U, 4U, 4U, 4U, 4U, 4U}); + run_test({3U}, {3U, 4U, 4U, 4U, 5U, 5U}); + run_test({3U, 7U}, {3U, 4U, 4U, 4U, 7U, 7U, 8U}); + run_test({10U, 20U, 30U, 40U}, {10U, 20U, 22U, 22U, 30U, 40U}); + run_test({1000000U, 1000004U}, {1000000U, 1000004U}); + run_test({1000000U}, {1000000U, 1000002U}); + + // 64-bit tests. 
+ width = WidthOf(kBit64); + run_test(std::vector<offset_t>(), std::vector<offset_t>()); + run_test({4U}, {4U}); + run_test({4U, 20U}, {4U, 20U}); + run_test({4U, 12U}, {4U, 12U}); + run_test({4U}, {4U, 11U}); + run_test({4U}, {4U, 5U}); + run_test({4U}, {4U, 4U}); + run_test({4U, 12U, 20U}, {4U, 12U, 20U}); + run_test({1U, 9U, 17U}, {1U, 9U, 17U}); + run_test({1U, 17U}, {1U, 8U, 17U}); + run_test({1U, 10U}, {1U, 10U, 17U}); + run_test({3U, 11U}, {3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U}); + run_test({4U, 12U}, {4U, 6U, 8U, 10U, 12U}); + run_test({4U, 12U}, {4U, 12U, 16U}); + run_test({4U, 12U, 20U, 28U}, {4U, 12U, 20U, 28U}); + run_test({4U}, {4U, 4U, 4U, 4U, 5U, 5U}); + run_test({3U, 11U}, {3U, 4U, 4U, 4U, 11U, 11U, 12U}); + run_test({10U, 20U, 30U, 40U}, {10U, 20U, 22U, 22U, 30U, 40U}); + run_test({1000000U, 1000008U}, {1000000U, 1000008U}); + run_test({1000000U}, {1000000U, 1000004U}); +} + +} // namespace zucchini diff --git a/address_translator.cc b/address_translator.cc new file mode 100644 index 0000000..d7d7201 --- /dev/null +++ b/address_translator.cc @@ -0,0 +1,258 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/address_translator.h" + +#include <algorithm> +#include <utility> + +#include "base/containers/cxx20_erase.h" + +namespace zucchini { + +/******** AddressTranslator::OffsetToRvaCache ********/ + +AddressTranslator::OffsetToRvaCache::OffsetToRvaCache( + const AddressTranslator& translator) + : translator_(translator) {} + +rva_t AddressTranslator::OffsetToRvaCache::Convert(offset_t offset) const { + if (offset >= translator_.fake_offset_begin_) { + // Rely on |translator_| to handle this special case. 
+ return translator_.OffsetToRva(offset); + } + if (cached_unit_ && cached_unit_->CoversOffset(offset)) + return cached_unit_->OffsetToRvaUnsafe(offset); + const AddressTranslator::Unit* unit = translator_.OffsetToUnit(offset); + if (!unit) + return kInvalidRva; + cached_unit_ = unit; + return unit->OffsetToRvaUnsafe(offset); +} + +/******** AddressTranslator::RvaToOffsetCache ********/ + +AddressTranslator::RvaToOffsetCache::RvaToOffsetCache( + const AddressTranslator& translator) + : translator_(translator) {} + +bool AddressTranslator::RvaToOffsetCache::IsValid(rva_t rva) const { + if (rva == kInvalidRva) + return false; + if (!cached_unit_ || !cached_unit_->CoversRva(rva)) { + const AddressTranslator::Unit* unit = translator_.RvaToUnit(rva); + if (!unit) + return false; + cached_unit_ = unit; + } + return true; +} + +offset_t AddressTranslator::RvaToOffsetCache::Convert(rva_t rva) const { + if (!cached_unit_ || !cached_unit_->CoversRva(rva)) { + const AddressTranslator::Unit* unit = translator_.RvaToUnit(rva); + if (!unit) + return kInvalidOffset; + cached_unit_ = unit; + } + return cached_unit_->RvaToOffsetUnsafe(rva, translator_.fake_offset_begin_); +} + +/******** AddressTranslator ********/ + +AddressTranslator::AddressTranslator() = default; + +AddressTranslator::AddressTranslator(AddressTranslator&&) = default; + +AddressTranslator::~AddressTranslator() = default; + +AddressTranslator::Status AddressTranslator::Initialize( + std::vector<Unit>&& units) { + for (Unit& unit : units) { + // Check for overflows and fail if found. + if (!RangeIsBounded<offset_t>(unit.offset_begin, unit.offset_size, + kOffsetBound) || + !RangeIsBounded<rva_t>(unit.rva_begin, unit.rva_size, kRvaBound)) { + return kErrorOverflow; + } + // If |rva_size < offset_size|: Just shrink |offset_size| to accommodate. + unit.offset_size = std::min(unit.offset_size, unit.rva_size); + // Now |rva_size >= offset_size|. 
Note that |rva_size > offset_size| is + // allowed; these lead to dangling RVA. + } + + // Remove all empty units. + base::EraseIf(units, [](const Unit& unit) { return unit.IsEmpty(); }); + + // Sort |units| by RVA, then uniquefy. + std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) { + return std::tie(a.rva_begin, a.rva_size) < + std::tie(b.rva_begin, b.rva_size); + }); + units.erase(std::unique(units.begin(), units.end()), units.end()); + + // Scan for RVA range overlaps, validate, and merge wherever possible. + if (units.size() > 1) { + // Traverse with two iterators: |slow| stays behind and modifies Units that + // absorb all overlapping (or tangent if suitable) Units; |fast| explores + // new Units as candidates for consistency checks and potential merge into + // |slow|. + auto slow = units.begin(); + + // All |it| with |slow| < |it| < |fast| contain garbage. + for (auto fast = slow + 1; fast != units.end(); ++fast) { + // Comment notation: S = slow offset, F = fast offset, O = overlap offset, + // s = slow RVA, f = fast RVA, o = overlap RVA. + DCHECK_GE(fast->rva_begin, slow->rva_begin); + if (slow->rva_end() < fast->rva_begin) { + // ..ssssss..ffffff..: Disjoint: Can advance |slow|. + *(++slow) = *fast; + continue; + } + + // ..ssssffff..: Tangent: Merge is optional. + // ..sssooofff.. / ..sssooosss..: Overlap: Merge is required. + bool merge_is_optional = slow->rva_end() == fast->rva_begin; + + // Check whether |fast| and |slow| have identical RVA -> offset shift. + // If not, then merge cannot be resolved. Examples: + // ..ssssffff.. -> ..SSSSFFFF..: Good, can merge. + // ..ssssffff.. -> ..SSSS..FFFF..: Non-fatal: don't merge. + // ..ssssffff.. -> ..FFFF..SSSS..: Non-fatal: don't merge. + // ..ssssffff.. -> ..SSOOFF..: Fatal: Ignore for now (handled later). + // ..sssooofff.. -> ..SSSOOOFFF..: Good, can merge. + // ..sssooofff.. -> ..SSSSSOFFFFF..: Fatal. + // ..sssooofff.. -> ..FFOOOOSS..: Fatal. + // ..sssooofff.. 
-> ..SSSOOOF..: Good, notice |fast| has dangling RVAs.
+ // ..oooooo.. -> ..OOOOOO..: Good, can merge.
+ if (fast->offset_begin < slow->offset_begin ||
+ fast->offset_begin - slow->offset_begin !=
+ fast->rva_begin - slow->rva_begin) {
+ if (merge_is_optional) {
+ *(++slow) = *fast;
+ continue;
+ }
+ return kErrorBadOverlap;
+ }
+
+ // Check whether dangling RVAs (if they exist) are consistent. Examples:
+ // ..sssooofff.. -> ..SSSOOOF..: Good, can merge.
+ // ..sssooosss.. -> ..SSSOOOS..: Good, can merge.
+ // ..sssooofff.. -> ..SSSOO..: Good, can merge.
+ // ..sssooofff.. -> ..SSSOFFF..: Fatal.
+ // ..sssooosss.. -> ..SSSOOFFFF..: Fatal.
+ // ..oooooo.. -> ..OOO..: Good, can merge.
+ // Idea of check: Suppose |fast| has dangling RVA, then
+ // |[fast->rva_start, fast->rva_start + fast->offset_start)| ->
+ // |[fast->offset_start, **fast->offset_end()**)|, with remaining RVA
+ // mapping to fake offsets. This means |fast->offset_end()| must be >=
+ // |slow->offset_end()|, and failure to do so results in error. The
+ // argument for |slow| having dangling RVA is symmetric.
+ if ((fast->HasDanglingRva() && fast->offset_end() < slow->offset_end()) ||
+ (slow->HasDanglingRva() && slow->offset_end() < fast->offset_end())) {
+ if (merge_is_optional) {
+ *(++slow) = *fast;
+ continue;
+ }
+ return kErrorBadOverlapDanglingRva;
+ }
+
+ // Merge |fast| into |slow|.
+ slow->rva_size =
+ std::max(slow->rva_size, fast->rva_end() - slow->rva_begin);
+ slow->offset_size =
+ std::max(slow->offset_size, fast->offset_end() - slow->offset_begin);
+ }
+ ++slow;
+ units.erase(slow, units.end());
+ }
+
+ // After resolving RVA overlaps, any offset overlap would imply error.
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) {
+ return a.offset_begin < b.offset_begin;
+ });
+
+ if (units.size() > 1) {
+ auto previous = units.begin();
+ for (auto current = previous + 1; current != units.end(); ++current) {
+ if (previous->offset_end() > current->offset_begin)
+ return kErrorBadOverlap;
+ previous = current;
+ }
+ }
+
+ // For the to-fake-offset heuristic: Compute exclusive upper bounds for
+ // offsets and RVAs.
+ offset_t offset_bound = 0;
+ rva_t rva_bound = 0;
+ for (const Unit& unit : units) {
+ offset_bound = std::max(offset_bound, unit.offset_end());
+ rva_bound = std::max(rva_bound, unit.rva_end());
+ }
+
+ // Compute pessimistic range and see if it still fits within space of valid
+ // offsets. This limits image size to one half of |kOffsetBound|, and is a
+ // main drawback for the current heuristic to convert dangling RVA to fake
+ // offsets.
+ if (!RangeIsBounded(offset_bound, rva_bound, kOffsetBound))
+ return kErrorFakeOffsetBeginTooLarge;
+
+ // Success. Store results. |units| is currently sorted by offset, so assign.
+ units_sorted_by_offset_.assign(units.begin(), units.end());
+
+ // Sort |units| by RVA, and just store it directly.
+ std::sort(units.begin(), units.end(), [](const Unit& a, const Unit& b) {
+ return a.rva_begin < b.rva_begin;
+ });
+ units_sorted_by_rva_ = std::move(units);
+
+ fake_offset_begin_ = offset_bound;
+ return kSuccess;
+}
+
+rva_t AddressTranslator::OffsetToRva(offset_t offset) const {
+ if (offset >= fake_offset_begin_) {
+ // Handle dangling RVA: First shift it to regular RVA space.
+ rva_t rva = offset - fake_offset_begin_;
+ // If result is indeed a dangling RVA, return it; else return |kInvalidRva|.
+ const Unit* unit = RvaToUnit(rva);
+ return (unit && unit->HasDanglingRva() && unit->CoversDanglingRva(rva))
+ ? rva
+ : kInvalidRva;
+ }
+ const Unit* unit = OffsetToUnit(offset);
+ return unit ? 
unit->OffsetToRvaUnsafe(offset) : kInvalidRva; +} + +offset_t AddressTranslator::RvaToOffset(rva_t rva) const { + const Unit* unit = RvaToUnit(rva); + // This also handles dangling RVA. + return unit ? unit->RvaToOffsetUnsafe(rva, fake_offset_begin_) + : kInvalidOffset; +} + +const AddressTranslator::Unit* AddressTranslator::OffsetToUnit( + offset_t offset) const { + // Finds first Unit with |offset_begin| > |offset|, rewind by 1 to find the + // last Unit with |offset_begin| >= |offset| (if it exists). + auto it = std::upper_bound( + units_sorted_by_offset_.begin(), units_sorted_by_offset_.end(), offset, + [](offset_t a, const Unit& b) { return a < b.offset_begin; }); + if (it == units_sorted_by_offset_.begin()) + return nullptr; + --it; + return it->CoversOffset(offset) ? &(*it) : nullptr; +} + +const AddressTranslator::Unit* AddressTranslator::RvaToUnit(rva_t rva) const { + auto it = std::upper_bound( + units_sorted_by_rva_.begin(), units_sorted_by_rva_.end(), rva, + [](rva_t a, const Unit& b) { return a < b.rva_begin; }); + if (it == units_sorted_by_rva_.begin()) + return nullptr; + --it; + return it->CoversRva(rva) ? &(*it) : nullptr; +} + +} // namespace zucchini diff --git a/address_translator.h b/address_translator.h new file mode 100644 index 0000000..a517a2c --- /dev/null +++ b/address_translator.h @@ -0,0 +1,199 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ +#define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ + +#include <stdint.h> + +#include <tuple> +#include <vector> + +#include "components/zucchini/algorithm.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// There are several ways to reason about addresses in an image: +// - Offset: Position relative to start of image. +// - VA (Virtual Address): Virtual memory address of a loaded image. 
This is +// subject to relocation by the OS. +// - RVA (Relative Virtual Address): VA relative to some base address. This is +// the preferred way to specify pointers in an image. +// +// Zucchini is primarily concerned with offsets and RVAs. Executable images like +// PE and ELF are organized into sections. Each section specifies offset and RVA +// ranges as: +// {Offset start, offset size, RVA start, RVA size}. +// This constitutes a basic unit to translate between offsets and RVAs. Note: +// |offset size| < |RVA size| is possible. For example, the .bss section can can +// have zero-filled statically-allocated data that have no corresponding bytes +// on image (to save space). This poses a problem for Zucchini, which stores +// addresses as offsets: now we'd have "dangling RVAs" that don't map to +// offsets! Some ways to handling this are: +// 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means +// some reference targets would escape detection and processing. +// 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data +// must not be read on these fake offsets, which are only valid as target +// addresses for reference matching. +// As for |RVA size| < |offset size|, the extra portion just gets ignored. +// +// Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to +// fake offsets by adding a large value. This value can be chosen as an +// exclusive upper bound of all offsets (i.e., image size). This allows them to +// be easily detected and processed as a special-case. +// TODO(huangs): Investigate option (1), now that the refactored code makes +// experimentation easier. +// TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t| +// ranges and create "fake" units to accommodate dangling RVAs. Then +// AddressTranslator can be simplified. + +// Virtual Address relative to some base address (RVA). 
There's distinction +// between "valid RVA" and "existent RVA": +// - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|. +// - Existent RVA: An RVA that has semantic meaning in an image, and may +// translate to an offset in an image or (if a dangling RVA) a fake offset. +// All existent RVAs are valid RVAs. +using rva_t = uint32_t; +// Divide by 2 to match |kOffsetBound|. +constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2; +constexpr rva_t kInvalidRva = static_cast<rva_t>(-2); + +// A utility to translate between offsets and RVAs in an image. +class AddressTranslator { + public: + // A basic unit for address translation, roughly maps to a section, but may + // be processed (e.g., merged) as an optimization. + struct Unit { + offset_t offset_end() const { return offset_begin + offset_size; } + rva_t rva_end() const { return rva_begin + rva_size; } + bool IsEmpty() const { + // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed + // yet, and once it is then it's empty. + // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but + // is not empty. + return rva_size == 0; + } + bool CoversOffset(offset_t offset) const { + return RangeCovers(offset_begin, offset_size, offset); + } + bool CoversRva(rva_t rva) const { + return RangeCovers(rva_begin, rva_size, rva); + } + bool CoversDanglingRva(rva_t rva) const { + return CoversRva(rva) && rva - rva_begin >= offset_size; + } + // Assumes valid |offset| (*cannot* be fake offset). + rva_t OffsetToRvaUnsafe(offset_t offset) const { + return offset - offset_begin + rva_begin; + } + // Assumes valid |rva| (*can* be danging RVA). + offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const { + rva_t delta = rva - rva_begin; + return delta < offset_size ? 
delta + offset_begin + : fake_offset_begin + rva; + } + bool HasDanglingRva() const { return rva_size > offset_size; } + friend bool operator==(const Unit& a, const Unit& b) { + return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) == + std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size); + } + + offset_t offset_begin; + offset_t offset_size; + rva_t rva_begin; + rva_t rva_size; + }; + + // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit + // found, to reduce the number of OffsetToUnit() calls for clustered queries. + class OffsetToRvaCache { + public: + // Embeds |translator| for use. Now object lifetime is tied to |translator| + // lifetime. + explicit OffsetToRvaCache(const AddressTranslator& translator); + OffsetToRvaCache(const OffsetToRvaCache&) = delete; + const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete; + + rva_t Convert(offset_t offset) const; + + private: + const AddressTranslator& translator_; + mutable const AddressTranslator::Unit* cached_unit_ = nullptr; + }; + + // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit + // found, to reduce the number of RvaToUnit() calls for clustered queries. + class RvaToOffsetCache { + public: + // Embeds |translator| for use. Now object lifetime is tied to |translator| + // lifetime. 
+ explicit RvaToOffsetCache(const AddressTranslator& translator); + RvaToOffsetCache(const RvaToOffsetCache&) = delete; + const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete; + + bool IsValid(rva_t rva) const; + + offset_t Convert(rva_t rva) const; + + private: + const AddressTranslator& translator_; + mutable const AddressTranslator::Unit* cached_unit_ = nullptr; + }; + + enum Status { + kSuccess = 0, + kErrorOverflow, + kErrorBadOverlap, + kErrorBadOverlapDanglingRva, + kErrorFakeOffsetBeginTooLarge, + }; + + AddressTranslator(); + AddressTranslator(AddressTranslator&&); + AddressTranslator(const AddressTranslator&) = delete; + const AddressTranslator& operator=(const AddressTranslator&) = delete; + ~AddressTranslator(); + + // Consumes |units| to populate data in this class. Performs consistency + // checks and overlapping Units. Returns Status to indicate success. + Status Initialize(std::vector<Unit>&& units); + + // Returns the (possibly dangling) RVA corresponding to |offset|, or + // kInvalidRva if not found. + rva_t OffsetToRva(offset_t offset) const; + + // Returns the (possibly fake) offset corresponding to |rva|, or + // kInvalidOffset if not found (i.e., |rva| is non-existent). + offset_t RvaToOffset(rva_t rva) const; + + // For testing. + offset_t fake_offset_begin() const { return fake_offset_begin_; } + + const std::vector<Unit>& units_sorted_by_offset() const { + return units_sorted_by_offset_; + } + + const std::vector<Unit>& units_sorted_by_rva() const { + return units_sorted_by_rva_; + } + + private: + // Helper to find the Unit that contains given |offset| or |rva|. Returns null + // if not found. + const Unit* OffsetToUnit(offset_t offset) const; + const Unit* RvaToUnit(rva_t rva) const; + + // Storage of Units. All offset ranges are non-empty and disjoint. Likewise + // for all RVA ranges. 
+ std::vector<Unit> units_sorted_by_offset_; + std::vector<Unit> units_sorted_by_rva_; + + // Conversion factor to translate between dangling RVAs and fake offsets. + offset_t fake_offset_begin_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ diff --git a/address_translator_unittest.cc b/address_translator_unittest.cc new file mode 100644 index 0000000..efa2f14 --- /dev/null +++ b/address_translator_unittest.cc @@ -0,0 +1,586 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/address_translator.h" + +#include <algorithm> +#include <string> +#include <utility> + +#include "base/format_macros.h" +#include "base/strings/stringprintf.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// Test case structs. The convention of EXPECT() specifies "expectd" value +// before ""actual". However, AddressTranslator interfaces explicitly state "X +// to Y". So it is clearer in test cases to specify "input" before "expect". +struct OffsetToRvaTestCase { + offset_t input; + rva_t expect; +}; + +struct RvaToOffsetTestCase { + rva_t input; + offset_t expect; +}; + +class TestAddressTranslator : public AddressTranslator { + public: + using AddressTranslator::AddressTranslator; + + // Initialize() alternative that parses a visual representation of offset and + // RVA ranges. Illustrative example ("special" means '.' or '!'): + // "..AAA...|....aaaa" => "..AAA..." for offsets, and "....aaaa" for RVAs: + // - "..AAA...": First non-period character is at 2, so |offset_begin| = 2. + // - "..AAA...": There are 3 non-special characters, so |offset_size| = +3. + // - "....aaaa": First non-period character is at 4, so |rva_begin| = 4. + // - "....aaaa": There are 4 non-special characters, so |rva_size| = +4. + // For the special case of length-0 range, '!' 
can be used. For example, + // "...!...." specifies |begin| = 3 and |size| = +0. + AddressTranslator::Status InitializeWithStrings( + const std::vector<std::string>& specs) { + std::vector<Unit> units; + units.reserve(specs.size()); + for (const std::string& s : specs) { + size_t sep = s.find('|'); + CHECK_NE(sep, std::string::npos); + std::string s1 = s.substr(0, sep); + std::string s2 = s.substr(sep + 1); + + auto first_non_blank = [](const std::string& t) { + auto is_blank = [](char ch) { return ch == '.'; }; + return std::find_if_not(t.begin(), t.end(), is_blank) - t.begin(); + }; + auto count_non_special = [](const std::string& t) { + auto is_special = [](char ch) { return ch == '.' || ch == '!'; }; + return t.size() - std::count_if(t.begin(), t.end(), is_special); + }; + units.push_back({static_cast<offset_t>(first_non_blank(s1)), + static_cast<offset_t>(count_non_special(s1)), + static_cast<rva_t>(first_non_blank(s2)), + static_cast<rva_t>(count_non_special(s2))}); + } + return Initialize(std::move(units)); + } +}; + +// Simple test: Initialize TestAddressTranslator using |specs|, and match +// |expected| results re. success or failure. +void SimpleTest(const std::vector<std::string>& specs, + AddressTranslator::Status expected, + const std::string& case_name) { + TestAddressTranslator translator; + auto result = translator.InitializeWithStrings(specs); + EXPECT_EQ(expected, result) << case_name; +} + +// Test AddressTranslator::Initialize's Unit overlap and error checks over +// multiple test cases, each case consists of a fixed unit (specified as +// string), and a variable string taken from an list. 
+class TwoUnitOverlapTester { + public: + struct TestCase { + std::string unit_str; + AddressTranslator::Status expected; + }; + + static void RunTest(const std::string& unit_str1, + const std::vector<TestCase>& test_cases) { + for (size_t i = 0; i < test_cases.size(); ++i) { + const auto& test_case = test_cases[i]; + const std::string& unit_str2 = test_case.unit_str; + const std::string str = + base::StringPrintf("Case #%" PRIuS ": %s", i, unit_str2.c_str()); + SimpleTest({unit_str1, unit_str2}, test_case.expected, str); + // Switch order. Expect same results. + SimpleTest({unit_str2, unit_str1}, test_case.expected, str); + } + } +}; + +} // namespace + +TEST(AddressTranslatorTest, Empty) { + using AT = AddressTranslator; + TestAddressTranslator translator; + EXPECT_EQ(AT::kSuccess, + translator.Initialize(std::vector<AddressTranslator::Unit>())); + offset_t fake_offset_begin = translator.fake_offset_begin(); + + // Optimized versions. + AddressTranslator::OffsetToRvaCache offset_to_rva(translator); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator); + + EXPECT_EQ(kInvalidRva, translator.OffsetToRva(0U)); + EXPECT_EQ(kInvalidRva, translator.OffsetToRva(100U)); + EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(0U)); + EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(100U)); + + EXPECT_EQ(kInvalidOffset, translator.RvaToOffset(0U)); + EXPECT_EQ(kInvalidOffset, translator.RvaToOffset(100U)); + EXPECT_EQ(kInvalidOffset, rva_to_offset.Convert(0U)); + EXPECT_EQ(kInvalidOffset, rva_to_offset.Convert(100U)); + + EXPECT_EQ(kInvalidRva, translator.OffsetToRva(fake_offset_begin)); + EXPECT_EQ(kInvalidRva, offset_to_rva.Convert(fake_offset_begin)); +} + +TEST(AddressTranslatorTest, Single) { + using AT = AddressTranslator; + TestAddressTranslator translator; + // Offsets to RVA: [10, 30) -> [100, 120). + EXPECT_EQ(AT::kSuccess, translator.Initialize({{10U, +20U, 100U, +20U}})); + offset_t fake_offset_begin = translator.fake_offset_begin(); + + // Optimized versions. 
+ AddressTranslator::OffsetToRvaCache offset_to_rva(translator); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator); + EXPECT_EQ(30U, fake_offset_begin); // Test implementation detail. + + // Offsets to RVAs. + OffsetToRvaTestCase test_cases1[] = { + {0U, kInvalidRva}, {9U, kInvalidRva}, {10U, 100U}, + {20U, 110U}, {29U, 119U}, {30U, kInvalidRva}, + }; + for (auto& test_case : test_cases1) { + EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input)); + EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input)); + } + + // RVAs to offsets. + RvaToOffsetTestCase test_cases2[] = { + {0U, kInvalidOffset}, {99U, kInvalidOffset}, {100U, 10U}, + {110U, 20U}, {119U, 29U}, {120U, kInvalidOffset}, + }; + for (auto& test_case : test_cases2) { + EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input)); + EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input)); + } +} + +TEST(AddressTranslatorTest, SingleDanglingRva) { + using AT = AddressTranslator; + TestAddressTranslator translator; + // Offsets to RVA: [10, 30) -> [100, 120 + 7), so has dangling RVAs. + EXPECT_EQ(AT::kSuccess, + translator.Initialize({{10U, +20U, 100U, +20U + 7U}})); + offset_t fake_offset_begin = translator.fake_offset_begin(); + + EXPECT_EQ(30U, fake_offset_begin); // Test implementation detail. + + // Optimized versions. + AddressTranslator::OffsetToRvaCache offset_to_rva(translator); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator); + + // Offsets to RVAs. + OffsetToRvaTestCase test_cases1[] = { + {0U, kInvalidRva}, + {9U, kInvalidRva}, + {10U, 100U}, + {20U, 110U}, + {29U, 119U}, + {30U, kInvalidRva}, + // Fake offsets to dangling RVAs. 
+ {fake_offset_begin + 100U, kInvalidRva}, + {fake_offset_begin + 119U, kInvalidRva}, + {fake_offset_begin + 120U, 120U}, + {fake_offset_begin + 126U, 126U}, + {fake_offset_begin + 127U, kInvalidRva}, + }; + for (auto& test_case : test_cases1) { + EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input)); + EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input)); + } + + // RVAs to offsets. + RvaToOffsetTestCase test_cases2[] = { + {0U, kInvalidOffset}, + {99U, kInvalidOffset}, + {100U, 10U}, + {110U, 20U}, + {119U, 29U}, + // Dangling RVAs to fake offsets. + {120U, fake_offset_begin + 120U}, + {126U, fake_offset_begin + 126U}, + {127U, kInvalidOffset}, + }; + for (auto& test_case : test_cases2) { + EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input)); + EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input)); + } +} + +TEST(AddressTranslatorTest, BasicUsage) { + using AT = AddressTranslator; + TestAddressTranslator translator; + // Offsets covered: [10, 30), [40, 70), [70, 110). + // Map to RVAs: [200, 220 + 5), [300, 330), [100, 140), so has dangling RVAs. + auto result = translator.Initialize({ + {10U, +20U, 200U, +20U + 5U}, // Has dangling RVAs. + {40U, +30U, 300U, +20U}, // Extra offset truncated and ignored. + {50U, +20U, 310U, +20U}, // Overlap with previous: Merged. + {70U, +40U, 100U, +20U}, // Tangent with previous but inconsistent; extra + // offset truncated and ignored. + {90U, +20U, 120U, +20U}, // Tangent with previous and consistent: Merged. + }); + EXPECT_EQ(AT::kSuccess, result); + offset_t fake_offset_begin = translator.fake_offset_begin(); + EXPECT_EQ(110U, fake_offset_begin); // Test implementation detail. + + // Optimized versions. + AddressTranslator::OffsetToRvaCache offset_to_rva(translator); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator); + + // Offsets to RVAs. 
+ OffsetToRvaTestCase test_cases1[] = { + {0U, kInvalidRva}, + {9U, kInvalidRva}, + {10U, 200U}, + {20U, 210U}, + {29U, 219U}, + {30U, kInvalidRva}, + {39U, kInvalidRva}, + {40U, 300U}, + {55U, 315U}, + {69U, 329U}, + {70U, 100U}, + {90U, 120U}, + {109U, 139U}, + {110U, kInvalidRva}, + // Fake offsets to dangling RVAs. + {fake_offset_begin + 220U, 220U}, + {fake_offset_begin + 224U, 224U}, + {fake_offset_begin + 225U, kInvalidRva}, + }; + for (auto& test_case : test_cases1) { + EXPECT_EQ(test_case.expect, translator.OffsetToRva(test_case.input)); + EXPECT_EQ(test_case.expect, offset_to_rva.Convert(test_case.input)); + } + + // RVAs to offsets. + RvaToOffsetTestCase test_cases2[] = { + {0U, kInvalidOffset}, + {99U, kInvalidOffset}, + {100U, 70U}, + {120U, 90U}, + {139U, 109U}, + {140U, kInvalidOffset}, + {199U, kInvalidOffset}, + {200U, 10U}, + {210U, 20U}, + {219U, 29U}, + {225U, kInvalidOffset}, + {299U, kInvalidOffset}, + {300U, 40U}, + {315U, 55U}, + {329U, 69U}, + {330U, kInvalidOffset}, + // Dangling RVAs to fake offsets. + {220U, fake_offset_begin + 220U}, + {224U, fake_offset_begin + 224U}, + {225U, kInvalidOffset}, + }; + for (auto& test_case : test_cases2) { + EXPECT_EQ(test_case.expect, translator.RvaToOffset(test_case.input)); + EXPECT_EQ(test_case.expect, rva_to_offset.Convert(test_case.input)); + } +} + +TEST(AddressTranslatorTest, Overflow) { + using AT = AddressTranslator; + // Test assumes that offset_t and rva_t to be 32-bit. + static_assert(sizeof(offset_t) == 4 && sizeof(rva_t) == 4, + "Needs to update test."); + { + AddressTranslator translator1; + EXPECT_EQ(AT::kErrorOverflow, + translator1.Initialize({{0, +0xC0000000U, 0, +0xC0000000U}})); + } + { + AddressTranslator translator2; + EXPECT_EQ(AT::kErrorOverflow, + translator2.Initialize({{0, +0, 0, +0xC0000000U}})); + } + { + // Units are okay, owing to but limitations of the heuristic to convert + // dangling RVA to fake offset, AddressTranslator::Initialize() fails. 
+ AddressTranslator translator3; + EXPECT_EQ(AT::kErrorFakeOffsetBeginTooLarge, + translator3.Initialize( + {{32, +0, 32, +0x50000000U}, {0x50000000U, +16, 0, +16}})); + } +} + +// Sanity test for TestAddressTranslator::InitializeWithStrings(); +TEST(AddressTranslatorTest, AddUnitAsString) { + using AT = AddressTranslator; + { + TestAddressTranslator translator1; + EXPECT_EQ(AT::kSuccess, translator1.InitializeWithStrings({"..A..|.aaa."})); + AddressTranslator::Unit unit1 = translator1.units_sorted_by_offset()[0]; + EXPECT_EQ(2U, unit1.offset_begin); + EXPECT_EQ(+1U, unit1.offset_size); + EXPECT_EQ(1U, unit1.rva_begin); + EXPECT_EQ(+3U, unit1.rva_size); + } + { + TestAddressTranslator translator2; + EXPECT_EQ(AT::kSuccess, + translator2.InitializeWithStrings({".....!...|.bbbbbb..."})); + AddressTranslator::Unit unit2 = translator2.units_sorted_by_offset()[0]; + EXPECT_EQ(5U, unit2.offset_begin); + EXPECT_EQ(+0U, unit2.offset_size); + EXPECT_EQ(1U, unit2.rva_begin); + EXPECT_EQ(+6U, unit2.rva_size); + } +} + +// AddressTranslator::Initialize() lists Unit merging examples in comments. The +// format is different from that used by InitializeWithStrings(), but adapting +// them is easy, so we may as well do so. +TEST(AddressTranslatorTest, OverlapFromComment) { + using AT = AddressTranslator; + constexpr auto OK = AT::kSuccess; + struct { + const char* rva_str; // RVA comes first in this case. 
+ const char* offset_str; + AT::Status expected; + } test_cases[] = { + {"..ssssffff..", "..SSSSFFFF..", OK}, + {"..ssssffff..", "..SSSS..FFFF..", OK}, + {"..ssssffff..", "..FFFF..SSSS..", OK}, + {"..ssssffff..", "..SSOOFF..", AT::kErrorBadOverlap}, + {"..sssooofff..", "..SSSOOOFFF..", OK}, + {"..sssooofff..", "..SSSSSOFFFFF..", AT::kErrorBadOverlap}, + {"..sssooofff..", "..FFOOOOSS..", AT::kErrorBadOverlap}, + {"..sssooofff..", "..SSSOOOF..", OK}, + {"..sssooofff..", "..SSSOOOF..", OK}, + {"..sssooosss..", "..SSSOOOS..", OK}, + {"..sssooofff..", "..SSSOO..", OK}, + {"..sssooofff..", "..SSSOFFF..", AT::kErrorBadOverlapDanglingRva}, + {"..sssooosss..", "..SSSOOSSSS..", AT::kErrorBadOverlapDanglingRva}, + {"..oooooo..", "..OOO..", OK}, + }; + + auto to_period = [](std::string s, char ch) { // |s| passed by value. + std::replace(s.begin(), s.end(), ch, '.'); + return s; + }; + + size_t idx = 0; + for (const auto& test_case : test_cases) { + std::string base_str = + std::string(test_case.offset_str) + "|" + test_case.rva_str; + std::string unit_str1 = to_period(to_period(base_str, 'S'), 's'); + std::string unit_str2 = to_period(to_period(base_str, 'F'), 'f'); + SimpleTest({unit_str1, unit_str2}, test_case.expected, + base::StringPrintf("Case #%" PRIuS, idx)); + ++idx; + } +} + +TEST(AddressTranslatorTest, Overlap) { + using AT = AddressTranslator; + constexpr auto OK = AT::kSuccess; + constexpr const char* unit_str1 = "....AAA.......|.....aaa......"; + + std::vector<TwoUnitOverlapTester::TestCase> test_cases = { + //....AAA.......|.....aaa...... The first Unit. NOLINT + {"....BBB.......|.....bbb......", OK}, + {"..BBB.........|...bbb........", OK}, + {"......BBB.....|.......bbb....", OK}, + {"..BBBBBBBBB...|...bbb........", OK}, // Extra offset get truncated. 
+ {"......BBBBBBBB|.......bbb....", OK}, + {"....BBB.......|.......bbb....", AT::kErrorBadOverlap}, + {"..BBB.........|.......bbb....", AT::kErrorBadOverlap}, + {".......BBB....|.......bbb....", AT::kErrorBadOverlap}, + //....AAA.......|.....aaa...... The first Unit. NOLINT + {"....BBB.......|..........bbb.", AT::kErrorBadOverlap}, + {"..........BBB.|.......bbb....", AT::kErrorBadOverlap}, + {"......BBB.....|.....bbb......", AT::kErrorBadOverlap}, + {"......BBB.....|..bbb.........", AT::kErrorBadOverlap}, + {"......BBB.....|bbb...........", AT::kErrorBadOverlap}, + {"BBB...........|bbb...........", OK}, // Disjoint. + {"........BBB...|.........bbb..", OK}, // Disjoint. + {"BBB...........|..........bbb.", OK}, // Disjoint, offset elsewhere. + //....AAA.......|.....aaa...... The first Unit. NOLINT + {".BBB..........|..bbb.........", OK}, // Tangent. + {".......BBB....|........bbb...", OK}, // Tangent. + {".BBB..........|........bbb...", OK}, // Tangent, offset elsewhere. + {"BBBBBB........|bbb...........", OK}, // Repeat, with extra offsets. + {"........BBBB..|.........bbb..", OK}, + {"BBBBBB........|..........bbb.", OK}, + {".BBBBBB.......|..bbb.........", OK}, + {".......BBBBB..|........bbb...", OK}, + //....AAA.......|.....aaa...... The first Unit. NOLINT + {".BBB..........|........bbb...", OK}, // Tangent, offset elsewhere. + {"..BBB.........|........bbb...", AT::kErrorBadOverlap}, + {"...BB.........|....bb........", OK}, + {"....BB........|.....bb.......", OK}, + {".......BB.....|........bb....", OK}, + {"...BBBBBB.....|....bbbbbb....", OK}, + {"..BBBBBB......|...bbbbbb.....", OK}, + {"......BBBBBB..|.......bbbbbb.", OK}, + //....AAA.......|.....aaa...... The first Unit. 
NOLINT + {"BBBBBBBBBBBBBB|bbbbbbbbbbbbbb", AT::kErrorBadOverlap}, + {"B.............|b.............", OK}, + {"B.............|.............b", OK}, + {"....B.........|.....b........", OK}, + {"....B.........|......b.......", AT::kErrorBadOverlap}, + {"....B.........|......b.......", AT::kErrorBadOverlap}, + {"....BBB.......|.....bb.......", OK}, + {"....BBBB......|.....bbb......", OK}, + //....AAA.......|.....aaa...... The first Unit. NOLINT + {".........BBBBB|.b............", OK}, + {"....AAA.......|.....!........", OK}, + {"....!.........|.....!........", OK}, // Empty units gets deleted early. + {"....!.........|..........!...", OK}, // Forgiving! + }; + + TwoUnitOverlapTester::RunTest(unit_str1, test_cases); +} + +TEST(AddressTranslatorTest, OverlapOffsetMultiple) { + using AT = AddressTranslator; + // Simple case. Note that RVA ranges don't get merged. + SimpleTest({"A..|a....", // + ".A.|..a..", // + "..A|....a"}, + AT::kSuccess, "Case #0"); + + // Offset range 1 overlaps 2 and 3, but truncation takes place to trim down + // offset ranges, so still successful. + SimpleTest({"..A|a....", // + ".AA|..a..", // + "AAA|....a"}, + AT::kSuccess, "Case #1"); + + // Offset range 2 and 3 overlap, so fail. + SimpleTest({"A..|a....", // + ".A.|..a..", // + ".A.|....a"}, + AT::kErrorBadOverlap, "Case #2"); +} + +TEST(AddressTranslatorTest, OverlapDangling) { + using AT = AddressTranslator; + constexpr auto OK = AT::kSuccess; + // First Unit has dangling offsets at + constexpr const char* unit_str1 = "....AAA.......|.....aaaaaa..."; + + std::vector<TwoUnitOverlapTester::TestCase> test_cases = { + //....AAA.......|.....aaaaaa... The first Unit. 
NOLINT + {"....BBB.......|.....bbbbbb...", OK}, + {"....BBB.......|.....bbbbb....", OK}, + {"....BBB.......|.....bbbb.....", OK}, + {"....BBB.......|.....bbb......", OK}, + {".....BBB......|......bbb.....", AT::kErrorBadOverlapDanglingRva}, + {".....BB.......|......bbb.....", OK}, + {"....BBB.......|.....bbbbbbbb.", OK}, + {"..BBBBB.......|...bbbbbbbb...", OK}, + //....AAA.......|.....aaaaaa... The first Unit. NOLINT + {"......!.......|.bbb..........", AT::kErrorBadOverlap}, + {"..BBBBB.......|...bbbbb......", OK}, + {".......BBB....|.bbb..........", OK}, // Just tangent: Can go elsewhere. + {".......BBB....|.bbbb.........", OK}, // Can be another dangling RVA. + {".......!......|.bbbb.........", OK}, // Same with empty. + {"......!.......|.......!......", OK}, // Okay, but gets deleted. + {"......!.......|.......b......", AT::kErrorBadOverlapDanglingRva}, + {"......B.......|.......b......", OK}, + //....AAA.......|.....aaaaaa... The first Unit. NOLINT + {"......BBBB....|.......bbbb...", AT::kErrorBadOverlapDanglingRva}, + {"......BB......|.......bb.....", AT::kErrorBadOverlapDanglingRva}, + {"......BB......|bb............", AT::kErrorBadOverlap}, + }; + + TwoUnitOverlapTester::RunTest(unit_str1, test_cases); +} + +// Tests implementation since algorithm is tricky. +TEST(AddressTranslatorTest, Merge) { + using AT = AddressTranslator; + // Merge a bunch of overlapping Units into one big Unit. + std::vector<std::string> test_case1 = { + "AAA.......|.aaa......", // Comment to prevent wrap by formatter. + "AA........|.aa.......", // + "..AAA.....|...aaa....", // + "....A.....|.....a....", // + ".....AAA..|......aaa.", // + "........A.|.........a", // + }; + // Try all 6! permutations. 
+ std::sort(test_case1.begin(), test_case1.end()); + do { + TestAddressTranslator translator1; + EXPECT_EQ(AT::kSuccess, translator1.InitializeWithStrings(test_case1)); + EXPECT_EQ(9U, translator1.fake_offset_begin()); + + AT::Unit expected{0U, +9U, 1U, +9U}; + EXPECT_EQ(1U, translator1.units_sorted_by_offset().size()); + EXPECT_EQ(expected, translator1.units_sorted_by_offset()[0]); + EXPECT_EQ(1U, translator1.units_sorted_by_rva().size()); + EXPECT_EQ(expected, translator1.units_sorted_by_rva()[0]); + } while (std::next_permutation(test_case1.begin(), test_case1.end())); + + // Merge RVA-adjacent Units into two Units. + std::vector<std::string> test_case2 = { + ".....A..|.a......", // First Unit. + "......A.|..a.....", // + "A.......|...a....", // Second Unit: RVA-adjacent to first Unit, but + ".A......|....a...", // offset would become inconsistent, so a new + "..A.....|.....a..", // Unit gets created. + }; + // Try all 5! permutations. + std::sort(test_case2.begin(), test_case2.end()); + do { + TestAddressTranslator translator2; + EXPECT_EQ(AT::kSuccess, translator2.InitializeWithStrings(test_case2)); + EXPECT_EQ(7U, translator2.fake_offset_begin()); + + AT::Unit expected1{0U, +3U, 3U, +3U}; + AT::Unit expected2{5U, +2U, 1U, +2U}; + EXPECT_EQ(2U, translator2.units_sorted_by_offset().size()); + EXPECT_EQ(expected1, translator2.units_sorted_by_offset()[0]); + EXPECT_EQ(expected2, translator2.units_sorted_by_offset()[1]); + EXPECT_EQ(2U, translator2.units_sorted_by_rva().size()); + EXPECT_EQ(expected2, translator2.units_sorted_by_rva()[0]); + EXPECT_EQ(expected1, translator2.units_sorted_by_rva()[1]); + } while (std::next_permutation(test_case2.begin(), test_case2.end())); +} + +TEST(AddressTranslatorTest, RvaToOffsetCache_IsValid) { + AddressTranslator translator; + // Notice that the second section has dangling RVA. 
+ ASSERT_EQ(AddressTranslator::kSuccess, + translator.Initialize( + {{0x04, +0x28, 0x1A00, +0x28}, {0x30, +0x10, 0x3A00, +0x30}})); + AddressTranslator::RvaToOffsetCache rva_checker(translator); + + EXPECT_FALSE(rva_checker.IsValid(kInvalidRva)); + + for (int i = 0; i < 0x28; ++i) + EXPECT_TRUE(rva_checker.IsValid(0x1A00 + i)); + EXPECT_FALSE(rva_checker.IsValid(0x1A00 + 0x28)); + EXPECT_FALSE(rva_checker.IsValid(0x1A00 + 0x29)); + EXPECT_FALSE(rva_checker.IsValid(0x1A00 - 1)); + EXPECT_FALSE(rva_checker.IsValid(0x1A00 - 2)); + + for (int i = 0; i < 0x30; ++i) + EXPECT_TRUE(rva_checker.IsValid(0x3A00 + i)); + EXPECT_FALSE(rva_checker.IsValid(0x3A00 + 0x30)); + EXPECT_FALSE(rva_checker.IsValid(0x3A00 + 0x31)); + EXPECT_FALSE(rva_checker.IsValid(0x3A00 - 1)); + EXPECT_FALSE(rva_checker.IsValid(0x3A00 - 2)); + + EXPECT_FALSE(rva_checker.IsValid(0)); + EXPECT_FALSE(rva_checker.IsValid(0x10)); + EXPECT_FALSE(rva_checker.IsValid(0x7FFFFFFFU)); + EXPECT_FALSE(rva_checker.IsValid(0xFFFFFFFFU)); +} + +} // namespace zucchini diff --git a/algorithm.h b/algorithm.h new file mode 100644 index 0000000..f5d49e3 --- /dev/null +++ b/algorithm.h @@ -0,0 +1,146 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ALGORITHM_H_ +#define COMPONENTS_ZUCCHINI_ALGORITHM_H_ + +#include <stddef.h> + +#include <algorithm> +#include <type_traits> +#include <vector> + +#include "base/check_op.h" + +// Collection of simple utilities used in for low-level computation. + +namespace zucchini { + +// Safely determines whether |[begin, begin + size)| is in |[0, bound)|. Note: +// The special case |[bound, bound)| is not considered to be in |[0, bound)|. 
+template <typename T> +bool RangeIsBounded(T begin, T size, size_t bound) { + static_assert(std::is_unsigned<T>::value, "Value type must be unsigned."); + return begin < bound && size <= bound - begin; +} + +// Safely determines whether |value| lies in |[begin, begin + size)|. Works +// properly even if |begin + size| overflows -- although such ranges are +// considered pathological, and should fail validation elsewhere. +template <typename T> +bool RangeCovers(T begin, T size, T value) { + static_assert(std::is_unsigned<T>::value, "Value type must be unsigned."); + return begin <= value && value - begin < size; +} + +// Returns the integer in inclusive range |[lo, hi]| that's closest to |value|. +// This departs from the usual usage of semi-inclusive ranges, but is useful +// because (1) sentinels can use this, (2) a valid output always exists. It is +// assumed that |lo <= hi|. +template <class T> +T InclusiveClamp(T value, T lo, T hi) { + static_assert(std::is_unsigned<T>::value, "Value type must be unsigned."); + DCHECK_LE(lo, hi); + return value <= lo ? lo : (value >= hi ? hi : value); +} + +// Returns the minimum multiple of |m| that's no less than |x|. Assumes |m > 0| +// and |x| is sufficiently small so that no overflow occurs. +template <class T> +constexpr T AlignCeil(T x, T m) { + static_assert(std::is_unsigned<T>::value, "Value type must be unsigned."); + return T((x + m - 1) / m) * m; +} + +// Specialized alignment helpers that returns the increment to |pos| to get the +// next n-aligned value, where n is in {2, 4}. This is useful for aligning +// iterators relative to a base iterator using: +// it += IncrementForAlignCeil2(it - base); +template <class T> +inline int IncrementForAlignCeil2(T pos) { + return static_cast<int>(pos & 1); // Optimized from (-pos) & 1. +} + +template <class T> +inline int IncrementForAlignCeil4(T pos) { + return static_cast<int>((-pos) & 3); +} + +// Sorts values in |container| and removes duplicates. 
+template <class T> +void SortAndUniquify(std::vector<T>* container) { + std::sort(container->begin(), container->end()); + container->erase(std::unique(container->begin(), container->end()), + container->end()); + container->shrink_to_fit(); +} + +// Extracts a single bit at |pos| from integer |v|. +template <int pos, typename T> +constexpr T GetBit(T v) { + return (v >> pos) & 1; +} + +// Extracts bits in inclusive range [|lo|, |hi|] from integer |v|, and returns +// the sign-extend result. For example, let the (MSB-first) bits in a 32-bit int +// |v| be: +// xxxxxxxx xxxxxSii iiiiiiii iyyyyyyy, +// hi^ lo^ => lo = 7, hi = 18 +// To extract "Sii iiiiiiii i", calling +// GetSignedBits<7, 18>(v); +// produces the sign-extended result: +// SSSSSSSS SSSSSSSS SSSSSiii iiiiiiii. +template <int lo, int hi, typename T> +constexpr typename std::make_signed<T>::type GetSignedBits(T v) { + constexpr int kNumBits = sizeof(T) * 8; + using SignedType = typename std::make_signed<T>::type; + // Assumes 0 <= |lo| <= |hi| < |kNumBits|. + // How this works: + // (1) Shift-left by |kNumBits - 1 - hi| to clear "left" bits. + // (2) Shift-right by |kNumBits - 1 - hi + lo| to clear "right" bits. The + // input is casted to a signed type to perform sign-extension. + return static_cast<SignedType>(v << (kNumBits - 1 - hi)) >> + (kNumBits - 1 - hi + lo); +} + +// Similar to GetSignedBits(), but returns the zero-extended result. For the +// above example, calling +// GetUnsignedBits<7, 18>(v); +// results in: +// 00000000 00000000 0000Siii iiiiiiii. +template <int lo, int hi, typename T> +constexpr typename std::make_unsigned<T>::type GetUnsignedBits(T v) { + constexpr int kNumBits = sizeof(T) * 8; + using UnsignedType = typename std::make_unsigned<T>::type; + return static_cast<UnsignedType>(v << (kNumBits - 1 - hi)) >> + (kNumBits - 1 - hi + lo); +} + +// Copies bits at |pos| in |v| to all higher bits, and returns the result as the +// same int type as |v|. 
+template <typename T> +constexpr T SignExtend(int pos, T v) { + int kNumBits = sizeof(T) * 8; + int kShift = kNumBits - 1 - pos; + return static_cast<typename std::make_signed<T>::type>(v << kShift) >> kShift; +} + +// Optimized version where |pos| becomes a template parameter. +template <int pos, typename T> +constexpr T SignExtend(T v) { + constexpr int kNumBits = sizeof(T) * 8; + constexpr int kShift = kNumBits - 1 - pos; + return static_cast<typename std::make_signed<T>::type>(v << kShift) >> kShift; +} + +// Determines whether |v|, if interpreted as a signed integer, is representable +// using |digs| bits. |1 <= digs <= sizeof(T)| is assumed. +template <int digs, typename T> +constexpr bool SignedFit(T v) { + return v == SignExtend<digs - 1, T>(v); +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ALGORITHM_H_ diff --git a/algorithm_unittest.cc b/algorithm_unittest.cc new file mode 100644 index 0000000..2e1f94d --- /dev/null +++ b/algorithm_unittest.cc @@ -0,0 +1,347 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/algorithm.h" + +#include <stddef.h> +#include <stdint.h> + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// Casting functions to specify signed 8-bit and 16-bit integer constants. +// For example, signed8(0xFF) == int8_t(-1). +inline int8_t signed8(uint8_t v) { + return *reinterpret_cast<const int8_t*>(&v); +} + +inline int32_t signed16(uint16_t v) { + return *reinterpret_cast<const int16_t*>(&v); +} + +} // namespace + +TEST(AlgorithmTest, RangeIsBounded) { + // Basic tests. 
+ EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +0U, 10U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +10U, 10U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(1U, +9U, 10U)); + EXPECT_FALSE(RangeIsBounded<uint8_t>(1U, +10U, 10U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(8U, +1U, 10U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(8U, +2U, 10U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(9U, +0U, 10U)); + EXPECT_FALSE(RangeIsBounded<uint8_t>(10U, +0U, 10U)); // ! + EXPECT_FALSE(RangeIsBounded<uint8_t>(100U, +0U, 10U)); + EXPECT_FALSE(RangeIsBounded<uint8_t>(100U, +1U, 10U)); + + // Test at boundary of overflow. + EXPECT_TRUE(RangeIsBounded<uint8_t>(42U, +137U, 255U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(0U, +255U, 255U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(1U, +254U, 255U)); + EXPECT_FALSE(RangeIsBounded<uint8_t>(1U, +255U, 255U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(254U, +0U, 255U)); + EXPECT_TRUE(RangeIsBounded<uint8_t>(254U, +1U, 255U)); + EXPECT_FALSE(RangeIsBounded<uint8_t>(255U, +0U, 255U)); + EXPECT_FALSE(RangeIsBounded<uint8_t>(255U, +3U, 255U)); + + // Test with uint32_t. + EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0x1000U, 0x2000U)); + EXPECT_TRUE(RangeIsBounded<uint32_t>(0x0FFFU, +0x1000U, 0x2000U)); + EXPECT_TRUE(RangeIsBounded<uint32_t>(0x1000U, +0x1000U, 0x2000U)); + EXPECT_FALSE(RangeIsBounded<uint32_t>(0x1000U, +0x1001U, 0x2000U)); + EXPECT_TRUE(RangeIsBounded<uint32_t>(0x1FFFU, +1U, 0x2000U)); + EXPECT_FALSE(RangeIsBounded<uint32_t>(0x2000U, +0U, 0x2000U)); // ! 
+ EXPECT_FALSE(RangeIsBounded<uint32_t>(0x3000U, +0U, 0x2000U)); + EXPECT_FALSE(RangeIsBounded<uint32_t>(0x3000U, +1U, 0x2000U)); + EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0xFFFFFFFEU, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeIsBounded<uint32_t>(0U, +0xFFFFFFFFU, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeIsBounded<uint32_t>(1U, +0xFFFFFFFEU, 0xFFFFFFFFU)); + EXPECT_FALSE(RangeIsBounded<uint32_t>(1U, +0xFFFFFFFFU, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeIsBounded<uint32_t>(0x80000000U, +0x7FFFFFFFU, 0xFFFFFFFFU)); + EXPECT_FALSE( + RangeIsBounded<uint32_t>(0x80000000U, +0x80000000U, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeIsBounded<uint32_t>(0xFFFFFFFEU, +1U, 0xFFFFFFFFU)); + EXPECT_FALSE(RangeIsBounded<uint32_t>(0xFFFFFFFFU, +0U, 0xFFFFFFFFU)); // ! + EXPECT_FALSE( + RangeIsBounded<uint32_t>(0xFFFFFFFFU, +0xFFFFFFFFU, 0xFFFFFFFFU)); +} + +TEST(AlgorithmTest, RangeCovers) { + // Basic tests. + EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 0U)); + EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 5U)); + EXPECT_TRUE(RangeCovers<uint8_t>(0U, +10U, 9U)); + EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 10U)); + EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 100U)); + EXPECT_FALSE(RangeCovers<uint8_t>(0U, +10U, 255U)); + + EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 0U)); + EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 41U)); + EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 42U)); + EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 100U)); + EXPECT_TRUE(RangeCovers<uint8_t>(42U, +137U, 178U)); + EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 179U)); + EXPECT_FALSE(RangeCovers<uint8_t>(42U, +137U, 255U)); + + // 0-size ranges. + EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 41U)); + EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 42U)); + EXPECT_FALSE(RangeCovers<uint8_t>(42U, +0U, 43U)); + + // Test at boundary of overflow. 
+ EXPECT_TRUE(RangeCovers<uint8_t>(254U, +1U, 254U)); + EXPECT_FALSE(RangeCovers<uint8_t>(254U, +1U, 255U)); + EXPECT_FALSE(RangeCovers<uint8_t>(255U, +0U, 255U)); + EXPECT_TRUE(RangeCovers<uint8_t>(255U, +1U, 255U)); + EXPECT_FALSE(RangeCovers<uint8_t>(255U, +5U, 0U)); + + // Test with unit32_t. + EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0U)); + EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 1234566U)); + EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 1234567U)); + EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 4444444U)); + EXPECT_TRUE(RangeCovers<uint32_t>(1234567U, +7654321U, 8888887U)); + EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 8888888U)); + EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0x80000000U)); + EXPECT_FALSE(RangeCovers<uint32_t>(1234567U, +7654321U, 0xFFFFFFFFU)); + EXPECT_FALSE(RangeCovers<uint32_t>(0xFFFFFFFFU, +0, 0xFFFFFFFFU)); + EXPECT_TRUE(RangeCovers<uint32_t>(0xFFFFFFFFU, +1, 0xFFFFFFFFU)); + EXPECT_FALSE(RangeCovers<uint32_t>(0xFFFFFFFFU, +2, 0)); +} + +TEST(AlgorithmTest, InclusiveClamp) { + EXPECT_EQ(1U, InclusiveClamp<uint32_t>(0U, 1U, 9U)); + EXPECT_EQ(1U, InclusiveClamp<uint32_t>(1U, 1U, 9U)); + EXPECT_EQ(5U, InclusiveClamp<uint32_t>(5U, 1U, 9U)); + EXPECT_EQ(8U, InclusiveClamp<uint32_t>(8U, 1U, 9U)); + EXPECT_EQ(9U, InclusiveClamp<uint32_t>(9U, 1U, 9U)); + EXPECT_EQ(9U, InclusiveClamp<uint32_t>(10U, 1U, 9U)); + EXPECT_EQ(9U, InclusiveClamp<uint32_t>(0xFFFFFFFFU, 1U, 9U)); + EXPECT_EQ(42U, InclusiveClamp<uint32_t>(0U, 42U, 42U)); + EXPECT_EQ(42U, InclusiveClamp<uint32_t>(41U, 42U, 42U)); + EXPECT_EQ(42U, InclusiveClamp<uint32_t>(42U, 42U, 42U)); + EXPECT_EQ(42U, InclusiveClamp<uint32_t>(43U, 42U, 42U)); + EXPECT_EQ(0U, InclusiveClamp<uint32_t>(0U, 0U, 0U)); + EXPECT_EQ(0xFFFFFFFF, + InclusiveClamp<uint32_t>(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)); +} + +TEST(AlgorithmTest, AlignCeil) { + EXPECT_EQ(0U, AlignCeil<uint32_t>(0U, 2U)); + EXPECT_EQ(2U, AlignCeil<uint32_t>(1U, 2U)); 
+ EXPECT_EQ(2U, AlignCeil<uint32_t>(2U, 2U)); + EXPECT_EQ(4U, AlignCeil<uint32_t>(3U, 2U)); + EXPECT_EQ(4U, AlignCeil<uint32_t>(4U, 2U)); + EXPECT_EQ(11U, AlignCeil<uint32_t>(10U, 11U)); + EXPECT_EQ(11U, AlignCeil<uint32_t>(11U, 11U)); + EXPECT_EQ(22U, AlignCeil<uint32_t>(12U, 11U)); + EXPECT_EQ(22U, AlignCeil<uint32_t>(21U, 11U)); + EXPECT_EQ(22U, AlignCeil<uint32_t>(22U, 11U)); + EXPECT_EQ(33U, AlignCeil<uint32_t>(23U, 11U)); +} + +TEST(AlgorithmTest, IncrementForAlignCeil) { + struct TestCase { + int exp; // Increment to |pos| to get the next nearest aligned value. + int pos; + }; + TestCase kTestCases2[] = { + {0, 0}, {1, 1}, {0, 2}, {1, 3}, {0, 4}, {1, 5}, + {1, 97}, {0, 98}, {1, 99}, {0, 100}, {1, -1}, {0, -2}, + {1, -101}, {0, -100}, {1, -99}, {0, -98}, {1, -97}, {0, -96}, + }; + for (const auto& test_case : kTestCases2) { + EXPECT_EQ(test_case.exp, IncrementForAlignCeil2<int32_t>(test_case.pos)); + if (test_case.pos >= 0) + EXPECT_EQ(test_case.exp, IncrementForAlignCeil2<uint32_t>(test_case.pos)); + } + TestCase kTestCases4[] = { + {0, 0}, {3, 1}, {2, 2}, {1, 3}, {0, 4}, {3, 5}, + {3, 97}, {2, 98}, {1, 99}, {0, 100}, {1, -1}, {2, -2}, + {1, -101}, {0, -100}, {3, -99}, {2, -98}, {1, -97}, {0, -96}, + }; + for (const auto& test_case : kTestCases4) { + EXPECT_EQ(test_case.exp, IncrementForAlignCeil4<int32_t>(test_case.pos)); + if (test_case.pos >= 0) + EXPECT_EQ(test_case.exp, IncrementForAlignCeil4<uint32_t>(test_case.pos)); + } +} + +TEST(AlgorithmTest, GetBit) { + // 0xC5 = 0b1100'0101. 
+ constexpr uint8_t v = 0xC5; + EXPECT_EQ(uint8_t(1), (GetBit<0>(v))); + EXPECT_EQ(int8_t(0), (GetBit<1>(signed8(v)))); + EXPECT_EQ(uint8_t(1), (GetBit<2>(v))); + EXPECT_EQ(int8_t(0), (GetBit<3>(signed8(v)))); + EXPECT_EQ(uint8_t(0), (GetBit<4>(v))); + EXPECT_EQ(int8_t(0), (GetBit<5>(signed8(v)))); + EXPECT_EQ(uint8_t(1), (GetBit<6>(v))); + EXPECT_EQ(int8_t(1), (GetBit<7>(signed8(v)))); + + EXPECT_EQ(int16_t(1), (GetBit<3, int16_t>(0x0008))); + EXPECT_EQ(uint16_t(0), (GetBit<14, uint16_t>(0xB000))); + EXPECT_EQ(uint16_t(1), (GetBit<15, uint16_t>(0xB000))); + + EXPECT_EQ(uint32_t(1), (GetBit<0, uint32_t>(0xFFFFFFFF))); + EXPECT_EQ(int32_t(1), (GetBit<31, int32_t>(0xFFFFFFFF))); + + EXPECT_EQ(uint32_t(0), (GetBit<0, uint32_t>(0xFF00A596))); + EXPECT_EQ(int32_t(1), (GetBit<1, int32_t>(0xFF00A596))); + EXPECT_EQ(uint32_t(1), (GetBit<4, uint32_t>(0xFF00A596))); + EXPECT_EQ(int32_t(1), (GetBit<7, int32_t>(0xFF00A596))); + EXPECT_EQ(uint32_t(0), (GetBit<9, uint32_t>(0xFF00A596))); + EXPECT_EQ(int32_t(0), (GetBit<16, int32_t>(0xFF00A59))); + EXPECT_EQ(uint32_t(1), (GetBit<24, uint32_t>(0xFF00A596))); + EXPECT_EQ(int32_t(1), (GetBit<31, int32_t>(0xFF00A596))); + + EXPECT_EQ(uint64_t(0), (GetBit<62, uint64_t>(0xB000000000000000ULL))); + EXPECT_EQ(int64_t(1), (GetBit<63, int64_t>(0xB000000000000000LL))); +} + +TEST(AlgorithmTest, GetBits) { + // Zero-extended: Basic cases for various values. 
+ uint32_t test_cases[] = {0, 1, 2, 7, 137, 0x10000, 0x69969669, 0xFFFFFFFF}; + for (uint32_t v : test_cases) { + EXPECT_EQ(uint32_t(v & 0xFF), (GetUnsignedBits<0, 7>(v))); + EXPECT_EQ(uint32_t((v >> 8) & 0xFF), (GetUnsignedBits<8, 15>(v))); + EXPECT_EQ(uint32_t((v >> 16) & 0xFF), (GetUnsignedBits<16, 23>(v))); + EXPECT_EQ(uint32_t((v >> 24) & 0xFF), (GetUnsignedBits<24, 31>(v))); + EXPECT_EQ(uint32_t(v & 0xFFFF), (GetUnsignedBits<0, 15>(v))); + EXPECT_EQ(uint32_t((v >> 1) & 0x3FFFFFFF), (GetUnsignedBits<1, 30>(v))); + EXPECT_EQ(uint32_t((v >> 2) & 0x0FFFFFFF), (GetUnsignedBits<2, 29>(v))); + EXPECT_EQ(uint32_t(v), (GetUnsignedBits<0, 31>(v))); + } + + // Zero-extended: Reading off various nibbles. + EXPECT_EQ(uint32_t(0x4), (GetUnsignedBits<20, 23>(0x00432100U))); + EXPECT_EQ(uint32_t(0x43), (GetUnsignedBits<16, 23>(0x00432100))); + EXPECT_EQ(uint32_t(0x432), (GetUnsignedBits<12, 23>(0x00432100U))); + EXPECT_EQ(uint32_t(0x4321), (GetUnsignedBits<8, 23>(0x00432100))); + EXPECT_EQ(uint32_t(0x321), (GetUnsignedBits<8, 19>(0x00432100U))); + EXPECT_EQ(uint32_t(0x21), (GetUnsignedBits<8, 15>(0x00432100))); + EXPECT_EQ(uint32_t(0x1), (GetUnsignedBits<8, 11>(0x00432100U))); + + // Sign-extended: 0x3CA5 = 0b0011'1100'1010'0101. 
+ EXPECT_EQ(signed16(0xFFFF), (GetSignedBits<0, 0>(0x3CA5U))); + EXPECT_EQ(signed16(0x0001), (GetSignedBits<0, 1>(0x3CA5))); + EXPECT_EQ(signed16(0xFFFD), (GetSignedBits<0, 2>(0x3CA5U))); + EXPECT_EQ(signed16(0x0005), (GetSignedBits<0, 4>(0x3CA5))); + EXPECT_EQ(signed16(0xFFA5), (GetSignedBits<0, 7>(0x3CA5U))); + EXPECT_EQ(signed16(0xFCA5), (GetSignedBits<0, 11>(0x3CA5))); + EXPECT_EQ(signed16(0x0005), (GetSignedBits<0, 3>(0x3CA5U))); + EXPECT_EQ(signed16(0xFFFA), (GetSignedBits<4, 7>(0x3CA5))); + EXPECT_EQ(signed16(0xFFFC), (GetSignedBits<8, 11>(0x3CA5U))); + EXPECT_EQ(signed16(0x0003), (GetSignedBits<12, 15>(0x3CA5))); + EXPECT_EQ(signed16(0x0000), (GetSignedBits<4, 4>(0x3CA5U))); + EXPECT_EQ(signed16(0xFFFF), (GetSignedBits<5, 5>(0x3CA5))); + EXPECT_EQ(signed16(0x0002), (GetSignedBits<4, 6>(0x3CA5U))); + EXPECT_EQ(signed16(0x1E52), (GetSignedBits<1, 14>(0x3CA5))); + EXPECT_EQ(signed16(0xFF29), (GetSignedBits<2, 13>(0x3CA5U))); + EXPECT_EQ(int32_t(0x00001E52), (GetSignedBits<1, 14>(0x3CA5))); + EXPECT_EQ(int32_t(0xFFFFFF29), (GetSignedBits<2, 13>(0x3CA5U))); + + // 64-bits: Extract from middle 0x66 = 0b0110'0110. + EXPECT_EQ(uint64_t(0x0000000000000009LL), + (GetUnsignedBits<30, 33>(int64_t(0x2222222661111111LL)))); + EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFFF9LL), + (GetSignedBits<30, 33>(uint64_t(0x2222222661111111LL)))); +} + +TEST(AlgorithmTest, SignExtend) { + // 0x6A = 0b0110'1010. 
+ EXPECT_EQ(uint8_t(0x00), (SignExtend<uint8_t>(0, 0x6A))); + EXPECT_EQ(signed8(0xFE), (SignExtend<int8_t>(1, signed8(0x6A)))); + EXPECT_EQ(uint8_t(0x02), (SignExtend<uint8_t>(2, 0x6A))); + EXPECT_EQ(signed8(0xFA), (SignExtend<int8_t>(3, signed8(0x6A)))); + EXPECT_EQ(uint8_t(0x0A), (SignExtend<uint8_t>(4, 0x6A))); + EXPECT_EQ(signed8(0xEA), (SignExtend<int8_t>(5, signed8(0x6A)))); + EXPECT_EQ(uint8_t(0xEA), (SignExtend<uint8_t>(6, 0x6A))); + EXPECT_EQ(signed8(0x6A), (SignExtend<int8_t>(7, signed8(0x6A)))); + + EXPECT_EQ(signed16(0xFFFA), (SignExtend<int16_t>(3, 0x6A))); + EXPECT_EQ(uint16_t(0x000A), (SignExtend<uint16_t>(4, 0x6A))); + + EXPECT_EQ(int32_t(0xFFFF8000), (SignExtend<int32_t>(15, 0x00008000))); + EXPECT_EQ(uint32_t(0x00008000U), (SignExtend<uint32_t>(16, 0x00008000))); + EXPECT_EQ(int32_t(0xFFFFFC00), (SignExtend<int32_t>(10, 0x00000400))); + EXPECT_EQ(uint32_t(0xFFFFFFFFU), (SignExtend<uint32_t>(31, 0xFFFFFFFF))); + + EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFE6ALL), + (SignExtend<int64_t>(9, 0x000000000000026ALL))); + EXPECT_EQ(int64_t(0x000000000000016ALL), + (SignExtend<int64_t>(9, 0xFFFFFFFFFFFFFD6ALL))); + EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFE6AULL), + (SignExtend<uint64_t>(9, 0x000000000000026AULL))); + EXPECT_EQ(uint64_t(0x000000000000016AULL), + (SignExtend<uint64_t>(9, 0xFFFFFFFFFFFFFD6AULL))); +} + +TEST(AlgorithmTest, SignExtendTemplated) { + // 0x6A = 0b0110'1010. 
+ EXPECT_EQ(uint8_t(0x00), (SignExtend<0, uint8_t>(0x6A))); + EXPECT_EQ(signed8(0xFE), (SignExtend<1, int8_t>(signed8(0x6A)))); + EXPECT_EQ(uint8_t(0x02), (SignExtend<2, uint8_t>(0x6A))); + EXPECT_EQ(signed8(0xFA), (SignExtend<3, int8_t>(signed8(0x6A)))); + EXPECT_EQ(uint8_t(0x0A), (SignExtend<4, uint8_t>(0x6A))); + EXPECT_EQ(signed8(0xEA), (SignExtend<5, int8_t>(signed8(0x6A)))); + EXPECT_EQ(uint8_t(0xEA), (SignExtend<6, uint8_t>(0x6A))); + EXPECT_EQ(signed8(0x6A), (SignExtend<7, int8_t>(signed8(0x6A)))); + + EXPECT_EQ(signed16(0xFFFA), (SignExtend<3, int16_t>(0x6A))); + EXPECT_EQ(uint16_t(0x000A), (SignExtend<4, uint16_t>(0x6A))); + + EXPECT_EQ(int32_t(0xFFFF8000), (SignExtend<15, int32_t>(0x00008000))); + EXPECT_EQ(uint32_t(0x00008000U), (SignExtend<16, uint32_t>(0x00008000))); + EXPECT_EQ(int32_t(0xFFFFFC00), (SignExtend<10, int32_t>(0x00000400))); + EXPECT_EQ(uint32_t(0xFFFFFFFFU), (SignExtend<31, uint32_t>(0xFFFFFFFF))); + + EXPECT_EQ(int64_t(0xFFFFFFFFFFFFFE6ALL), + (SignExtend<9, int64_t>(0x000000000000026ALL))); + EXPECT_EQ(int64_t(0x000000000000016ALL), + (SignExtend<9, int64_t>(0xFFFFFFFFFFFFFD6ALL))); + EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFE6AULL), + (SignExtend<9, uint64_t>(0x000000000000026AULL))); + EXPECT_EQ(uint64_t(0x000000000000016AULL), + (SignExtend<9, uint64_t>(0xFFFFFFFFFFFFFD6AULL))); +} + +TEST(AlgorithmTest, SignedFit) { + for (int v = -0x80; v < 0x80; ++v) { + EXPECT_EQ(v >= -1 && v < 1, (SignedFit<1, int8_t>(v))); + EXPECT_EQ(v >= -1 && v < 1, (SignedFit<1, uint8_t>(v))); + EXPECT_EQ(v >= -2 && v < 2, (SignedFit<2, int8_t>(v))); + EXPECT_EQ(v >= -4 && v < 4, (SignedFit<3, uint8_t>(v))); + EXPECT_EQ(v >= -8 && v < 8, (SignedFit<4, int16_t>(v))); + EXPECT_EQ(v >= -16 && v < 16, (SignedFit<5, uint32_t>(v))); + EXPECT_EQ(v >= -32 && v < 32, (SignedFit<6, int32_t>(v))); + EXPECT_EQ(v >= -64 && v < 64, (SignedFit<7, uint64_t>(v))); + EXPECT_TRUE((SignedFit<8, int8_t>(v))); + EXPECT_TRUE((SignedFit<8, uint8_t>(v))); + } + + 
EXPECT_TRUE((SignedFit<16, uint32_t>(0x00000000))); + EXPECT_TRUE((SignedFit<16, uint32_t>(0x00007FFF))); + EXPECT_TRUE((SignedFit<16, uint32_t>(0xFFFF8000))); + EXPECT_TRUE((SignedFit<16, uint32_t>(0xFFFFFFFF))); + EXPECT_TRUE((SignedFit<16, int32_t>(0x00007FFF))); + EXPECT_TRUE((SignedFit<16, int32_t>(0xFFFF8000))); + + EXPECT_FALSE((SignedFit<16, uint32_t>(0x80000000))); + EXPECT_FALSE((SignedFit<16, uint32_t>(0x7FFFFFFF))); + EXPECT_FALSE((SignedFit<16, uint32_t>(0x00008000))); + EXPECT_FALSE((SignedFit<16, uint32_t>(0xFFFF7FFF))); + EXPECT_FALSE((SignedFit<16, int32_t>(0x00008000))); + EXPECT_FALSE((SignedFit<16, int32_t>(0xFFFF7FFF))); + + EXPECT_TRUE((SignedFit<48, int64_t>(0x00007FFFFFFFFFFFLL))); + EXPECT_TRUE((SignedFit<48, int64_t>(0xFFFF800000000000LL))); + EXPECT_FALSE((SignedFit<48, int64_t>(0x0008000000000000LL))); + EXPECT_FALSE((SignedFit<48, int64_t>(0xFFFF7FFFFFFFFFFFLL))); +} + +} // namespace zucchini diff --git a/arm_utils.cc b/arm_utils.cc new file mode 100644 index 0000000..2a915a8 --- /dev/null +++ b/arm_utils.cc @@ -0,0 +1,597 @@ +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/arm_utils.h" + +#include "components/zucchini/algorithm.h" + +namespace zucchini { + +namespace { + +inline bool IsMisaligned(rva_t rva, ArmAlign align) { + return (rva & (align - 1)) != 0; +} + +} // namespace + +/******** AArch32Rel32Translator ********/ + +AArch32Rel32Translator::AArch32Rel32Translator() = default; + +// The mapping between ARM instruction "Code" to "Displacement" involves complex +// bit manipulation. The comments below annotate bits mappings using a string. +// * Bits are listed from highest-order to lowerst-order (like in the manual). +// * '0' and '1' denote literals. +// * Uppercase letters denote a single bit in "Code". 
For example, 'S' denotes +// a sign bit that gets extended in "Displacement". To follow naming in the +// manual, these may enumerated, and written as "(I1)", "(I2)", etc. +// * Lowercase letters denote bit fields with orders preserved. + +// static +ArmAlign AArch32Rel32Translator::DecodeA24(uint32_t code32, arm_disp_t* disp) { + // Handle multiple instructions. Let cccc != 1111: + // B encoding A1: + // Code: cccc1010 Siiiiiii iiiiiiii iiiiiiii + // Displacement: SSSSSSSi iiiiiiii iiiiiiii iiiiii00 + // BL encoding A1: + // Code: cccc1011 Siiiiiii iiiiiiii iiiiiiii + // Displacement: SSSSSSSi iiiiiiii iiiiiiii iiiiii00 + // BLX encoding A2: + // Code: 1111101H Siiiiiii iiiiiiii iiiiiiii + // Displacement: SSSSSSSi iiiiiiii iiiiiiii iiiiiiH0 + uint8_t bits = GetUnsignedBits<24, 27>(code32); + if (bits == 0xA || bits == 0xB) { // B, BL, or BLX. + *disp = GetSignedBits<0, 23>(code32) << 2; + uint8_t cond = GetUnsignedBits<28, 31>(code32); + if (cond == 0xF) { // BLX. + uint32_t H = GetBit<24>(code32); + *disp |= H << 1; + return kArmAlign2; + } + return kArmAlign4; + } + return kArmAlignFail; +} + +// static +bool AArch32Rel32Translator::EncodeA24(arm_disp_t disp, uint32_t* code32) { + uint32_t t = *code32; + uint8_t bits = GetUnsignedBits<24, 27>(t); + if (bits == 0xA || bits == 0xB) { + // B, BL, or BLX. + if (!SignedFit<26>(disp)) // Detect overflow. + return false; + uint8_t cond = GetUnsignedBits<28, 31>(t); + if (cond == 0xF) { + if (disp % 2) // BLX (encoding A2) requires 2-byte alignment. + return false; + uint32_t H = GetBit<1>(disp); + t = (t & 0xFEFFFFFF) | (H << 24); + } else { + if (disp % 4) // B and BL require 4-byte alignment. 
+ return false; + } + t = (t & 0xFF000000) | ((disp >> 2) & 0x00FFFFFF); + *code32 = t; + return true; + } + return false; +} + +// static +bool AArch32Rel32Translator::ReadA24(rva_t instr_rva, + uint32_t code32, + rva_t* target_rva) { + constexpr ArmAlign kInstrAlign = kArmAlign4; + if (IsMisaligned(instr_rva, kInstrAlign)) + return false; + arm_disp_t disp; + ArmAlign target_align = DecodeA24(code32, &disp); + if (target_align == kArmAlignFail) + return false; + *target_rva = GetArmTargetRvaFromDisp(instr_rva, disp, target_align); + return true; +} + +// static +bool AArch32Rel32Translator::WriteA24(rva_t instr_rva, + rva_t target_rva, + uint32_t* code32) { + constexpr ArmAlign kInstrAlign = kArmAlign4; + if (IsMisaligned(instr_rva, kInstrAlign)) + return false; + // Dummy decode to get |target_align|. + arm_disp_t dummy_disp; + ArmAlign target_align = DecodeA24(*code32, &dummy_disp); + if (target_align == kArmAlignFail || IsMisaligned(target_rva, target_align)) + return false; + arm_disp_t disp = + GetArmDispFromTargetRva(instr_rva, target_rva, target_align); + return EncodeA24(disp, code32); +} + +// static +ArmAlign AArch32Rel32Translator::DecodeT8(uint16_t code16, arm_disp_t* disp) { + if ((code16 & 0xF000) == 0xD000 && (code16 & 0x0F00) != 0x0F00) { + // B encoding T1: + // Code: 1101cccc Siiiiiii + // Displacement: SSSSSSSS SSSSSSSS SSSSSSSS iiiiiii0 + *disp = GetSignedBits<0, 7>(code16) << 1; + return kArmAlign2; + } + return kArmAlignFail; +} + +// static +bool AArch32Rel32Translator::EncodeT8(arm_disp_t disp, uint16_t* code16) { + uint16_t t = *code16; + if ((t & 0xF000) == 0xD000 && (t & 0x0F00) != 0x0F00) { + if (disp % 2) // Require 2-byte alignment. + return false; + if (!SignedFit<9>(disp)) // Detect overflow. 
+ return false; + t = (t & 0xFF00) | ((disp >> 1) & 0x00FF); + *code16 = t; + return true; + } + return false; +} + +// static +bool AArch32Rel32Translator::ReadT8(rva_t instr_rva, + uint16_t code16, + rva_t* target_rva) { + constexpr ArmAlign kInstrAlign = kArmAlign2; + if (IsMisaligned(instr_rva, kInstrAlign)) + return false; + arm_disp_t disp; + ArmAlign target_align = DecodeT8(code16, &disp); + if (target_align == kArmAlignFail) + return false; + *target_rva = GetThumb2TargetRvaFromDisp(instr_rva, disp, target_align); + return true; +} + +// static +bool AArch32Rel32Translator::WriteT8(rva_t instr_rva, + rva_t target_rva, + uint16_t* code16) { + constexpr ArmAlign kInstrAlign = kArmAlign2; + constexpr ArmAlign kTargetAlign = kArmAlign2; + if (IsMisaligned(instr_rva, kInstrAlign) || + IsMisaligned(target_rva, kTargetAlign)) { + return false; + } + arm_disp_t disp = + GetThumb2DispFromTargetRva(instr_rva, target_rva, kTargetAlign); + return EncodeT8(disp, code16); +} + +// static +ArmAlign AArch32Rel32Translator::DecodeT11(uint16_t code16, arm_disp_t* disp) { + if ((code16 & 0xF800) == 0xE000) { + // B encoding T2: + // Code: 11100Sii iiiiiiii + // Displacement: SSSSSSSS SSSSSSSS SSSSSiii iiiiiii0 + *disp = GetSignedBits<0, 10>(code16) << 1; + return kArmAlign2; + } + return kArmAlignFail; +} + +// static +bool AArch32Rel32Translator::EncodeT11(arm_disp_t disp, uint16_t* code16) { + uint16_t t = *code16; + if ((t & 0xF800) == 0xE000) { + if (disp % 2) // Require 2-byte alignment. + return false; + if (!SignedFit<12>(disp)) // Detect overflow. 
+ return false; + t = (t & 0xF800) | ((disp >> 1) & 0x07FF); + *code16 = t; + return true; + } + return false; +} + +// static +bool AArch32Rel32Translator::ReadT11(rva_t instr_rva, + uint16_t code16, + rva_t* target_rva) { + constexpr ArmAlign kInstrAlign = kArmAlign2; + if (IsMisaligned(instr_rva, kInstrAlign)) + return false; + arm_disp_t disp; + ArmAlign target_align = DecodeT11(code16, &disp); + if (target_align == kArmAlignFail) + return false; + *target_rva = GetThumb2TargetRvaFromDisp(instr_rva, disp, target_align); + return true; +} + +// static +bool AArch32Rel32Translator::WriteT11(rva_t instr_rva, + rva_t target_rva, + uint16_t* code16) { + constexpr ArmAlign kInstrAlign = kArmAlign2; + constexpr ArmAlign kTargetAlign = kArmAlign2; + if (IsMisaligned(instr_rva, kInstrAlign) || + IsMisaligned(target_rva, kTargetAlign)) { + return false; + } + arm_disp_t disp = + GetThumb2DispFromTargetRva(instr_rva, target_rva, kTargetAlign); + return EncodeT11(disp, code16); +} + +// static +ArmAlign AArch32Rel32Translator::DecodeT20(uint32_t code32, arm_disp_t* disp) { + if ((code32 & 0xF800D000) == 0xF0008000 && + (code32 & 0x03C00000) != 0x03C00000) { + // B encoding T3. Note the reversal of "(J1)" and "(J2)". + // Code: 11110Scc cciiiiii 10(J1)0(J2)jjj jjjjjjjj + // Displacement: SSSSSSSS SSSS(J2)(J1)ii iiiijjjj jjjjjjj0 + uint32_t imm11 = GetUnsignedBits<0, 10>(code32); // jj...j. + uint32_t J2 = GetBit<11>(code32); + uint32_t J1 = GetBit<13>(code32); + uint32_t imm6 = GetUnsignedBits<16, 21>(code32); // ii...i. + uint32_t S = GetBit<26>(code32); + uint32_t t = (imm6 << 12) | (imm11 << 1); + t |= (S << 20) | (J2 << 19) | (J1 << 18); + *disp = SignExtend<20, int32_t>(t); + return kArmAlign2; + } + return kArmAlignFail; +} + +// static +bool AArch32Rel32Translator::EncodeT20(arm_disp_t disp, uint32_t* code32) { + uint32_t t = *code32; + if ((t & 0xF800D000) == 0xF0008000 && (t & 0x03C00000) != 0x03C00000) { + if (disp % 2) // Require 2-byte alignment. 
+ return false; + if (!SignedFit<21>(disp)) // Detect overflow. + return false; + uint32_t S = GetBit<20>(disp); + uint32_t J2 = GetBit<19>(disp); + uint32_t J1 = GetBit<18>(disp); + uint32_t imm6 = GetUnsignedBits<12, 17>(disp); // ii...i. + uint32_t imm11 = GetUnsignedBits<1, 11>(disp); // jj...j. + t &= 0xFBC0D000; + t |= (S << 26) | (imm6 << 16) | (J1 << 13) | (J2 << 11) | imm11; + *code32 = t; + return true; + } + return false; +} + +// static +bool AArch32Rel32Translator::ReadT20(rva_t instr_rva, + uint32_t code32, + rva_t* target_rva) { + constexpr ArmAlign kInstrAlign = kArmAlign2; + if (IsMisaligned(instr_rva, kInstrAlign)) + return false; + arm_disp_t disp; + ArmAlign target_align = DecodeT20(code32, &disp); + if (target_align == kArmAlignFail) + return false; + *target_rva = GetThumb2TargetRvaFromDisp(instr_rva, disp, target_align); + return true; +} + +// static +bool AArch32Rel32Translator::WriteT20(rva_t instr_rva, + rva_t target_rva, + uint32_t* code32) { + constexpr ArmAlign kInstrAlign = kArmAlign2; + constexpr ArmAlign kTargetAlign = kArmAlign2; + if (IsMisaligned(instr_rva, kInstrAlign) || + IsMisaligned(target_rva, kTargetAlign)) { + return false; + } + arm_disp_t disp = + GetThumb2DispFromTargetRva(instr_rva, target_rva, kTargetAlign); + return EncodeT20(disp, code32); +} + +// static +ArmAlign AArch32Rel32Translator::DecodeT24(uint32_t code32, arm_disp_t* disp) { + uint32_t bits = code32 & 0xF800D000; + if (bits == 0xF0009000 || bits == 0xF000D000 || bits == 0xF000C000) { + // Let I1 = J1 ^ S ^ 1, I2 = J2 ^ S ^ 1. 
+ // B encoding T4: + // Code: 11110Sii iiiiiiii 10(J1)1(J2)jjj jjjjjjjj + // Displacement: SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjjj0 + // BL encoding T1: + // Code: 11110Sii iiiiiiii 11(J1)1(J2)jjj jjjjjjjj + // Displacement: SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjjj0 + // BLX encoding T2: H should be 0: + // Code: 11110Sii iiiiiiii 11(J1)0(J2)jjj jjjjjjjH + // Displacement: SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjjH0 + uint32_t imm11 = GetUnsignedBits<0, 10>(code32); // jj...j. + uint32_t J2 = GetBit<11>(code32); + uint32_t J1 = GetBit<13>(code32); + uint32_t imm10 = GetUnsignedBits<16, 25>(code32); // ii...i. + uint32_t S = GetBit<26>(code32); + uint32_t t = (imm10 << 12) | (imm11 << 1); + t |= (S << 24) | ((J1 ^ S ^ 1) << 23) | ((J2 ^ S ^ 1) << 22); + t = SignExtend<24, int32_t>(t); + // BLX encoding T2 requires final target to be 4-byte aligned by rounding + // downward. This is applied to |t| *after* clipping. + ArmAlign target_align = kArmAlign2; + if (bits == 0xF000C000) { + uint32_t H = GetBit<0>(code32); + if (H) + return kArmAlignFail; // Illegal instruction: H must be 0. + target_align = kArmAlign4; + } + *disp = static_cast<int32_t>(t); + return target_align; + } + return kArmAlignFail; +} + +// static +bool AArch32Rel32Translator::EncodeT24(arm_disp_t disp, uint32_t* code32) { + uint32_t t = *code32; + uint32_t bits = t & 0xF800D000; + if (bits == 0xF0009000 || bits == 0xF000D000 || bits == 0xF000C000) { + if (disp % 2) // Require 2-byte alignment. + return false; + // BLX encoding T2 requires H == 0, and that |disp| results in |target_rva| + // with a 4-byte aligned address. + if (bits == 0xF000C000) { + uint32_t H = GetBit<1>(disp); + if (H) + return false; // Illegal |disp|: H must be 0. + } + if (!SignedFit<25>(disp)) // Detect overflow. + return false; + uint32_t imm11 = GetUnsignedBits<1, 11>(disp); // jj...j. + uint32_t imm10 = GetUnsignedBits<12, 21>(disp); // ii...i. 
+ uint32_t I2 = GetBit<22>(disp); + uint32_t I1 = GetBit<23>(disp); + uint32_t S = GetBit<24>(disp); + t &= 0xF800D000; + t |= (S << 26) | (imm10 << 16) | ((I1 ^ S ^ 1) << 13) | + ((I2 ^ S ^ 1) << 11) | imm11; + *code32 = t; + return true; + } + return false; +} + +// static +bool AArch32Rel32Translator::ReadT24(rva_t instr_rva, + uint32_t code32, + rva_t* target_rva) { + constexpr ArmAlign kInstrAlign = kArmAlign2; + if (IsMisaligned(instr_rva, kInstrAlign)) + return false; + arm_disp_t disp; + ArmAlign target_align = DecodeT24(code32, &disp); + if (target_align == kArmAlignFail) + return false; + *target_rva = GetThumb2TargetRvaFromDisp(instr_rva, disp, target_align); + return true; +} + +// static +bool AArch32Rel32Translator::WriteT24(rva_t instr_rva, + rva_t target_rva, + uint32_t* code32) { + constexpr ArmAlign kInstrAlign = kArmAlign2; + if (IsMisaligned(instr_rva, kInstrAlign)) + return false; + // Dummy decode to get |target_align|. + arm_disp_t dummy_disp; + ArmAlign target_align = DecodeT24(*code32, &dummy_disp); + if (target_align == kArmAlignFail || IsMisaligned(target_rva, target_align)) + return false; + arm_disp_t disp = + GetThumb2DispFromTargetRva(instr_rva, target_rva, target_align); + return EncodeT24(disp, code32); +} + +/******** AArch64Rel32Translator ********/ + +AArch64Rel32Translator::AArch64Rel32Translator() = default; + +// static +ArmAlign AArch64Rel32Translator::DecodeImmd14(uint32_t code32, + arm_disp_t* disp) { + // TBZ: + // Code: b0110110 bbbbbSii iiiiiiii iiittttt + // Displacement: SSSSSSSS SSSSSSSS Siiiiiii iiiiii00 + // TBNZ: + // Code: b0110111 bbbbbSii iiiiiiii iiittttt + // Displacement: SSSSSSSS SSSSSSSS Siiiiiii iiiiii00 + uint32_t bits = code32 & 0x7F000000; + if (bits == 0x36000000 || bits == 0x37000000) { + *disp = GetSignedBits<5, 18>(code32) << 2; + return kArmAlign4; + } + return kArmAlignFail; +} + +// static +bool AArch64Rel32Translator::EncodeImmd14(arm_disp_t disp, uint32_t* code32) { + uint32_t t = *code32; + 
uint32_t bits = t & 0x7F000000; + if (bits == 0x36000000 || bits == 0x37000000) { + if (disp % 4) // Require 4-byte alignment. + return false; + if (!SignedFit<16>(disp)) // Detect overflow. + return false; + uint32_t imm14 = GetUnsignedBits<2, 15>(disp); // ii...i. + t &= 0xFFF8001F; + t |= imm14 << 5; + *code32 = t; + return true; + } + return false; +} + +// static +bool AArch64Rel32Translator::ReadImmd14(rva_t instr_rva, + uint32_t code32, + rva_t* target_rva) { + constexpr ArmAlign kInstrAlign = kArmAlign4; + if (IsMisaligned(instr_rva, kInstrAlign)) + return false; + arm_disp_t disp; + if (DecodeImmd14(code32, &disp) == kArmAlignFail) + return false; + *target_rva = GetTargetRvaFromDisp(instr_rva, disp); + return true; +} + +// static +bool AArch64Rel32Translator::WriteImmd14(rva_t instr_rva, + rva_t target_rva, + uint32_t* code32) { + constexpr ArmAlign kInstrAlign = kArmAlign4; + constexpr ArmAlign kTargetAlign = kArmAlign4; + if (IsMisaligned(instr_rva, kInstrAlign) || + IsMisaligned(target_rva, kTargetAlign)) { + return false; + } + arm_disp_t disp = GetDispFromTargetRva(instr_rva, target_rva); + return EncodeImmd14(disp, code32); +} + +// static +ArmAlign AArch64Rel32Translator::DecodeImmd19(uint32_t code32, + arm_disp_t* disp) { + // B.cond: + // Code: 01010100 Siiiiiii iiiiiiii iii0cccc + // Displacement: SSSSSSSS SSSSiiii iiiiiiii iiiiii00 + // CBZ: + // Code: z0110100 Siiiiiii iiiiiiii iiittttt + // Displacement: SSSSSSSS SSSSiiii iiiiiiii iiiiii00 + // CBNZ: + // Code: z0110101 Siiiiiii iiiiiiii iiittttt + // Displacement: SSSSSSSS SSSSiiii iiiiiiii iiiiii00 + uint32_t bits1 = code32 & 0xFF000010; + uint32_t bits2 = code32 & 0x7F000000; + if (bits1 == 0x54000000 || bits2 == 0x34000000 || bits2 == 0x35000000) { + *disp = GetSignedBits<5, 23>(code32) << 2; + return kArmAlign4; + } + return kArmAlignFail; +} + +// static +bool AArch64Rel32Translator::EncodeImmd19(arm_disp_t disp, uint32_t* code32) { + uint32_t t = *code32; + uint32_t bits1 = t & 
      0xFF000010;
  uint32_t bits2 = t & 0x7F000000;
  if (bits1 == 0x54000000 || bits2 == 0x34000000 || bits2 == 0x35000000) {
    if (disp % 4)  // Require 4-byte alignment.
      return false;
    if (!SignedFit<21>(disp))  // Detect overflow: |disp| is imm19 << 2.
      return false;
    uint32_t imm19 = GetUnsignedBits<2, 20>(disp);  // ii...i.
    // Clear the imm19 field (bits 23-5), then insert the new value.
    t &= 0xFF00001F;
    t |= imm19 << 5;
    *code32 = t;
    return true;
  }
  return false;
}

// static
// Extracts |*target_rva| from an AArch64 B.cond/CBZ/CBNZ instruction |code32|
// located at |instr_rva|. Returns false if |instr_rva| is misaligned or
// |code32| does not match.
bool AArch64Rel32Translator::ReadImmd19(rva_t instr_rva,
                                        uint32_t code32,
                                        rva_t* target_rva) {
  constexpr ArmAlign kInstrAlign = kArmAlign4;
  if (IsMisaligned(instr_rva, kInstrAlign))
    return false;
  arm_disp_t disp;
  if (DecodeImmd19(code32, &disp) == kArmAlignFail)
    return false;
  *target_rva = GetTargetRvaFromDisp(instr_rva, disp);
  return true;
}

// static
// Re-encodes |*code32| (an AArch64 B.cond/CBZ/CBNZ instruction at |instr_rva|)
// so it branches to |target_rva|. Returns false on misalignment, instruction
// mismatch, or imm19 overflow.
bool AArch64Rel32Translator::WriteImmd19(rva_t instr_rva,
                                         rva_t target_rva,
                                         uint32_t* code32) {
  constexpr ArmAlign kInstrAlign = kArmAlign4;
  constexpr ArmAlign kTargetAlign = kArmAlign4;
  if (IsMisaligned(instr_rva, kInstrAlign) ||
      IsMisaligned(target_rva, kTargetAlign)) {
    return false;
  }
  arm_disp_t disp = GetDispFromTargetRva(instr_rva, target_rva);
  return EncodeImmd19(disp, code32);
}

// static
// Matches B/BL and extracts the byte displacement into |*disp|. Returns
// kArmAlign4 on match, else kArmAlignFail.
ArmAlign AArch64Rel32Translator::DecodeImmd26(uint32_t code32,
                                              arm_disp_t* disp) {
  // B:
  //   Code:         000101Si iiiiiiii iiiiiiii iiiiiiii
  //   Displacement: SSSSSiii iiiiiiii iiiiiiii iiiiii00
  // BL:
  //   Code:         100101Si iiiiiiii iiiiiiii iiiiiiii
  //   Displacement: SSSSSiii iiiiiiii iiiiiiii iiiiii00
  uint32_t bits = code32 & 0xFC000000;
  if (bits == 0x14000000 || bits == 0x94000000) {
    *disp = GetSignedBits<0, 25>(code32) << 2;
    return kArmAlign4;
  }
  return kArmAlignFail;
}

// static
bool AArch64Rel32Translator::EncodeImmd26(arm_disp_t disp, uint32_t* code32) {
  uint32_t t = *code32;
  uint32_t bits = t & 0xFC000000;
  if (bits == 0x14000000 || bits == 0x94000000) {
    if (disp % 4)  // Require 4-byte alignment.
      return false;
    if (!SignedFit<28>(disp))  // Detect overflow: |disp| is imm26 << 2.
      return false;
    uint32_t imm26 = GetUnsignedBits<2, 27>(disp);  // ii...i.
    // Clear the imm26 field (bits 25-0), then insert the new value.
    t &= 0xFC000000;
    t |= imm26;
    *code32 = t;
    return true;
  }
  return false;
}

// static
// Extracts |*target_rva| from an AArch64 B/BL instruction |code32| located at
// |instr_rva|. Returns false if |instr_rva| is misaligned or |code32| is not
// a B/BL instruction.
bool AArch64Rel32Translator::ReadImmd26(rva_t instr_rva,
                                        uint32_t code32,
                                        rva_t* target_rva) {
  constexpr ArmAlign kInstrAlign = kArmAlign4;
  if (IsMisaligned(instr_rva, kInstrAlign))
    return false;
  arm_disp_t disp;
  if (DecodeImmd26(code32, &disp) == kArmAlignFail)
    return false;
  *target_rva = GetTargetRvaFromDisp(instr_rva, disp);
  return true;
}

// static
// Re-encodes |*code32| (an AArch64 B/BL instruction at |instr_rva|) so it
// branches to |target_rva|. Returns false on misalignment, instruction
// mismatch, or imm26 overflow.
bool AArch64Rel32Translator::WriteImmd26(rva_t instr_rva,
                                         rva_t target_rva,
                                         uint32_t* code32) {
  constexpr ArmAlign kInstrAlign = kArmAlign4;
  constexpr ArmAlign kTargetAlign = kArmAlign4;
  if (IsMisaligned(instr_rva, kInstrAlign) ||
      IsMisaligned(target_rva, kTargetAlign)) {
    return false;
  }
  arm_disp_t disp = GetDispFromTargetRva(instr_rva, target_rva);
  return EncodeImmd26(disp, code32);
}

}  // namespace zucchini
diff --git a/arm_utils.h b/arm_utils.h new file mode 100644 index 0000000..8664f3e --- /dev/null +++ b/arm_utils.h @@ -0,0 +1,423 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_ARM_UTILS_H_
#define COMPONENTS_ZUCCHINI_ARM_UTILS_H_

#include <stddef.h>
#include <stdint.h>

#include "base/check_op.h"
#include "components/zucchini/address_translator.h"
#include "components/zucchini/buffer_view.h"

namespace zucchini {

// References:
// * AArch32 (32-bit ARM, AKA ARM32):
//   https://static.docs.arm.com/ddi0406/c/DDI0406C_C_arm_architecture_reference_manual.pdf
// * AArch64 (64-bit ARM):
//   https://static.docs.arm.com/ddi0487/da/DDI0487D_a_armv8_arm.pdf

// Definitions (used in Zucchini):
// * |instr_rva|: Instruction RVA: The RVA where an instruction is located.
// In ARM mode and for AArch64 this is 4-byte aligned; in THUMB2 mode this is
//   2-byte aligned.
// * |code|: Instruction code: ARM instruction code as seen in manual. In ARM
//   mode and for AArch64, this is a 32-bit int. In THUMB2 mode, this may be a
//   16-bit or 32-bit int.
// * |disp|: Displacement: For branch instructions (e.g.: B, BL, BLX, and
//   conditional varieties) this is the value encoded in instruction bytes.
// * PC: Program Counter: In ARM mode this is |instr_rva + 8|; in THUMB2 mode
//   this is |instr_rva + 4|; for AArch64 this is |instr_rva|.
// * |target_rva|: Target RVA: The RVA targeted by a branch instruction.
//
// These are related by:
//   |code| = Fetch(image data at offset(|instr_rva|)).
//   |disp| = Decode(|code|).
//   PC = |instr_rva| + {8 in ARM mode, 4 in THUMB2 mode, 0 for AArch64}.
//   |target_rva| = PC + |disp| - (see "BLX complication" below)
//
// Example 1 (ARM mode):
//   00103050: 00 01 02 EA    B     00183458
//   |instr_rva| = 0x00103050 (4-byte aligned).
//   |code| = 0xEA020100 (little endian fetched from data).
//   |disp| = 0x00080400 (decoded from |code| with A24 -> B encoding A1).
//   PC = |instr_rva| + 8 = 0x00103058 (ARM mode).
//   |target_rva| = PC + |disp| = 0x00183458.
//
// Example 2 (THUMB2 mode):
//   001030A2: 00 F0 01 FA    BL    001034A8
//   |instr_rva| = 0x001030A2 (2-byte aligned).
//   |code| = 0xF000FA01 (special THUMB2 mode data fetch).
//   |disp| = 0x00000402 (decoded from |code| with T24 -> BL encoding T1).
//   PC = |instr_rva| + 4 = 0x001030A6 (THUMB2 mode).
//   |target_rva| = PC + |disp| = 0x001034A8.
//
// Example 3 (AArch64):
//   0000000000305070: 03 02 01 14    B     000000000034587C
//   |instr_rva| = 0x00305070 (4-byte aligned, assumed to fit in 32-bit).
//   |code| = 0x14010203 (little endian fetched from data).
//   |disp| = 0x0004080C (decoded from |code| with Immd -> B).
//   PC = |instr_rva| = 0x00305070 (AArch64).
//   |target_rva| = PC + |disp| = 0x0034587C.

// BLX complication: BLX transits between ARM mode and THUMB2 mode, and
// branches to an address. Therefore |instr_rva| must align by the "old" mode,
// and |target_rva| must align by the "new" mode. In particular:
// * BLX encoding A2 (ARM -> THUMB2): |instr_rva| is 4-byte aligned with
//   PC = |instr_rva| + 8; |target_rva| is 2-byte aligned, and so |disp| is
//   2-byte aligned.
// * BLX encoding T2 (THUMB2 -> ARM): |instr_rva| is 2-byte aligned with
//   PC = |instr_rva| + 4; |target_rva| is 4-byte aligned. Complication: BLX
//   encoding T2 stores a bit |H| that corresponds to "2" in binary, but |H|
//   must be set to 0. Thus the encoded value is effectively 4-byte aligned. So
//   when computing |target_rva| by adding PC (2-byte aligned) to the stored
//   value (4-byte aligned), the result must be rounded down to the nearest
//   4-byte aligned address.
// The last situation creates ambiguity in how |disp| is defined! Alternatives:
// (1) |disp| := |target_rva| - PC: So |code| <-> |disp| for BLX encoding T2
//     requires |instr_rva| % 4 to be determined, and adjustments made.
// (2) |disp| := Value stored in |code|: So |disp| <-> |target_rva| for BLX
//     encoding T2 requires adjustment: |disp| -> |target_rva| needs to round
//     down, whereas |target_rva| -> |disp| needs to round up.
// We adopt (2) to simplify |code| <-> |disp|, since that gets used.

using arm_disp_t = int32_t;

// Alignment requirement for |target_rva|, useful for |disp| <-> |target_rva|
// (also requires |instr_rva|). Alignment is determined by parsing |code| in
// *Decode() functions. kArmAlignFail is also defined to indicate parse failure.
// Alignments can be 2 or 4. These values are also used in the enum, so replace
// |x % align| with |x & (align - 1)| to compute alignment.
+enum ArmAlign : uint32_t { + kArmAlignFail = 0U, + kArmAlign2 = 2U, + kArmAlign4 = 4U, +}; + +// Traits for rel32 address types (technically rel64 for AArch64 -- but we +// assume values are small enough), which form collections of strategies to +// process each rel32 address type. +template <typename ENUM_ADDR_TYPE, + ENUM_ADDR_TYPE ADDR_TYPE, + typename CODE_T, + CODE_T (*FETCH)(ConstBufferView, offset_t), + void (*STORE)(MutableBufferView, offset_t, CODE_T), + ArmAlign (*DECODE)(CODE_T, arm_disp_t*), + bool (*ENCODE)(arm_disp_t, CODE_T*), + bool (*READ)(rva_t, CODE_T, rva_t*), + bool (*WRITE)(rva_t, rva_t, CODE_T*)> +class ArmAddrTraits { + public: + static constexpr ENUM_ADDR_TYPE addr_type = ADDR_TYPE; + using code_t = CODE_T; + static constexpr CODE_T (*Fetch)(ConstBufferView, offset_t) = FETCH; + static constexpr void (*Store)(MutableBufferView, offset_t, CODE_T) = STORE; + static constexpr ArmAlign (*Decode)(CODE_T, arm_disp_t*) = DECODE; + static constexpr bool (*Encode)(arm_disp_t, CODE_T*) = ENCODE; + static constexpr bool (*Read)(rva_t, CODE_T, rva_t*) = READ; + static constexpr bool (*Write)(rva_t, rva_t, CODE_T*) = WRITE; +}; + +// Given THUMB2 instruction |code16|, returns 2 if it's from a 16-bit THUMB2 +// instruction, or 4 if it's from a 32-bit THUMB2 instruction. +inline int GetThumb2InstructionSize(uint16_t code16) { + return ((code16 & 0xF000) == 0xF000 || (code16 & 0xF800) == 0xE800) ? 4 : 2; +} + +// A translator for ARM mode and THUMB2 mode with static functions that +// translate among |code|, |disp|, and |target_rva|. +class AArch32Rel32Translator { + public: + // Rel32 address types enumeration. + enum AddrType : uint8_t { + ADDR_NONE = 0xFF, + // Naming: Here "A24" represents ARM mode instructions where |code| + // dedicates 24 bits (including sign bit) to specify |disp|. Similarly, "T8" + // represents THUMB2 mode instructions with 8 bits for |disp|. Currently + // only {A24, T8, T11, T20, T24} are defined. 
These are not to be confused + // with "B encoding A1", "B encoding T3", etc., which are specific encoding + // schemes given by the manual for the "B" (or other) instructions (only + // {A1, A2, T1, T2, T3, T4} are seen). + ADDR_A24 = 0, + ADDR_T8, + ADDR_T11, + ADDR_T20, + ADDR_T24, + NUM_ADDR_TYPE + }; + + AArch32Rel32Translator(); + AArch32Rel32Translator(const AArch32Rel32Translator&) = delete; + const AArch32Rel32Translator& operator=(const AArch32Rel32Translator&) = + delete; + + // Fetches the 32-bit ARM instruction |code| at |view[idx]|. + static inline uint32_t FetchArmCode32(ConstBufferView view, offset_t idx) { + return view.read<uint32_t>(idx); + } + + // Fetches the 16-bit THUMB2 instruction |code| at |view[idx]|. + static inline uint16_t FetchThumb2Code16(ConstBufferView view, offset_t idx) { + return view.read<uint16_t>(idx); + } + + // Fetches the 32-bit THUMB2 instruction |code| at |view[idx]|. + static inline uint32_t FetchThumb2Code32(ConstBufferView view, offset_t idx) { + // By convention, 32-bit THUMB2 instructions are written (as seen later) as: + // [byte3, byte2, byte1, byte0]. + // However (assuming little-endian ARM) the in-memory representation is + // [byte2, byte3, byte0, byte1]. + return (static_cast<uint32_t>(view.read<uint16_t>(idx)) << 16) | + view.read<uint16_t>(idx + 2); + } + + // Stores the 32-bit ARM instruction |code| to |mutable_view[idx]|. + static inline void StoreArmCode32(MutableBufferView mutable_view, + offset_t idx, + uint32_t code) { + mutable_view.write<uint32_t>(idx, code); + } + + // Stores the 16-bit THUMB2 instruction |code| to |mutable_view[idx]|. + static inline void StoreThumb2Code16(MutableBufferView mutable_view, + offset_t idx, + uint16_t code) { + mutable_view.write<uint16_t>(idx, code); + } + + // Stores the next 32-bit THUMB2 instruction |code| to |mutable_view[idx]|. 
+ static inline void StoreThumb2Code32(MutableBufferView mutable_view, + offset_t idx, + uint32_t code) { + mutable_view.write<uint16_t>(idx, static_cast<uint16_t>(code >> 16)); + mutable_view.write<uint16_t>(idx + 2, static_cast<uint16_t>(code & 0xFFFF)); + } + + // The following functions convert |code| (16-bit or 32-bit) from/to |disp| + // or |target_rva|, for specific branch instruction types. + // Read*() and write*() functions convert between |code| and |target_rva|. + // * Decode*() determines whether |code16/code32| is a branch instruction + // of a specific type. If so, then extracts |*disp| and returns the required + // ArmAlign. Otherwise returns kArmAlignFail. + // * Encode*() determines whether |*code16/*code32| is a branch instruction of + // a specific type, and whether it can accommodate |disp|. If so, then + // re-encodes |*code32| using |disp|, and returns true. Otherwise returns + // false. + // * Read*() is similar to Decode*(), but on success, extracts |*target_rva| + // using |instr_rva| as aid, performs the proper alignment, and returns + // true. Otherwise returns false. + // * Write*() is similar to Encode*(), takes |target_rva| instead, and uses + // |instr_rva| as aid. + static ArmAlign DecodeA24(uint32_t code32, arm_disp_t* disp); + static bool EncodeA24(arm_disp_t disp, uint32_t* code32); + // TODO(huangs): Refactor the Read*() functions: These are identical + // except for Decode*() and Get*TargetRvaFromDisp(). 
+ static bool ReadA24(rva_t instr_rva, uint32_t code32, rva_t* target_rva); + static bool WriteA24(rva_t instr_rva, rva_t target_rva, uint32_t* code32); + + static ArmAlign DecodeT8(uint16_t code16, arm_disp_t* disp); + static bool EncodeT8(arm_disp_t disp, uint16_t* code16); + static bool ReadT8(rva_t instr_rva, uint16_t code16, rva_t* target_rva); + static bool WriteT8(rva_t instr_rva, rva_t target_rva, uint16_t* code16); + + static ArmAlign DecodeT11(uint16_t code16, arm_disp_t* disp); + static bool EncodeT11(arm_disp_t disp, uint16_t* code16); + static bool ReadT11(rva_t instr_rva, uint16_t code16, rva_t* target_rva); + static bool WriteT11(rva_t instr_rva, rva_t target_rva, uint16_t* code16); + + static ArmAlign DecodeT20(uint32_t code32, arm_disp_t* disp); + static bool EncodeT20(arm_disp_t disp, uint32_t* code32); + static bool ReadT20(rva_t instr_rva, uint32_t code32, rva_t* target_rva); + static bool WriteT20(rva_t instr_rva, rva_t target_rva, uint32_t* code32); + + static ArmAlign DecodeT24(uint32_t code32, arm_disp_t* disp); + static bool EncodeT24(arm_disp_t disp, uint32_t* code32); + static bool ReadT24(rva_t instr_rva, uint32_t code32, rva_t* target_rva); + static bool WriteT24(rva_t instr_rva, rva_t target_rva, uint32_t* code32); + + // Computes |target_rva| from |instr_rva| and |disp| in ARM mode. + static inline rva_t GetArmTargetRvaFromDisp(rva_t instr_rva, + arm_disp_t disp, + ArmAlign align) { + rva_t ret = static_cast<rva_t>(instr_rva + 8 + disp); + // Align down. + DCHECK_NE(align, kArmAlignFail); + return ret - (ret & static_cast<rva_t>(align - 1)); + } + + // Computes |target_rva| from |instr_rva| and |disp| in THUMB2 mode. + static inline rva_t GetThumb2TargetRvaFromDisp(rva_t instr_rva, + arm_disp_t disp, + ArmAlign align) { + rva_t ret = static_cast<rva_t>(instr_rva + 4 + disp); + // Align down. 
+ DCHECK_NE(align, kArmAlignFail); + return ret - (ret & static_cast<rva_t>(align - 1)); + } + + // Computes |disp| from |instr_rva| and |target_rva| in ARM mode. + static inline arm_disp_t GetArmDispFromTargetRva(rva_t instr_rva, + rva_t target_rva, + ArmAlign align) { + // Assumes that |instr_rva + 8| does not overflow. + arm_disp_t ret = static_cast<arm_disp_t>(target_rva) - + static_cast<arm_disp_t>(instr_rva + 8); + // Align up. + DCHECK_NE(align, kArmAlignFail); + return ret + ((-ret) & static_cast<arm_disp_t>(align - 1)); + } + + // Computes |disp| from |instr_rva| and |target_rva| in THUMB2 mode. + static inline arm_disp_t GetThumb2DispFromTargetRva(rva_t instr_rva, + rva_t target_rva, + ArmAlign align) { + // Assumes that |instr_rva + 4| does not overflow. + arm_disp_t ret = static_cast<arm_disp_t>(target_rva) - + static_cast<arm_disp_t>(instr_rva + 4); + // Align up. + DCHECK_NE(align, kArmAlignFail); + return ret + ((-ret) & static_cast<arm_disp_t>(align - 1)); + } + + // Strategies to process each rel32 address type. + using AddrTraits_A24 = ArmAddrTraits<AddrType, + ADDR_A24, + uint32_t, + FetchArmCode32, + StoreArmCode32, + DecodeA24, + EncodeA24, + ReadA24, + WriteA24>; + using AddrTraits_T8 = ArmAddrTraits<AddrType, + ADDR_T8, + uint16_t, + FetchThumb2Code16, + StoreThumb2Code16, + DecodeT8, + EncodeT8, + ReadT8, + WriteT8>; + using AddrTraits_T11 = ArmAddrTraits<AddrType, + ADDR_T11, + uint16_t, + FetchThumb2Code16, + StoreThumb2Code16, + DecodeT11, + EncodeT11, + ReadT11, + WriteT11>; + using AddrTraits_T20 = ArmAddrTraits<AddrType, + ADDR_T20, + uint32_t, + FetchThumb2Code32, + StoreThumb2Code32, + DecodeT20, + EncodeT20, + ReadT20, + WriteT20>; + using AddrTraits_T24 = ArmAddrTraits<AddrType, + ADDR_T24, + uint32_t, + FetchThumb2Code32, + StoreThumb2Code32, + DecodeT24, + EncodeT24, + ReadT24, + WriteT24>; +}; + +// Translator for AArch64, which is simpler than 32-bit ARM. Although pointers +// are 64-bit, displacements are within 32-bit. 
+class AArch64Rel32Translator { + public: + // Rel64 address types enumeration. + enum AddrType : uint8_t { + ADDR_NONE = 0xFF, + ADDR_IMMD14 = 0, + ADDR_IMMD19, + ADDR_IMMD26, + NUM_ADDR_TYPE + }; + + // Although RVA for 64-bit architecture can be 64-bit in length, we make the + // bold assumption that for ELF images that RVA will stay nicely in 32-bit! + AArch64Rel32Translator(); + AArch64Rel32Translator(const AArch64Rel32Translator&) = delete; + const AArch64Rel32Translator& operator=(const AArch64Rel32Translator&) = + delete; + + static inline uint32_t FetchCode32(ConstBufferView view, offset_t idx) { + return view.read<uint32_t>(idx); + } + + static inline void StoreCode32(MutableBufferView mutable_view, + offset_t idx, + uint32_t code) { + mutable_view.write<uint32_t>(idx, code); + } + + // Conversion functions for |code32| from/to |disp| or |target_rva|, similar + // to the counterparts in AArch32Rel32Translator. + static ArmAlign DecodeImmd14(uint32_t code32, arm_disp_t* disp); + static bool EncodeImmd14(arm_disp_t disp, uint32_t* code32); + // TODO(huangs): Refactor the Read*() functions: These are identical + // except for Decode*(). 
+ static bool ReadImmd14(rva_t instr_rva, uint32_t code32, rva_t* target_rva); + static bool WriteImmd14(rva_t instr_rva, rva_t target_rva, uint32_t* code32); + + static ArmAlign DecodeImmd19(uint32_t code32, arm_disp_t* disp); + static bool EncodeImmd19(arm_disp_t disp, uint32_t* code32); + static bool ReadImmd19(rva_t instr_rva, uint32_t code32, rva_t* target_rva); + static bool WriteImmd19(rva_t instr_rva, rva_t target_rva, uint32_t* code32); + + static ArmAlign DecodeImmd26(uint32_t code32, arm_disp_t* disp); + static bool EncodeImmd26(arm_disp_t disp, uint32_t* code32); + static bool ReadImmd26(rva_t instr_rva, uint32_t code32, rva_t* target_rva); + static bool WriteImmd26(rva_t instr_rva, rva_t target_rva, uint32_t* code32); + + static inline rva_t GetTargetRvaFromDisp(rva_t instr_rva, arm_disp_t disp) { + return static_cast<rva_t>(instr_rva + disp); + } + + static inline arm_disp_t GetDispFromTargetRva(rva_t instr_rva, + rva_t target_rva) { + return static_cast<arm_disp_t>(target_rva - instr_rva); + } + + // Strategies to process each rel32 address type. + using AddrTraits_Immd14 = ArmAddrTraits<AddrType, + ADDR_IMMD14, + uint32_t, + FetchCode32, + StoreCode32, + DecodeImmd14, + EncodeImmd14, + ReadImmd14, + WriteImmd14>; + using AddrTraits_Immd19 = ArmAddrTraits<AddrType, + ADDR_IMMD19, + uint32_t, + FetchCode32, + StoreCode32, + DecodeImmd19, + EncodeImmd19, + ReadImmd19, + WriteImmd19>; + using AddrTraits_Immd26 = ArmAddrTraits<AddrType, + ADDR_IMMD26, + uint32_t, + FetchCode32, + StoreCode32, + DecodeImmd26, + EncodeImmd26, + ReadImmd26, + WriteImmd26>; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ARM_UTILS_H_ diff --git a/arm_utils_unittest.cc b/arm_utils_unittest.cc new file mode 100644 index 0000000..8109c92 --- /dev/null +++ b/arm_utils_unittest.cc @@ -0,0 +1,862 @@ +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/arm_utils.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <cctype> +#include <initializer_list> +#include <map> +#include <sstream> +#include <string> +#include <vector> + +#include "base/check_op.h" +#include "components/zucchini/address_translator.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// "Clean slate" |code|s for branch instruction encodings with |disp| = 0, and +// if applicable, |cond| = 0. +uint32_t kCleanSlateB_A1 = 0x0A000000; // A24. +uint32_t kCleanSlateBL_A1 = 0x0B000000; // A24. +uint32_t kCleanSlateBLX_A2 = 0xFA000000; // A24. +uint16_t kCleanSlateB_T1 = 0xD000; // T8. +uint16_t kCleanSlateB_T2 = 0xE000; // T11. +uint32_t kCleanSlateB_T3 = 0xF0008000; // T20. +// For T24 encodings, |disp| = 0 means J1 = J2 = 1, so include 0x00002800. +uint32_t kCleanSlateB_T4 = 0xF0009000 | 0x00002800; // T24. +uint32_t kCleanSlateBL_T1 = 0xF000D000 | 0x00002800; // T24. +uint32_t kCleanSlateBLX_T2 = 0xF000C000 | 0x00002800; // T24. + +// For AArch64. +uint32_t kCleanSlate64TBZw = 0x36000000; // Immd14. +uint32_t kCleanSlate64TBZz = 0xB6000000; // Immd14. +uint32_t kCleanSlate64TBNZw = 0x37000000; // Immd14. +uint32_t kCleanSlate64TBNZz = 0xB7000000; // Immd14. +uint32_t kCleanSlate64Bcond = 0x54000000; // Immd19. +uint32_t kCleanSlate64CBZw = 0x34000000; // Immd19. +uint32_t kCleanSlate64CBZz = 0xB4000000; // Immd19. +uint32_t kCleanSlate64CBNZw = 0x35000000; // Immd19. +uint32_t kCleanSlate64CBNZz = 0xB5000000; // Immd19. +uint32_t kCleanSlate64B = 0x14000000; // Immd26. +uint32_t kCleanSlate64BL = 0x94000000; // Immd26. + +// Special case: Cond = 0xE => AL. +uint32_t kCleanSlateBAL_A1 = kCleanSlateB_A1 | (0xE << 28); // + +// Test helper: Extracts |components| from |value| (may be |code| or |disp|) +// based on |pattern|. Also performs consistency checks. On success, writes to +// |*components| and returns true. Otherwise returns false. 
+// Example (all numbers are in binary): +// |pattern| = "11110Scc cciiiiii 10(J1)0(J2)jjj jjjj...." +// |value| = 11110111 00111000 10 1 0 0 111 11000101 +// Result: Noting that all 0's and 1's are consistent, returns true with: +// |*components| = {S: 1, c: 1100, i: 111000, J1: 1, J2: 0, j: 1111100} +// Rules for |pattern|: +// * Spaces are ignored. +// * '.' means "don't care". +// * '0' and '1' are expected literals; mismatch leads to failure. +// * A variable name is specified as: +// * A single letter. +// * "(var)", where "var" is a name that begins with a letter. +// * If a variable's first letter is uppercase, then it's a singleton bit. +// * If repeated, consistency check is applied (must be identical). +// * If a variable's first letter is lowercase, then it spans multiple bits. +// * These need not be contiguous, but order is preserved (big-endian). +static bool SplitBits(const std::string& pattern, + uint32_t value, + std::map<std::string, uint32_t>* components) { + CHECK(components); + + // Split |pattern| into |token_list|. + std::vector<std::string> token_list; + size_t bracket_start = std::string::npos; + for (size_t i = 0; i < pattern.size(); ++i) { + char ch = pattern[i]; + if (bracket_start == std::string::npos) { + if (ch == '(') + bracket_start = i + 1; + else if (ch != ' ') // Ignore space. + token_list.push_back(std::string(1, ch)); + } else if (ch == ')') { + token_list.push_back(pattern.substr(bracket_start, i - bracket_start)); + bracket_start = std::string::npos; + } + } + CHECK_EQ(std::string::npos, bracket_start); // No dangling "(". + + // Process each token. 
+ size_t num_tokens = token_list.size(); + std::map<std::string, uint32_t> temp_components; + CHECK(num_tokens == 32 || (num_tokens == 16 && value <= 0xFFFF)); + for (size_t i = 0; i < num_tokens; ++i) { + const std::string& token = token_list[i]; + CHECK(!token.empty()); + uint32_t bit = (value >> (num_tokens - 1 - i)) & 1; + if (token == "0" || token == "1") { + if (token[0] != static_cast<char>('0' + bit)) + return false; // Fail: Mismatch. + } else if (isupper(token[0])) { + if (temp_components.count(token)) { + if (temp_components[token] != bit) + return false; // Fail: Singleton bit not uniform. + } else { + temp_components[token] = bit; + } + } else if (islower(token[0])) { + temp_components[token] = (temp_components[token] << 1) | bit; + } else if (token != ".") { + return false; // Fail: Unrecognized token. + } + } + components->swap(temp_components); + return true; +} + +// AArch32 or AArch64 instruction specification for tests. May be 16-bit or +// 32-bit (determined by INT_T). +template <typename INT_T> +struct ArmRelInstruction { + ArmRelInstruction(const std::string& code_pattern_in, INT_T code) + : code_pattern(code_pattern_in), clean_slate_code(code) {} + + // Code pattern for SplitBits(). + std::string code_pattern; + + // "Clean slate" |code| encodes |disp| = 0. + INT_T clean_slate_code; +}; + +// Tester for ARM Encode / Decode functions for |disp| <-> |code|. +template <typename TRAITS> +class ArmTranslatorEncodeDecodeTest { + public: + using CODE_T = typename TRAITS::code_t; + + ArmTranslatorEncodeDecodeTest() {} + + // For each instruction (with |clean_slate_code| in |instr_list|) and for each + // |disp| in |good_disp_list|, forms |code| with |encode_fun()| and checks for + // success. Extracts |disp_out| with |decode_fun()| and checks that it's the + // original |disp|. For each (|disp|, |code|) pair, extracts components using + // SplitBits(), and checks that components from |toks_list| are identical. 
For + // each |disp| in |bad_disp_list|, checks that |decode_fun_()| fails. + void Run(const std::string& disp_pattern, + const std::vector<std::string>& toks_list, + const std::vector<ArmRelInstruction<CODE_T>>& instr_list, + const std::vector<arm_disp_t>& good_disp_list, + const std::vector<arm_disp_t>& bad_disp_list) { + ArmAlign (*decode_fun)(CODE_T, arm_disp_t*) = TRAITS::Decode; + bool (*encode_fun)(arm_disp_t, CODE_T*) = TRAITS::Encode; + + for (const ArmRelInstruction<CODE_T> instr : instr_list) { + // Parse clean slate code bytes, and ensure it's well-formed. + std::map<std::string, uint32_t> clean_slate_code_components; + EXPECT_TRUE(SplitBits(instr.code_pattern, instr.clean_slate_code, + &clean_slate_code_components)); + + for (arm_disp_t disp : good_disp_list) { + CODE_T code = instr.clean_slate_code; + // Encode |disp| to |code|. + EXPECT_TRUE((*encode_fun)(disp, &code)) << disp; + arm_disp_t disp_out = 0; + + // Extract components (performs consistency checks) and compare. + std::map<std::string, uint32_t> disp_components; + EXPECT_TRUE(SplitBits(disp_pattern, static_cast<uint32_t>(disp), + &disp_components)); + std::map<std::string, uint32_t> code_components; + EXPECT_TRUE(SplitBits(instr.code_pattern, code, &code_components)); + for (const std::string& tok : toks_list) { + EXPECT_EQ(1U, disp_components.count(tok)) << tok; + EXPECT_EQ(1U, code_components.count(tok)) << tok; + EXPECT_EQ(disp_components[tok], code_components[tok]) << tok; + } + + // Decode |code| to |disp_out|, check fidelity. + EXPECT_NE(kArmAlignFail, (*decode_fun)(code, &disp_out)); + EXPECT_EQ(disp, disp_out); + + // Sanity check: Re-encode |disp| into |code|, ensure no change. + CODE_T code_copy = code; + EXPECT_TRUE((*encode_fun)(disp, &code)); + EXPECT_EQ(code_copy, code); + + // Encode 0, ensure we get clean slate |code| back. 
+ EXPECT_TRUE((*encode_fun)(0, &code)); + EXPECT_EQ(instr.clean_slate_code, code); + } + + for (arm_disp_t disp : bad_disp_list) { + CODE_T code = instr.clean_slate_code; + EXPECT_FALSE((*encode_fun)(disp, &code)) << disp; + // Value does not get modified after failure. + EXPECT_EQ(instr.clean_slate_code, code); + } + } + } +}; + +// Tester for ARM Write / Read functions for |target_rva| <-> |code|. +template <typename TRAITS> +class ArmTranslatorWriteReadTest { + public: + using CODE_T = typename TRAITS::code_t; + + ArmTranslatorWriteReadTest() {} + + // Expects successful Write() to |clean_slate_code| for each |target_rva_list| + // RVA, using each |instr_rva_list| RVA, and that the resulting |code| leads + // to successful Read(), which recovers |instr_rva|. + void Accept(CODE_T clean_slate_code, + const std::vector<rva_t>& instr_rva_list, + const std::vector<rva_t>& target_rva_list) { + bool (*read_fun)(rva_t, CODE_T, rva_t*) = TRAITS::Read; + bool (*write_fun)(rva_t, rva_t, CODE_T*) = TRAITS::Write; + + for (rva_t instr_rva : instr_rva_list) { + for (rva_t target_rva : target_rva_list) { + CODE_T code = clean_slate_code; + // Write |target_rva| to |code|. + EXPECT_TRUE((*write_fun)(instr_rva, target_rva, &code)) << target_rva; + rva_t target_rva_out = kInvalidRva; + + // Read |code| to |target_rva_out|, check fidelity. + EXPECT_TRUE((*read_fun)(instr_rva, code, &target_rva_out)); + EXPECT_EQ(target_rva, target_rva_out); + + // Sanity check: Rewrite |target_rva| into |code|, ensure no change. + CODE_T code_copy = code; + EXPECT_TRUE((*write_fun)(instr_rva, target_rva, &code)); + EXPECT_EQ(code_copy, code); + } + } + } + + // Expects failed Write() to |clean_slate_code| for each |target_rva_list| + // RVA, using each |instr_rva_list| RVA. 
+ void Reject(CODE_T clean_slate_code, + const std::vector<rva_t>& instr_rva_list, + const std::vector<rva_t>& target_rva_list) { + bool (*write_fun)(rva_t, rva_t, CODE_T*) = TRAITS::Write; + + for (rva_t instr_rva : instr_rva_list) { + for (rva_t target_rva : target_rva_list) { + CODE_T code = clean_slate_code; + EXPECT_FALSE((*write_fun)(instr_rva, target_rva, &code)) << target_rva; + // Output variable is unmodified after failure. + EXPECT_EQ(clean_slate_code, code); + } + } + } +}; + +} // namespace + +// Test for test helper. +TEST(ArmUtilsTest, SplitBits) { + // If |expected| == "BAD" then we expect failure. + auto run_test = [](const std::string& expected, const std::string& pattern, + uint32_t value) { + std::map<std::string, uint32_t> components; + if (expected == "BAD") { + EXPECT_FALSE(SplitBits(pattern, value, &components)); + EXPECT_TRUE(components.empty()); + } else { + EXPECT_TRUE(SplitBits(pattern, value, &components)); + std::ostringstream oss; + // Not using AsHex<>, since number of digits is not fixed. + oss << std::uppercase << std::hex; + std::string sep = ""; + for (auto it : components) { + oss << sep << it.first << "=" << it.second; + sep = ","; + } + EXPECT_EQ(expected, oss.str()); + } + }; + + run_test("a=ABCD0123", "aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa", 0xABCD0123); + run_test("a=ABCD,b=123", "aaaaaaaa aaaaaaaa bbbbbbbb bbbbbbbb", 0xABCD0123); + run_test("a=23,b=1,c=CD,d=AB", "dddddddd cccccccc bbbbbbbb aaaaaaaa", + 0xABCD0123); + run_test("", "........ ........ ........ ........", 0xABCD0123); + run_test("t=AC02", " tttt.... tt tt.... tttt....tttt.... 
", 0xABCD0123); + + run_test("a=8,b=C,c=E,d1=F", "aaaabbbb cccc(d1)(d1)(d1)(d1)", 0x8CEF); + run_test("a=F,b=7,c=3,d1=1", "abc(d1)abc(d1) abc(d1)abc(d1)", 0x8CEF); + + run_test("A1=0,X=1", "(A1)XX(A1) X(A1)(A1)(A1) (X)(A1)(X)X(X)(X)X(A1)", + 0x68BE); + run_test("BAD", "(A1)XX(A1) X(A1)(A1)(A1) (X)(A1)(X)X(X)(X)X(A1)", 0x68BF); + run_test("BAD", "(A1)XX(A1) X(A1)(A1)(A1) (X)(A1)(X)X(X)(X)X(A1)", 0x683E); + + run_test("A=1,B=0,a=C", "AAAAaaaa BBBB01..", 0xFC06); + run_test("A=1,B=0,a=4", "AAAAaaaa BBBB01..", 0xF406); + run_test("A=0,B=1,a=C", "AAAAaaaa BBBB01..", 0x0CF5); + run_test("BAD", "AAAAaaaa BBBB01..", 0xEC06); // Non-uniform A. + run_test("BAD", "AAAAaaaa BBBB01..", 0xFC16); // Non-uniform B. + run_test("BAD", "AAAAaaaa BBBB01..", 0xFC02); // Constant mismatch. +} + +TEST(AArch32Rel32Translator, Fetch) { + std::vector<uint8_t> bytes = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE}; + ConstBufferView region(&bytes[0], bytes.size()); + AArch32Rel32Translator translator; + EXPECT_EQ(0x76543210U, translator.FetchArmCode32(region, 0U)); + EXPECT_EQ(0xFEDCBA98U, translator.FetchArmCode32(region, 4U)); + + EXPECT_EQ(0x3210U, translator.FetchThumb2Code16(region, 0U)); + EXPECT_EQ(0xFEDCU, translator.FetchThumb2Code16(region, 6U)); + + EXPECT_EQ(0x32107654U, translator.FetchThumb2Code32(region, 0U)); + EXPECT_EQ(0xBA98FEDCU, translator.FetchThumb2Code32(region, 4U)); +} + +TEST(AArch32Rel32Translator, Store) { + std::vector<uint8_t> expected = { + 0xFF, 0xFF, 0xFF, 0xFF, // Padding. + 0x10, 0x32, 0x54, 0x76, // ARM 32-bit. + 0xFF, 0xFF, // Padding. + 0x42, 0x86, // THUMB2 16-bit. + 0xFF, 0xFF, // Padding. + 0xDC, 0xFE, 0x98, 0xBA, // THUMB2 32-bit. + 0xFF, 0xFF, 0xFF, 0xFF // Padding. 
+ };
+
+ std::vector<uint8_t> bytes(4 * 2 + 2 * 3 + 4 * 2, 0xFF);
+ MutableBufferView region(&bytes[0], bytes.size());
+ CHECK_EQ(expected.size(), bytes.size());
+
+ AArch32Rel32Translator translator;
+ translator.StoreArmCode32(region, 4U, 0x76543210U);
+ translator.StoreThumb2Code16(region, 10U, 0x8642U);
+ translator.StoreThumb2Code32(region, 14U, 0xFEDCBA98U);
+
+ EXPECT_EQ(expected, bytes);
+}
+
+// Detailed test of Encode/Decode: Check valid and invalid |disp| for various
+// clean slate |code| cases. Also check |disp| and |code| binary components,
+// which are described in AArch32Rel32Translator comments.
+TEST(AArch32Rel32Translator, EncodeDecode) {
+ // A24 tests.
+ ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_A24>
+ test_A24;
+ for (int cond = 0; cond <= 0x0E; ++cond) {
+ ArmRelInstruction<uint32_t> B_A1_cond("cccc1010 Siiiiiii iiiiiiii iiiiiiii",
+ kCleanSlateB_A1 | (cond << 28));
+ ArmRelInstruction<uint32_t> BL_A1_cond(
+ "cccc1011 Siiiiiii iiiiiiii iiiiiiii", kCleanSlateBL_A1 | (cond << 28));
+ test_A24.Run("SSSSSSSi iiiiiiii iiiiiiii iiiiii00", {"S", "i"},
+ {B_A1_cond, BL_A1_cond},
+ {0x01FFFFFC, -0x02000000, 0, 4, -4, 0x40, 0x44},
+ {2, -2, 0x41, 0x42, 0x43, 0x02000000, -0x02000004});
+ }
+ // BLX encoding A2, which has 2-byte alignment.
+ ArmRelInstruction<uint32_t> BLX_A2("1111101H Siiiiiii iiiiiiii iiiiiiii",
+ kCleanSlateBLX_A2);
+ test_A24.Run("SSSSSSSi iiiiiiii iiiiiiii iiiiiiH0", {"S", "i", "H"}, {BLX_A2},
+ {0x01FFFFFC, 0x01FFFFFE, -0x02000000, 0, 2, -2, 4, 0x40, 0x42},
+ {1, -1, 0x41, 0x43, 0x02000000, -0x02000002});
+
+ // T8 tests. 
+ ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_T8> test_T8; + for (int cond = 0; cond <= 0x0E; ++cond) { + ArmRelInstruction<uint16_t> B_T1_cond("1101cccc Siiiiiii", + kCleanSlateB_T1 | (cond << 8)); + test_T8.Run("SSSSSSSS SSSSSSSS SSSSSSSS iiiiiii0", {"S", "i"}, {B_T1_cond}, + {0x00FE, -0x0100, 0, 2, -2, 4, 0x40, 0x42}, + {1, -1, 0x41, 0x43, 0x0100, -0x0102}); + } + ArmRelInstruction<uint16_t> B_T1_invalid("11011111 ........", + kCleanSlateB_T1 | (0x0F << 8)); + test_T8.Run("........ ........ ........ ........", std::vector<std::string>(), + {B_T1_invalid}, std::vector<arm_disp_t>(), + {0x00FE, -0x0100, 0, 2, 4, 0x40, 0x41, 0x0100, -0x0102}); + + // T11 tests. + ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_T11> + test_T11; + ArmRelInstruction<uint16_t> B_T2("11100Sii iiiiiiii", kCleanSlateB_T2); + test_T11.Run("SSSSSSSS SSSSSSSS SSSSSiii iiiiiii0", {"S", "i"}, {B_T2}, + {0x07FE, -0x0800, 0, 2, -2, 4, 0x40, 0x42}, + {1, -1, 0x41, 0x43, 0x0800, -0x0802}); + + // T20 tests. + ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_T20> + test_T20; + for (int cond = 0; cond <= 0x0E; ++cond) { + ArmRelInstruction<uint32_t> B_T3_cond( + "11110Scc cciiiiii 10(J1)0(J2)jjj jjjjjjjj", + kCleanSlateB_T3 | (cond << 22)); + test_T20.Run("SSSSSSSS SSSS(J2)(J1)ii iiiijjjj jjjjjjj0", + {"S", "J2", "J1", "i", "j"}, {B_T3_cond}, + {0x000FFFFE, -0x00100000, 0, 2, -2, 4, 0x40, 0x42}, + {1, -1, 0x41, 0x43, 0x00100000, -0x00100002}); + } + ArmRelInstruction<uint32_t> B_T3_invalid( + "11110.11 11...... 10.0.... ........", kCleanSlateB_T3 | (0x0F << 22)); + test_T20.Run("........ ........ ........ ........", + std::vector<std::string>(), {B_T3_invalid}, + std::vector<arm_disp_t>(), + {0x000FFFFE, -0x00100000, 0, 2, 4, 0x40, 0x42, 1, 0x41, 0x43, + 0x00100000, -0x00100002}); + + // T24 tests. + ArmTranslatorEncodeDecodeTest<AArch32Rel32Translator::AddrTraits_T24> + test_T24; + // "Clean slate" means J1 = J2 = 1, so we include 0x00002800. 
+ ArmRelInstruction<uint32_t> B_T4("11110Sii iiiiiiii 10(J1)1(J2)jjj jjjjjjjj", + kCleanSlateB_T4); + ArmRelInstruction<uint32_t> BL_T1("11110Sii iiiiiiii 11(J1)1(J2)jjj jjjjjjjj", + kCleanSlateBL_T1); + test_T24.Run("SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjjj0", + {"S", "i", "j"}, // Skip "J1", "J2", "I1", "I2" checks. + {B_T4, BL_T1}, + {0x00FFFFFE, -0x01000000, 0, 2, -2, 4, -4, 0x40, 0x42}, + {1, -1, 0x41, 0x43, 0x01000000, -0x01000002}); + + // For BLX encoding T2, |disp| must be multiple of 4. + ArmRelInstruction<uint32_t> BLX_T2( + "11110Sii iiiiiiii 11(J1)0(J2)jjj jjjjjjj0", kCleanSlateBLX_T2); + test_T24.Run( + "SSSSSSSS (I1)(I2)iiiiii iiiijjjj jjjjjj00", + {"S", "i", "j"}, // Skip "J1", "J2", "I1", "I2" checks. + {BLX_T2}, {0x00FFFFFC, -0x01000000, 0, 4, -4, 0x40}, + {1, -1, 2, -2, 0x41, 0x42, 0x43, 0x00FFFFFE, 0x01000000, -0x01000002}); +} + +TEST(AArch32Rel32Translator, WriteRead) { + std::vector<rva_t> aligned4; + std::vector<rva_t> misaligned4; + std::vector<rva_t> aligned2; + std::vector<rva_t> misaligned2; + for (rva_t rva = 0x1FFC; rva <= 0x2010; ++rva) { + ((rva % 4 == 0) ? aligned4 : misaligned4).push_back(rva); + ((rva % 2 == 0) ? aligned2 : misaligned2).push_back(rva); + } + CHECK_EQ(6U, aligned4.size()); + CHECK_EQ(15U, misaligned4.size()); + CHECK_EQ(11U, aligned2.size()); + CHECK_EQ(10U, misaligned2.size()); + + // Helpers to convert an instruction's RVA to PC. + auto pcArm = [](rva_t instr_rva) -> rva_t { return instr_rva + 8; }; + auto pcThumb2 = [](rva_t instr_rva) -> rva_t { return instr_rva + 4; }; + + // A24 tests. 
+ ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_A24> test_A24; + for (uint32_t clean_slate_code : {kCleanSlateB_A1, kCleanSlateBL_A1}) { + test_A24.Accept(clean_slate_code, aligned4, aligned4); + test_A24.Reject(clean_slate_code, aligned4, misaligned4); + test_A24.Reject(clean_slate_code, misaligned4, aligned4); + test_A24.Reject(clean_slate_code, misaligned4, misaligned4); + // Signed (24 + 2)-bit range, 4-byte aligned: [-0x02000000, 0x01FFFFFC]. + test_A24.Accept(clean_slate_code, {0x15000000}, + {pcArm(0x13000000), pcArm(0x16FFFFFC)}); + test_A24.Reject(clean_slate_code, {0x15000000}, + {pcArm(0x13000000 - 4), pcArm(0x16FFFFFC + 4)}); + } + + // BLX complication: ARM -> THUMB2. + test_A24.Accept(kCleanSlateBLX_A2, aligned4, aligned2); + test_A24.Reject(kCleanSlateBLX_A2, aligned4, misaligned2); + test_A24.Reject(kCleanSlateBLX_A2, misaligned4, aligned2); + test_A24.Reject(kCleanSlateBLX_A2, misaligned4, misaligned2); + test_A24.Accept(kCleanSlateBLX_A2, {0x15000000}, + {pcArm(0x13000000), pcArm(0x16FFFFFE)}); + test_A24.Reject(kCleanSlateBLX_A2, {0x15000000}, + {pcArm(0x13000000 - 4), pcArm(0x13000000 - 2), + pcArm(0x16FFFFFE + 2), pcArm(0x16FFFFFE + 4)}); + + // T8 tests. + ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_T8> test_T8; + test_T8.Accept(kCleanSlateB_T1, aligned2, aligned2); + test_T8.Reject(kCleanSlateB_T1, aligned2, misaligned2); + test_T8.Reject(kCleanSlateB_T1, misaligned2, aligned2); + test_T8.Reject(kCleanSlateB_T1, misaligned2, misaligned2); + // Signed (8 + 1)-bit range, 2-byte aligned: [-0x0100, 0x00FE]. + test_T8.Accept(kCleanSlateB_T1, {0x10000500}, + {pcThumb2(0x10000400), pcThumb2(0x100005FE)}); + test_T8.Reject(kCleanSlateB_T1, {0x10000500}, + {pcThumb2(0x10000400 - 2), pcThumb2(0x100005FE + 2)}); + + // T11 tests. 
+ ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_T11> test_T11; + test_T11.Accept(kCleanSlateB_T2, aligned2, aligned2); + test_T11.Reject(kCleanSlateB_T2, aligned2, misaligned2); + test_T11.Reject(kCleanSlateB_T2, misaligned2, aligned2); + test_T11.Reject(kCleanSlateB_T2, misaligned2, misaligned2); + // Signed (11 + 1)-bit range, 2-byte aligned: [-0x0800, 0x07FE]. + test_T11.Accept(kCleanSlateB_T2, {0x10003000}, + {pcThumb2(0x10002800), pcThumb2(0x100037FE)}); + test_T11.Reject(kCleanSlateB_T2, {0x10003000}, + {pcThumb2(0x10002800 - 2), pcThumb2(0x100037FE + 2)}); + + // T20 tests. + ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_T20> test_T20; + test_T20.Accept(kCleanSlateB_T3, aligned2, aligned2); + test_T20.Reject(kCleanSlateB_T3, aligned2, misaligned2); + test_T20.Reject(kCleanSlateB_T3, misaligned2, aligned2); + test_T20.Reject(kCleanSlateB_T3, misaligned2, misaligned2); + // Signed (20 + 1)-bit range, 2-byte aligned: [-0x00100000, 0x000FFFFE]. + test_T20.Accept(kCleanSlateB_T3, {0x10300000}, + {pcThumb2(0x10200000), pcThumb2(0x103FFFFE)}); + test_T20.Reject(kCleanSlateB_T3, {0x10300000}, + {pcThumb2(0x10200000 - 2), pcThumb2(0x103FFFFE + 2)}); + + // T24 tests. + ArmTranslatorWriteReadTest<AArch32Rel32Translator::AddrTraits_T24> test_T24; + for (uint32_t clean_slate_code : {kCleanSlateB_T4, kCleanSlateBL_T1}) { + test_T24.Accept(clean_slate_code, aligned2, aligned2); + test_T24.Reject(clean_slate_code, aligned2, misaligned2); + test_T24.Reject(clean_slate_code, misaligned2, aligned2); + test_T24.Reject(clean_slate_code, misaligned2, misaligned2); + // Signed (24 + 1)-bit range, 2-byte aligned: [-0x01000000, 0x00FFFFFE]. + test_T24.Accept(clean_slate_code, {0x16000000}, + {pcThumb2(0x15000000), pcThumb2(0x16FFFFFE)}); + test_T24.Reject(clean_slate_code, {0x16000000}, + {pcThumb2(0x15000000 - 2), pcThumb2(0x16FFFFFE + 2)}); + } + + // BLX complication: THUMB2 -> ARM. 
+ test_T24.Accept(kCleanSlateBLX_T2, aligned2, aligned4); + test_T24.Reject(kCleanSlateBLX_T2, aligned2, misaligned4); + test_T24.Reject(kCleanSlateBLX_T2, misaligned2, aligned4); + test_T24.Reject(kCleanSlateBLX_T2, misaligned2, misaligned4); + test_T24.Accept(kCleanSlateBLX_T2, {0x16000000}, + {pcThumb2(0x15000000), pcThumb2(0x16FFFFFC)}); + test_T24.Reject(kCleanSlateBLX_T2, {0x16000000}, + {pcThumb2(0x15000000 - 4), pcThumb2(0x15000000 - 2), + pcThumb2(0x16FFFFFC + 2), pcThumb2(0x16FFFFFC + 4)}); +} + +// Typical usage in |target_rva| extraction. +TEST(AArch32Rel32Translator, Main) { + // ARM mode (32-bit). + // 00103050: 00 01 02 EA B 00183458 ; B encoding A1 (cond = AL). + { + rva_t instr_rva = 0x00103050U; + AArch32Rel32Translator translator; + std::vector<uint8_t> bytes = {0x00, 0x01, 0x02, 0xEA}; + MutableBufferView region(&bytes[0], bytes.size()); + uint32_t code = translator.FetchArmCode32(region, 0U); + EXPECT_EQ(0xEA020100U, code); + + // |code| <-> |disp|. + arm_disp_t disp = 0; + EXPECT_EQ(kArmAlign4, translator.DecodeA24(code, &disp)); + EXPECT_EQ(+0x00080400, disp); + + uint32_t code_from_disp = kCleanSlateBAL_A1; + EXPECT_TRUE(translator.EncodeA24(disp, &code_from_disp)); + EXPECT_EQ(code, code_from_disp); + + // |code| <-> |target_rva|. + rva_t target_rva = kInvalidRva; + EXPECT_TRUE(translator.ReadA24(instr_rva, code, &target_rva)); + // 0x00103050 + 8 + 0x00080400. + EXPECT_EQ(0x00183458U, target_rva); + + uint32_t code_from_rva = kCleanSlateBAL_A1; + EXPECT_TRUE(translator.WriteA24(instr_rva, target_rva, &code_from_rva)); + EXPECT_EQ(code, code_from_rva); + } + + // THUMB2 mode (16-bit). + // 001030A2: F3 E7 B 0010308C ; B encoding T2. + { + rva_t instr_rva = 0x001030A2U; + AArch32Rel32Translator translator; + std::vector<uint8_t> bytes = {0xF3, 0xE7}; + MutableBufferView region(&bytes[0], bytes.size()); + uint16_t code = translator.FetchThumb2Code16(region, 0U); + // Sii iiiiiiii = 111 11110011 = -1101 = -0x0D. 
+ EXPECT_EQ(0xE7F3U, code);
+
+ // |code| <-> |disp|.
+ arm_disp_t disp = 0;
+ EXPECT_EQ(kArmAlign2, translator.DecodeT11(code, &disp));
+ EXPECT_EQ(-0x0000001A, disp); // -0x0D * 2 = -0x1A.
+
+ uint16_t code_from_disp = kCleanSlateB_T2;
+ EXPECT_TRUE(translator.EncodeT11(disp, &code_from_disp));
+ EXPECT_EQ(code, code_from_disp);
+
+ // |code| <-> |target_rva|.
+ rva_t target_rva = kInvalidRva;
+ EXPECT_TRUE(translator.ReadT11(instr_rva, code, &target_rva));
+ // 0x001030A2 + 4 - 0x0000001A.
+ EXPECT_EQ(0x0010308CU, target_rva);
+
+ uint16_t code_from_rva = kCleanSlateB_T2;
+ EXPECT_TRUE(translator.WriteT11(instr_rva, target_rva, &code_from_rva));
+ EXPECT_EQ(code, code_from_rva);
+ }
+
+ // THUMB2 mode (32-bit).
+ // 001030A2: 00 F0 01 FA BL 001034A8 ; BL encoding T1.
+ {
+ rva_t instr_rva = 0x001030A2U;
+ AArch32Rel32Translator translator;
+ std::vector<uint8_t> bytes = {0x00, 0xF0, 0x01, 0xFA};
+ MutableBufferView region(&bytes[0], bytes.size());
+ uint32_t code = translator.FetchThumb2Code32(region, 0U);
+ EXPECT_EQ(0xF000FA01U, code);
+
+ // |code| <-> |disp|.
+ arm_disp_t disp = 0;
+ EXPECT_EQ(kArmAlign2, translator.DecodeT24(code, &disp));
+ EXPECT_EQ(+0x00000402, disp);
+
+ uint32_t code_from_disp = kCleanSlateBL_T1;
+ EXPECT_TRUE(translator.EncodeT24(disp, &code_from_disp));
+ EXPECT_EQ(code, code_from_disp);
+
+ // |code| <-> |target_rva|.
+ rva_t target_rva = kInvalidRva;
+ EXPECT_TRUE(translator.ReadT24(instr_rva, code, &target_rva));
+ // 0x001030A2 + 4 + 0x00000402.
+ EXPECT_EQ(0x001034A8U, target_rva);
+
+ uint32_t code_from_rva = kCleanSlateBL_T1;
+ EXPECT_TRUE(translator.WriteT24(instr_rva, target_rva, &code_from_rva));
+ EXPECT_EQ(code, code_from_rva);
+ }
+}
+
+TEST(AArch32Rel32Translator, BLXComplication) {
+ auto run_test = [](rva_t instr_rva,
+ std::vector<uint8_t> bytes, // Pass by value. 
+ uint32_t expected_code, arm_disp_t expected_disp, + uint32_t clean_slate_code, rva_t expected_target_rva) { + AArch32Rel32Translator translator; + MutableBufferView region(&bytes[0], bytes.size()); + uint32_t code = translator.FetchThumb2Code32(region, 0U); + EXPECT_EQ(expected_code, code); + + // |code| <-> |disp|. + arm_disp_t disp = 0; + EXPECT_TRUE(translator.DecodeT24(code, &disp)); + EXPECT_EQ(expected_disp, disp); + + uint32_t code_from_disp = clean_slate_code; + EXPECT_TRUE(translator.EncodeT24(disp, &code_from_disp)); + EXPECT_EQ(code, code_from_disp); + + // |code| <-> |target_rva|. + rva_t target_rva = kInvalidRva; + EXPECT_TRUE(translator.ReadT24(instr_rva, code, &target_rva)); + EXPECT_EQ(expected_target_rva, target_rva); + + uint32_t code_from_rva = clean_slate_code; + EXPECT_TRUE(translator.WriteT24(instr_rva, target_rva, &code_from_rva)); + EXPECT_EQ(code, code_from_rva); + }; + + // No complication, 4-byte aligned. + // 001030A0: 01 F0 06 B0 B 005040B0 ; B encoding T4. + run_test(0x001030A0U, // Multiple of 4. + {0x01, 0xF0, 0x06, 0xB0}, 0xF001B006U, 0x0040100C, kCleanSlateB_T4, + // "Canonical" |target_rva|: 0x001030A0 + 4 + 0x0040100C. + 0x005040B0U); + + // No complication, not 4-byte aligned. + // 001030A2: 01 F0 06 B0 B 005040B2 ; B encoding T4. + run_test(0x001030A2U, // Shift by 2: Not multiple of 4. + {0x01, 0xF0, 0x06, 0xB0}, 0xF001B006U, 0x0040100C, kCleanSlateB_T4, + // Shifted by 2: 0x001030A2 + 4 + 0x0040100C. + 0x005040B2U); + + // Repeat the above, but use BLX instead of B. + + // BLX complication, 4-byte aligned. + // 001030A0: 01 F0 06 E0 BLX 005040B0 ; BLX encoding T2. + run_test(0x001030A0U, // Multiple of 4. + {0x01, 0xF0, 0x06, 0xE0}, 0xF001E006U, 0x0040100C, kCleanSlateBLX_T2, + // Canonical again: align_down_4(0x001030A0 + 4 + 0x0040100C). + 0x005040B0U); + + // BLX complication, not 4-byte aligned. + // 001030A2: 01 F0 06 E0 BLX 005040B0 ; BLX encoding T2. + run_test(0x001030A2U, // Shift by 2: Not multiple of 4. 
+ {0x01, 0xF0, 0x06, 0xE0}, 0xF001E006U, 0x0040100C, kCleanSlateBLX_T2, + // No shift: align_down_4(0x001030A2 + 4 + 0x0040100C). + 0x005040B0U); +} + +TEST(AArch64Rel32Translator, FetchStore) { + std::vector<uint8_t> bytes = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE}; + std::vector<uint8_t> expected = {0xAB, 0x33, 0x22, 0x11, + 0x69, 0x5A, 0xFF, 0x00}; + MutableBufferView region(&bytes[0], bytes.size()); + AArch64Rel32Translator translator; + EXPECT_EQ(0x76543210U, translator.FetchCode32(region, 0U)); + EXPECT_EQ(0xFEDCBA98U, translator.FetchCode32(region, 4U)); + + translator.StoreCode32(region, 0U, 0x112233ABU); + translator.StoreCode32(region, 4U, 0x00FF5A69); + EXPECT_EQ(expected, bytes); +} + +TEST(AArch64Rel32Translator, EncodeDecode) { + // Immd14 tests. + ArmTranslatorEncodeDecodeTest<AArch64Rel32Translator::AddrTraits_Immd14> + test_immd14; + for (int b40 : {0, 1, 7, 31}) { + uint32_t b40_mask = b40 << 19; + for (int Rt : {0, 1, 15, 30}) { + uint32_t mask = b40_mask | Rt; + ArmRelInstruction<uint32_t> TBZw_Rt("00110110 bbbbbSii iiiiiiii iiittttt", + kCleanSlate64TBZw | mask); + ArmRelInstruction<uint32_t> TBZz_Rt("10110110 bbbbbSii iiiiiiii iiittttt", + kCleanSlate64TBZz | mask); + ArmRelInstruction<uint32_t> TBNZw_Rt( + "00110111 bbbbbSii iiiiiiii iiittttt", kCleanSlate64TBNZw | mask); + ArmRelInstruction<uint32_t> TBNZz_Rt( + "10110111 bbbbbSii iiiiiiii iiittttt", kCleanSlate64TBNZz | mask); + test_immd14.Run("SSSSSSSS SSSSSSSS Siiiiiii iiiiii00", {"S", "i"}, + {TBZw_Rt, TBZz_Rt, TBNZw_Rt, TBNZz_Rt}, + {0x00007FFC, -0x00008000, 0, 4, -4, 0x40, 0x44}, + {2, -2, 0x41, 0x42, 0x43, 0x00008000, -0x00008004}); + } + } + + // Immd19 tests. 
+ ArmTranslatorEncodeDecodeTest<AArch64Rel32Translator::AddrTraits_Immd19> + test_immd19; + for (int cond = 0; cond <= 0x0E; ++cond) { + ArmRelInstruction<uint32_t> B_cond("01010100 Siiiiiii iiiiiiii iii0cccc", + kCleanSlate64Bcond | cond); + test_immd19.Run("SSSSSSSS SSSSiiii iiiiiiii iiiiii00", {"S", "i"}, {B_cond}, + {0x000FFFFC, -0x00100000, 0, 4, -4, 0x40, 0x44}, + {2, -2, 0x41, 0x42, 0x43, 0x00100000, -0x00100004}); + } + for (int Rt : {0, 1, 15, 30}) { + ArmRelInstruction<uint32_t> CBZw_Rt("00110100 Siiiiiii iiiiiiii iiittttt", + kCleanSlate64CBZw | Rt); + ArmRelInstruction<uint32_t> CBZz_Rt("10110100 Siiiiiii iiiiiiii iiittttt", + kCleanSlate64CBZz | Rt); + ArmRelInstruction<uint32_t> CBNZw_Rt("00110101 Siiiiiii iiiiiiii iiittttt", + kCleanSlate64CBNZw | Rt); + ArmRelInstruction<uint32_t> CBNZz_Rt("10110101 Siiiiiii iiiiiiii iiittttt", + kCleanSlate64CBNZz | Rt); + test_immd19.Run("SSSSSSSS SSSSiiii iiiiiiii iiiiii00", {"S", "i"}, + {CBZw_Rt, CBZz_Rt, CBNZw_Rt, CBNZz_Rt}, + {0x000FFFFC, -0x00100000, 0, 4, -4, 0x40, 0x44}, + {2, -2, 0x41, 0x42, 0x43, 0x00100000, -0x00100004}); + } + + // Immd26 tests. + ArmTranslatorEncodeDecodeTest<AArch64Rel32Translator::AddrTraits_Immd26> + test_immd26; + ArmRelInstruction<uint32_t> B("000101Si iiiiiiii iiiiiiii iiiiiiii", + kCleanSlate64B); + ArmRelInstruction<uint32_t> BL("100101Si iiiiiiii iiiiiiii iiiiiiii", + kCleanSlate64BL); + test_immd26.Run("SSSSSiii iiiiiiii iiiiiiii iiiiii00", {"S", "i"}, {B, BL}, + {0x07FFFFFC, -0x08000000, 0, 4, -4, 0x40, 0x44}, + {2, -2, 0x41, 0x42, 0x43, 0x08000000, -0x08000004}); +} + +TEST(AArch64Rel32Translator, WriteRead) { + std::vector<rva_t> aligned4; + std::vector<rva_t> misaligned4; + for (rva_t rva = 0x1FFC; rva <= 0x2010; ++rva) { + ((rva % 4 == 0) ? aligned4 : misaligned4).push_back(rva); + } + CHECK_EQ(6U, aligned4.size()); + CHECK_EQ(15U, misaligned4.size()); + + // Helper to convert an instruction's RVA to PC. 
+ auto pcAArch64 = [](rva_t instr_rva) -> rva_t { return instr_rva; }; + + // Immd14 tests. + ArmTranslatorWriteReadTest<AArch64Rel32Translator::AddrTraits_Immd14> + test_immd14; + for (uint32_t clean_slate_code : {kCleanSlate64TBZw, kCleanSlate64TBZz, + kCleanSlate64TBNZw, kCleanSlate64TBNZz}) { + test_immd14.Accept(clean_slate_code, aligned4, aligned4); + test_immd14.Reject(clean_slate_code, aligned4, misaligned4); + test_immd14.Reject(clean_slate_code, misaligned4, aligned4); + test_immd14.Reject(clean_slate_code, misaligned4, misaligned4); + // Signed (14 + 2)-bit range, 4-byte aligned: [-0x00008000, 0x00007FFC]. + test_immd14.Accept(clean_slate_code, {0x10040000}, + {pcAArch64(0x10038000), pcAArch64(0x10047FFC)}); + test_immd14.Reject(clean_slate_code, {0x15000000}, + {pcAArch64(0x10038000 - 4), pcAArch64(0x10047FFC + 4)}); + } + + // Immd19 tests. + ArmTranslatorWriteReadTest<AArch64Rel32Translator::AddrTraits_Immd19> + test_immd19; + for (uint32_t clean_slate_code : + {kCleanSlate64Bcond, kCleanSlate64CBZw, kCleanSlate64CBZz, + kCleanSlate64CBNZw, kCleanSlate64CBNZz}) { + test_immd19.Accept(clean_slate_code, aligned4, aligned4); + test_immd19.Reject(clean_slate_code, aligned4, misaligned4); + test_immd19.Reject(clean_slate_code, misaligned4, aligned4); + test_immd19.Reject(clean_slate_code, misaligned4, misaligned4); + // Signed (19 + 2)-bit range, 4-byte aligned: [-0x00100000, 0x000FFFFC]. + test_immd19.Accept(clean_slate_code, {0x10300000}, + {pcAArch64(0x10200000), pcAArch64(0x103FFFFC)}); + test_immd19.Reject(clean_slate_code, {0x10300000}, + {pcAArch64(0x10200000 - 4), pcAArch64(0x103FFFFC + 4)}); + } + + // Immd26 tests. 
+ ArmTranslatorWriteReadTest<AArch64Rel32Translator::AddrTraits_Immd26> + test_immd26; + for (uint32_t clean_slate_code : {kCleanSlate64B, kCleanSlate64BL}) { + test_immd26.Accept(clean_slate_code, aligned4, aligned4); + test_immd26.Reject(clean_slate_code, aligned4, misaligned4); + test_immd26.Reject(clean_slate_code, misaligned4, aligned4); + test_immd26.Reject(clean_slate_code, misaligned4, misaligned4); + // Signed (26 + 2)-bit range, 4-byte aligned: [-0x08000000, 0x07FFFFFC]. + test_immd26.Accept(clean_slate_code, {0x30000000}, + {pcAArch64(0x28000000), pcAArch64(0x37FFFFFC)}); + test_immd26.Reject(clean_slate_code, {0x30000000}, + {pcAArch64(0x28000000 - 4), pcAArch64(0x37FFFFFC + 4)}); + } +} + +// Typical usage in |target_rva| extraction. +TEST(AArch64Rel32Translator, Main) { + // 00103050: 02 01 02 14 B 00183458 + rva_t instr_rva = 0x00103050U; + AArch64Rel32Translator translator; + std::vector<uint8_t> bytes = {0x02, 0x01, 0x02, 0x14}; + MutableBufferView region(&bytes[0], bytes.size()); + uint32_t code = translator.FetchCode32(region, 0U); + EXPECT_EQ(0x14020102U, code); + + // |code| <-> |disp|. + arm_disp_t disp = 0; + EXPECT_TRUE(translator.DecodeImmd26(code, &disp)); + EXPECT_EQ(+0x00080408, disp); + + uint32_t code_from_disp = kCleanSlate64B; + EXPECT_TRUE(translator.EncodeImmd26(disp, &code_from_disp)); + EXPECT_EQ(code, code_from_disp); + + // |code| <-> |target_rva|. + rva_t target_rva = kInvalidRva; + EXPECT_TRUE(translator.ReadImmd26(instr_rva, code, &target_rva)); + // 0x00103050 + 0 + 0x00080408. + EXPECT_EQ(0x00183458U, target_rva); + + uint32_t code_from_rva = kCleanSlate64B; + EXPECT_TRUE(translator.WriteImmd26(instr_rva, target_rva, &code_from_rva)); + EXPECT_EQ(code, code_from_rva); +} + +} // namespace zucchini diff --git a/binary_data_histogram.cc b/binary_data_histogram.cc new file mode 100644 index 0000000..7f6ece8 --- /dev/null +++ b/binary_data_histogram.cc @@ -0,0 +1,91 @@ +// Copyright 2017 The Chromium Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/binary_data_histogram.h" + +#include <algorithm> +#include <cmath> +#include <limits> + +#include "base/check_op.h" +#include "base/format_macros.h" +#include "base/strings/stringprintf.h" + +namespace zucchini { + +/******** OutlierDetector ********/ + +OutlierDetector::OutlierDetector() = default; + +OutlierDetector::~OutlierDetector() = default; + +// For BinaryDataHistogram, |sample| is typically in interval [0, 1]. +void OutlierDetector::Add(double sample) { + ++n_; + sum_ += sample; + sum_of_squares_ += sample * sample; +} + +void OutlierDetector::Prepare() { + if (n_ > 0) { + mean_ = sum_ / n_; + standard_deviation_ = ::sqrt((sum_of_squares_ - sum_ * mean_) / + std::max(static_cast<size_t>(1), n_ - 1)); + } +} + +std::string OutlierDetector::RenderStats() { + return base::StringPrintf("Mean = %.5f, StdDev = %.5f over %" PRIuS + " samples", + mean_, standard_deviation_, n_); +} + +// Constants are chosen for BinaryDataHistogram, where |sample| is typically in +// [0, 1]. +int OutlierDetector::DecideOutlier(double sample) { + // Lower bound to avoid divide-by-zero and penalizing tight clusters. + constexpr double kMinTolerance = 0.1; + // Number of standard deviations away from mean for value to become outlier. + constexpr double kSigmaBound = 1.9; + if (n_ <= 1) + return 0; + double tolerance = std::max(kMinTolerance, standard_deviation_); + double num_sigma = (sample - mean_) / tolerance; + return num_sigma > kSigmaBound ? 1 : num_sigma < -kSigmaBound ? -1 : 0; +} + +/******** BinaryDataHistogram ********/ + +BinaryDataHistogram::BinaryDataHistogram() = default; + +BinaryDataHistogram::~BinaryDataHistogram() = default; + +bool BinaryDataHistogram::Compute(ConstBufferView region) { + DCHECK(!histogram_); + // Binary data with size < 2 are invalid. 
+ if (region.size() < sizeof(uint16_t)) + return false; + DCHECK_LE(region.size(), + static_cast<size_t>(std::numeric_limits<int32_t>::max())); + + histogram_ = std::make_unique<int32_t[]>(kNumBins); + size_ = region.size(); + // Number of 2-byte intervals fully contained in |region|. + size_t bound = size_ - sizeof(uint16_t) + 1; + for (size_t i = 0; i < bound; ++i) + ++histogram_[region.read<uint16_t>(i)]; + return true; +} + +double BinaryDataHistogram::Distance(const BinaryDataHistogram& other) const { + DCHECK(IsValid() && other.IsValid()); + // Compute Manhattan (L1) distance between respective histograms. + double total_diff = 0; + for (int i = 0; i < kNumBins; ++i) + total_diff += std::abs(histogram_[i] - other.histogram_[i]); + // Normalize by total size, so result lies in [0, 1]. + return total_diff / (size_ + other.size_); +} + +} // namespace zucchini diff --git a/binary_data_histogram.h b/binary_data_histogram.h new file mode 100644 index 0000000..201f90a --- /dev/null +++ b/binary_data_histogram.h @@ -0,0 +1,90 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_ +#define COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <memory> +#include <string> + +#include "components/zucchini/buffer_view.h" + +namespace zucchini { + +// A class to detect outliers in a list of doubles using Chauvenet's criterion: +// Compute mean and standard deviation of observations, then determine whether +// a query value lies beyond a fixed number of standard deviations (sigmas) from +// the mean. The purpose of this test is to reduce the chance of false-positive +// ensemble matches. 
+class OutlierDetector { + public: + OutlierDetector(); + OutlierDetector(const OutlierDetector&) = delete; + const OutlierDetector& operator=(const OutlierDetector&) = delete; + ~OutlierDetector(); + + // Incorporates |sample| into mean and standard deviation. + void Add(double sample); + + // Prepares basic statistics for DecideOutlier() calls. Should be called after + // all samples have been added. + void Prepare(); + + // Renders current statistics as strings for logging. + std::string RenderStats(); + + // Heuristically decides whether |sample| is an outlier. Returns 1 if |sample| + // is "too high", 0 if |sample| is "normal", and -1 if |sample| is "too low". + // Must be called after Prepare(). + int DecideOutlier(double sample); + + private: + size_t n_ = 0; + double sum_ = 0; + double sum_of_squares_ = 0; + double mean_ = 0; + double standard_deviation_ = 0; +}; + +// A class to compute similarity score between binary data. The heuristic here +// preprocesses input data to a size-65536 histogram, counting the frequency of +// consecutive 2-byte sequences. Therefore data with lengths < 2 are considered +// invalid -- but this is okay for Zucchini's use case. +class BinaryDataHistogram { + public: + BinaryDataHistogram(); + BinaryDataHistogram(const BinaryDataHistogram&) = delete; + const BinaryDataHistogram& operator=(const BinaryDataHistogram&) = delete; + ~BinaryDataHistogram(); + + // Attempts to compute the histogram, returns true iff successful. + bool Compute(ConstBufferView region); + + bool IsValid() const { return static_cast<bool>(histogram_); } + + // Returns distance to another histogram (heuristics). If two binaries are + // identical then their histogram distance is 0. However, the converse is not + // true in general. For example, "aba" and "bab" are different, but their + // histogram distance is 0 (both histograms are {"ab": 1, "ba": 1}). 
+ double Distance(const BinaryDataHistogram& other) const; + + private: + enum { kNumBins = 1 << (sizeof(uint16_t) * 8) }; + static_assert(kNumBins == 65536, "Incorrect constant computation."); + + // Size, in bytes, of the data over which the histogram was computed. + size_t size_ = 0; + + // 2^16 buckets holding counts of all 2-byte sequences in the data. The counts + // are stored as signed values to simplify computing the distance between two + // histograms. + std::unique_ptr<int32_t[]> histogram_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_ diff --git a/binary_data_histogram_unittest.cc b/binary_data_histogram_unittest.cc new file mode 100644 index 0000000..ca71010 --- /dev/null +++ b/binary_data_histogram_unittest.cc @@ -0,0 +1,132 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/binary_data_histogram.h" + +#include <stddef.h> + +#include <memory> +#include <vector> + +#include "components/zucchini/buffer_view.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(OutlierDetectorTest, Basic) { + auto make_detector = [](const std::vector<double>& values) { + auto detector = std::make_unique<OutlierDetector>(); + for (double v : values) + detector->Add(v); + detector->Prepare(); + return detector; + }; + + std::unique_ptr<OutlierDetector> detector; + // No data: Should at least not cause error. + detector = make_detector({}); + EXPECT_EQ(0, detector->DecideOutlier(0.0)); + // Single point: Trivially inert. + detector = make_detector({0.5}); + EXPECT_EQ(0, detector->DecideOutlier(0.1)); + EXPECT_EQ(0, detector->DecideOutlier(0.5)); + EXPECT_EQ(0, detector->DecideOutlier(0.9)); + // Two identical points: StdDev is 0, so falls back to built-in tolerance. 
+ detector = make_detector({0.5, 0.5});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.499));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.501));
+ EXPECT_EQ(1, detector->DecideOutlier(0.7));
+ // Two separate points: Outlier test is pretty lax.
+ detector = make_detector({0.4, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.2));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.7));
+ EXPECT_EQ(1, detector->DecideOutlier(0.8));
+ // Sharpen distribution by clustering toward norm: Now test is stricter.
+ detector = make_detector({0.4, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.4));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.6));
+ EXPECT_EQ(1, detector->DecideOutlier(0.7));
+ // Shift numbers around: Mean is 0.3, and data order scrambled.
+ detector = make_detector({0.28, 0.2, 0.31, 0.4, 0.29, 0.32, 0.27, 0.30});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.0));
+ EXPECT_EQ(-1, detector->DecideOutlier(0.1));
+ EXPECT_EQ(0, detector->DecideOutlier(0.2));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.4));
+ EXPECT_EQ(1, detector->DecideOutlier(0.5));
+ EXPECT_EQ(1, detector->DecideOutlier(1.0));
+ // Typical usage: Potential outlier would be part of original input data! 
+ detector = make_detector({0.3, 0.29, 0.31, 0.0, 0.3, 0.32, 0.3, 0.29, 0.6}); + EXPECT_EQ(-1, detector->DecideOutlier(0.0)); + EXPECT_EQ(0, detector->DecideOutlier(0.28)); + EXPECT_EQ(0, detector->DecideOutlier(0.29)); + EXPECT_EQ(0, detector->DecideOutlier(0.3)); + EXPECT_EQ(0, detector->DecideOutlier(0.31)); + EXPECT_EQ(0, detector->DecideOutlier(0.32)); + EXPECT_EQ(1, detector->DecideOutlier(0.6)); +} + +TEST(BinaryDataHistogramTest, Basic) { + constexpr double kUninitScore = -1; + + constexpr uint8_t kTestData[] = {2, 137, 42, 0, 0, 0, 7, 11, 1, 11, 255}; + const size_t n = sizeof(kTestData); + ConstBufferView region(kTestData, n); + + std::vector<BinaryDataHistogram> prefix_histograms(n + 1); // Short to long. + std::vector<BinaryDataHistogram> suffix_histograms(n + 1); // Long to short. + + for (size_t i = 0; i <= n; ++i) { + ConstBufferView prefix(region.begin(), i); + ConstBufferView suffix(region.begin() + i, n - i); + // If regions are smaller than 2 bytes then it is invalid. Else valid. + EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].Compute(prefix)); + EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].Compute(suffix)); + // IsValid() returns the same results. + EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].IsValid()); + EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].IsValid()); + } + + // Full-prefix = full-suffix = full data. + EXPECT_EQ(0.0, prefix_histograms[n].Distance(suffix_histograms[0])); + EXPECT_EQ(0.0, suffix_histograms[0].Distance(prefix_histograms[n])); + + // Testing heuristics without overreliance on implementation details. + + // Strict prefixes, in increasing size. Compare against full data. + double prev_prefix_score = kUninitScore; + for (size_t i = 2; i < n; ++i) { + double score = prefix_histograms[i].Distance(prefix_histograms[n]); + // Positivity. + EXPECT_GT(score, 0.0); + // Symmetry. 
+ EXPECT_EQ(score, prefix_histograms[n].Distance(prefix_histograms[i])); + // Distance should decrease as prefix gets nearer to full data. + if (prev_prefix_score != kUninitScore) + EXPECT_LT(score, prev_prefix_score); + prev_prefix_score = score; + } + + // Strict suffixes, in decreasing size. Compare against full data. + double prev_suffix_score = -1; + for (size_t i = 1; i <= n - 2; ++i) { + double score = suffix_histograms[i].Distance(suffix_histograms[0]); + // Positivity. + EXPECT_GT(score, 0.0); + // Symmetry. + EXPECT_EQ(score, suffix_histograms[0].Distance(suffix_histograms[i])); + // Distance should increase as suffix gets farther from full data. + if (prev_suffix_score != kUninitScore) + EXPECT_GT(score, prev_suffix_score); + prev_suffix_score = score; + } +} + +} // namespace zucchini diff --git a/buffer_sink.cc b/buffer_sink.cc new file mode 100644 index 0000000..5b89e3a --- /dev/null +++ b/buffer_sink.cc @@ -0,0 +1,11 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/buffer_sink.h" + +namespace zucchini { + +BufferSink::BufferSink(MutableBufferView buffer) : MutableBufferView(buffer) {} + +} // namespace zucchini diff --git a/buffer_sink.h b/buffer_sink.h new file mode 100644 index 0000000..24798af --- /dev/null +++ b/buffer_sink.h @@ -0,0 +1,68 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_BUFFER_SINK_H_ +#define COMPONENTS_ZUCCHINI_BUFFER_SINK_H_ + +#include <stdint.h> + +#include <algorithm> +#include <iterator> + +#include "base/check_op.h" +#include "components/zucchini/buffer_view.h" + +namespace zucchini { + +// BufferSink acts like an output stream with convenience methods to serialize +// data into a contiguous sequence of raw data. 
The underlying MutableBufferView +// emulates a cursor to track current write position, and guards against buffer +// overrun. Where applicable, BufferSink should be passed by pointer to maintain +// cursor progress across writes. +class BufferSink : public MutableBufferView { + public: + using iterator = MutableBufferView::iterator; + + using MutableBufferView::MutableBufferView; + BufferSink() = default; + explicit BufferSink(MutableBufferView buffer); + BufferSink(const BufferSink&) = default; + BufferSink& operator=(BufferSink&&) = default; + + // If sufficient space is available, writes the binary representation of + // |value| starting at the cursor, while advancing the cursor beyond the + // written region, and returns true. Otherwise returns false. + template <class T> + bool PutValue(const T& value) { + DCHECK_NE(begin(), nullptr); + if (Remaining() < sizeof(T)) + return false; + *reinterpret_cast<T*>(begin()) = value; + remove_prefix(sizeof(T)); + return true; + } + + // If sufficient space is available, writes the raw bytes [|first|, |last|) + // starting at the cursor, while advancing the cursor beyond the written + // region, and returns true. Otherwise returns false. + template <class It> + bool PutRange(It first, It last) { + static_assert(sizeof(typename std::iterator_traits<It>::value_type) == + sizeof(uint8_t), + "value_type should fit in uint8_t"); + DCHECK_NE(begin(), nullptr); + DCHECK(last >= first); + if (Remaining() < size_type(last - first)) + return false; + std::copy(first, last, begin()); + remove_prefix(last - first); + return true; + } + + size_type Remaining() const { return size(); } +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_BUFFER_SINK_H_ diff --git a/buffer_sink_unittest.cc b/buffer_sink_unittest.cc new file mode 100644 index 0000000..33b788e --- /dev/null +++ b/buffer_sink_unittest.cc @@ -0,0 +1,71 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/buffer_sink.h"

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "testing/gtest/include/gtest/gtest.h"

namespace zucchini {

// Filler byte used to detect buffer regions that PutValue() / PutRange() must
// leave untouched.
constexpr uint8_t kUninit = 0xFF;

class BufferSinkTest : public testing::Test {
 protected:
  // Backs |sink_| with a fresh 10-byte buffer filled with |kUninit|.
  BufferSinkTest()
      : buffer_(10, kUninit), sink_(buffer_.data(), buffer_.size()) {}

  std::vector<uint8_t> buffer_;
  BufferSink sink_;
};

TEST_F(BufferSinkTest, PutValue) {
  EXPECT_EQ(size_t(10), sink_.Remaining());

  EXPECT_TRUE(sink_.PutValue(uint32_t(0x76543210)));
  EXPECT_EQ(size_t(6), sink_.Remaining());

  EXPECT_TRUE(sink_.PutValue(uint32_t(0xFEDCBA98)));
  EXPECT_EQ(size_t(2), sink_.Remaining());

  // Only 2 bytes left: a 4-byte write fails and leaves the cursor unmoved.
  EXPECT_FALSE(sink_.PutValue(uint32_t(0x00)));
  EXPECT_EQ(size_t(2), sink_.Remaining());

  EXPECT_TRUE(sink_.PutValue(uint16_t(0x0010)));
  EXPECT_EQ(size_t(0), sink_.Remaining());

  // Assuming little-endian architecture.
  EXPECT_EQ(std::vector<uint8_t>(
                {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE, 0x10, 0x00}),
            buffer_);
}

TEST_F(BufferSinkTest, PutRange) {
  std::vector<uint8_t> range = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA,
                                0xDC, 0xFE, 0x10, 0x00, 0x42};

  EXPECT_EQ(size_t(10), sink_.Remaining());
  // An 11-byte range does not fit in 10 bytes: fails, nothing is written.
  EXPECT_FALSE(sink_.PutRange(range.begin(), range.end()));
  EXPECT_EQ(size_t(10), sink_.Remaining());

  EXPECT_TRUE(sink_.PutRange(range.begin(), range.begin() + 8));
  EXPECT_EQ(size_t(2), sink_.Remaining());
  // The last two bytes retain the |kUninit| filler.
  EXPECT_EQ(std::vector<uint8_t>({0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC,
                                  0xFE, kUninit, kUninit}),
            buffer_);

  // A 4-byte range no longer fits in the 2 remaining bytes.
  EXPECT_FALSE(sink_.PutRange(range.begin(), range.begin() + 4));
  EXPECT_EQ(size_t(2), sink_.Remaining());

  // range is not written
  EXPECT_EQ(std::vector<uint8_t>({0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC,
                                  0xFE, kUninit, kUninit}),
            buffer_);
}

}  // namespace zucchini
diff --git a/buffer_source.cc b/buffer_source.cc
new file mode 100644
index 0000000..d72d329
--- /dev/null
+++ b/buffer_source.cc
@@ -0,0 +1,105 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/buffer_source.h"

#include <algorithm>

#include "components/zucchini/algorithm.h"

namespace zucchini {

BufferSource::BufferSource(ConstBufferView buffer) : ConstBufferView(buffer) {}

// Advances the cursor by |n| bytes, clamped to the data remaining, and
// returns *this so calls can be chained.
BufferSource& BufferSource::Skip(size_type n) {
  remove_prefix(std::min(n, Remaining()));
  return *this;
}

// Returns true iff the bytes at the cursor match |bytes| exactly. The cursor
// is not moved.
bool BufferSource::CheckNextBytes(std::initializer_list<uint8_t> bytes) const {
  if (Remaining() < bytes.size())
    return false;
  return std::mismatch(bytes.begin(), bytes.end(), begin()).first ==
         bytes.end();
}

// Like CheckNextBytes(), but on a successful match also advances the cursor
// past the matched bytes.
bool BufferSource::ConsumeBytes(std::initializer_list<uint8_t> bytes) {
  if (!CheckNextBytes(bytes))
    return false;
  remove_prefix(bytes.size());
  return true;
}

// On success, points |buffer| at the |count| bytes starting at the cursor and
// advances the cursor past them. On failure |buffer| is left untouched.
bool BufferSource::GetRegion(size_type count, ConstBufferView* buffer) {
  DCHECK_NE(begin(), nullptr);
  if (Remaining() < count)
    return false;
  *buffer = ConstBufferView(begin(), count);
  remove_prefix(count);
  return true;
}

// [0aaaaaaa] => 00000000'00000000'00000000'0aaaaaaa
// [1aaaaaaa 0bbbbbbb] => 00000000'00000000'00bbbbbb'baaaaaaa
// [1aaaaaaa 1bbbbbbb 0ccccccc] => 00000000'000ccccc'ccbbbbbb'baaaaaaa
// [1aaaaaaa 1bbbbbbb 1ccccccc 0ddddddd] => 0000dddd'dddccccc'ccbbbbbb'baaaaaaa
// [1aaaaaaa 1bbbbbbb 1ccccccc 1ddddddd 0???eeee]
//     => eeeedddd'dddccccc'ccbbbbbb'baaaaaaa
// Note that "???" is discarded. Meanwhile, 1???eeee is invalid.
bool BufferSource::GetUleb128(uint32_t* ret) {
  // Each encoded byte carries 7 payload bits; bounded by 5 bytes (32-bit
  // result) and by the data actually available.
  int shift_lim =
      static_cast<int>(std::min<size_type>(kMaxLeb128Size, size())) * 7;
  const_iterator cur = cbegin();
  uint32_t value = 0U;
  for (int shift = 0; shift < shift_lim; shift += 7, ++cur) {
    uint32_t b = *cur;
    // When |shift == 28|, |(b & 0x7F) << shift| discards the "???" bits.
    value |= static_cast<uint32_t>(b & 0x7F) << shift;
    if (!(b & 0x80)) {
      // High bit clear: terminating byte. Commit result and cursor.
      *ret = value;
      seek(cur + 1);
      return true;
    }
  }
  // No terminating byte within limits: |*ret| and the cursor are untouched.
  return false;
}

// [0Saaaaaa] => SSSSSSSS'SSSSSSSS'SSSSSSSS'SSaaaaaa
// [1aaaaaaa 0Sbbbbbb] => SSSSSSSS'SSSSSSSS'SSSbbbbb'baaaaaaa
// [1aaaaaaa 1bbbbbbb 0Scccccc] => SSSSSSSS'SSSScccc'ccbbbbbb'baaaaaaa
// [1aaaaaaa 1bbbbbbb 1ccccccc 0Sdddddd] => SSSSSddd'dddccccc'ccbbbbbb'baaaaaaa
// [1aaaaaaa 1bbbbbbb 1ccccccc 1ddddddd 0???Seee]
//     => Seeedddd'dddccccc'ccbbbbbb'baaaaaaa
// Note that "???" is discarded. Meanwhile, 1???eeee is invalid.
bool BufferSource::GetSleb128(int32_t* ret) {
  int shift_lim =
      static_cast<int>(std::min<size_type>(kMaxLeb128Size, size())) * 7;
  const_iterator cur = cbegin();
  int32_t value = 0;
  for (int shift = 0; shift < shift_lim; shift += 7, ++cur) {
    uint32_t b = *cur;
    // When |shift == 28|, |(b & 0x7F) << shift| discards the "???" bits.
    value |= static_cast<int32_t>(static_cast<uint32_t>(b & 0x7F) << shift);
    if (!(b & 0x80)) {
      // For a full 5-byte encoding the sign bit already landed in bit 31;
      // otherwise sign-extend from the top payload bit (bit |shift + 6|).
      *ret = (shift == 28) ? value : SignExtend(shift + 6, value);
      seek(cur + 1);
      return true;
    }
  }
  return false;
}

// Advances the cursor past one LEB128-encoded value (signed or unsigned)
// without decoding it. Returns false (cursor untouched) if no terminating
// byte is found within the data / size limit.
bool BufferSource::SkipLeb128() {
  int lim = static_cast<int>(std::min<size_type>(kMaxLeb128Size, size()));
  const_iterator cur = cbegin();
  for (int i = 0; i < lim; ++i, ++cur) {
    if (!(*cur & 0x80)) {
      seek(cur + 1);
      return true;
    }
  }
  return false;
}

}  // namespace zucchini
diff --git a/buffer_source.h b/buffer_source.h
new file mode 100644
index 0000000..7426d4e
--- /dev/null
+++ b/buffer_source.h
@@ -0,0 +1,141 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+ +#ifndef COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_ +#define COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <initializer_list> +#include <type_traits> + +#include "base/check_op.h" +#include "components/zucchini/buffer_view.h" + +namespace zucchini { + +// BufferSource acts like an input stream with convenience methods to parse data +// from a contiguous sequence of raw data. The underlying ConstBufferView +// emulates a cursor to track current read position, and guards against buffer +// overrun. Where applicable, BufferSource should be passed by pointer to +// maintain cursor progress across reads. +class BufferSource : public ConstBufferView { + public: + // LEB128 info: http://dwarfstd.org/doc/dwarf-2.0.0.pdf , Section 7.6. + enum : size_t { kMaxLeb128Size = 5 }; + + static BufferSource FromRange(const_iterator first, const_iterator last) { + return BufferSource(ConstBufferView::FromRange(first, last)); + } + + using ConstBufferView::ConstBufferView; + BufferSource() = default; + explicit BufferSource(ConstBufferView buffer); + BufferSource(const BufferSource&) = default; + BufferSource& operator=(BufferSource&&) = default; + + // Moves the cursor forward by |n| bytes, or to the end if data is exhausted. + // Returns a reference to *this, to allow chaining, e.g.: + // if (!buffer_source.Skip(1024).GetValue<uint32_t>(&value)) { + // ... // Handle error. + // } + // Notice that Skip() defers error handling to GetValue(). + BufferSource& Skip(size_type n); + + // Returns true if |value| matches data starting at the cursor when + // reinterpreted as the integral type |T|. 
+ template <class T> + bool CheckNextValue(const T& value) const { + static_assert(std::is_integral<T>::value, + "Value type must be an integral type"); + DCHECK_NE(begin(), nullptr); + if (Remaining() < sizeof(T)) + return false; + return value == *reinterpret_cast<const T*>(begin()); + } + + // Returns true if the next bytes.size() bytes at the cursor match those in + // |bytes|. + bool CheckNextBytes(std::initializer_list<uint8_t> bytes) const; + + // Same as CheckNextBytes(), but moves the cursor by bytes.size() if read is + // successfull. + bool ConsumeBytes(std::initializer_list<uint8_t> bytes); + + // Tries to reinterpret data as type |T|, starting at the cursor and to write + // the result into |value|, while moving the cursor forward by sizeof(T). + // Returns true if sufficient data is available, and false otherwise. + template <class T> + bool GetValue(T* value) { + static_assert(std::is_standard_layout<T>::value, + "Value type must be a standard layout type"); + + DCHECK_NE(begin(), nullptr); + if (Remaining() < sizeof(T)) + return false; + *value = *reinterpret_cast<const T*>(begin()); + remove_prefix(sizeof(T)); + return true; + } + + // Tries to reinterpret data as type |T| at the cursor and to return a + // reinterpreted pointer of type |T| pointing into the underlying data, while + // moving the cursor forward by sizeof(T). Returns nullptr if insufficient + // data is available. + template <class T> + const T* GetPointer() { + static_assert(std::is_standard_layout<T>::value, + "Value type must be a standard layout type"); + + DCHECK_NE(begin(), nullptr); + if (Remaining() < sizeof(T)) + return nullptr; + const T* ptr = reinterpret_cast<const T*>(begin()); + remove_prefix(sizeof(T)); + return ptr; + } + + // Tries to reinterpret data as an array of type |T| with |count| elements, + // starting at the cursor, and to return a reinterpreted pointer of type |T| + // pointing into the underlying data, while advancing the cursor beyond the + // array. 
Returns nullptr if insufficient data is available. + template <class T> + const T* GetArray(size_t count) { + static_assert(std::is_standard_layout<T>::value, + "Value type must be a standard layout type"); + + if (Remaining() / sizeof(T) < count) + return nullptr; + const T* array = reinterpret_cast<const T*>(begin()); + remove_prefix(count * sizeof(T)); + return array; + } + + // If sufficient data is available, assigns |buffer| to point to a region of + // |size| bytes starting at the cursor, while advancing the cursor beyond the + // region, and returns true. Otherwise returns false. + bool GetRegion(size_type size, ConstBufferView* buffer); + + // Reads an Unsigned Little Endian Base 128 (uleb128) int at |first_|. If + // successful, writes the result to |value|, advances |first_|, and returns + // true. Otherwise returns false. + bool GetUleb128(uint32_t* value); + + // Reads a Signed Little Endian Base 128 (sleb128) int at |first_|. If + // successful, writes the result to |value|, advances |first_|, and returns + // true. Otherwise returns false. + bool GetSleb128(int32_t* value); + + // Reads uleb128 / sleb128 at |first_| but discards the result. If successful, + // advances |first_| and returns true. Otherwise returns false. + bool SkipLeb128(); + + // Returns the number of bytes remaining from cursor until end. + size_type Remaining() const { return size(); } +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_BUFFER_SOURCE_H_ diff --git a/buffer_source_unittest.cc b/buffer_source_unittest.cc new file mode 100644 index 0000000..8cb8b3e --- /dev/null +++ b/buffer_source_unittest.cc @@ -0,0 +1,347 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/buffer_source.h" + +#include <stddef.h> +#include <stdint.h> + +#include <iterator> +#include <string> +#include <tuple> +#include <vector> + +#include "components/zucchini/test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +using vec = std::vector<uint8_t>; + +class BufferSourceTest : public testing::Test { + protected: + std::vector<uint8_t> bytes_ = ParseHexString("10 32 54 76 98 BA DC FE 10 00"); + + BufferSource source_ = {bytes_.data(), bytes_.size()}; +}; + +TEST_F(BufferSourceTest, Skip) { + EXPECT_EQ(bytes_.size(), source_.Remaining()); + source_.Skip(2); + EXPECT_EQ(bytes_.size() - 2, source_.Remaining()); + source_.Skip(10); // Skipping past end just moves cursor to end. + EXPECT_EQ(size_t(0), source_.Remaining()); +} + +TEST_F(BufferSourceTest, CheckNextBytes) { + EXPECT_TRUE(source_.CheckNextBytes({0x10, 0x32, 0x54, 0x76})); + source_.Skip(4); + EXPECT_TRUE(source_.CheckNextBytes({0x98, 0xBA, 0xDC, 0xFE})); + + // Cursor has not advanced, so check fails. + EXPECT_FALSE(source_.CheckNextBytes({0x10, 0x00})); + + source_.Skip(4); + EXPECT_EQ(size_t(2), source_.Remaining()); + + // Goes beyond end by 2 bytes. + EXPECT_FALSE(source_.CheckNextBytes({0x10, 0x00, 0x00, 0x00})); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, ConsumeBytes) { + EXPECT_FALSE(source_.ConsumeBytes({0x10, 0x00})); + EXPECT_EQ(bytes_.size(), source_.Remaining()); + EXPECT_TRUE(source_.ConsumeBytes({0x10, 0x32, 0x54, 0x76})); + EXPECT_EQ(size_t(6), source_.Remaining()); + EXPECT_TRUE(source_.ConsumeBytes({0x98, 0xBA, 0xDC, 0xFE})); + EXPECT_EQ(size_t(2), source_.Remaining()); + + // Goes beyond end by 2 bytes. 
+ EXPECT_FALSE(source_.ConsumeBytes({0x10, 0x00, 0x00, 0x00})); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, CheckNextValue) { + EXPECT_TRUE(source_.CheckNextValue(uint32_t(0x76543210))); + EXPECT_FALSE(source_.CheckNextValue(uint32_t(0x0))); + EXPECT_TRUE(source_.CheckNextValue(uint64_t(0xFEDCBA9876543210))); + EXPECT_FALSE(source_.CheckNextValue(uint64_t(0x0))); + + source_.Skip(8); + EXPECT_EQ(size_t(2), source_.Remaining()); + + // Goes beyond end by 2 bytes. + EXPECT_FALSE(source_.CheckNextValue(uint32_t(0x1000))); +} + +// Supported by MSVC, g++, and clang++. +// Ensures no gaps in packing. +#pragma pack(push, 1) +struct ValueType { + uint32_t a; + uint16_t b; +}; +#pragma pack(pop) + +TEST_F(BufferSourceTest, GetValueIntegral) { + uint32_t value = 0; + EXPECT_TRUE(source_.GetValue(&value)); + EXPECT_EQ(uint32_t(0x76543210), value); + EXPECT_EQ(size_t(6), source_.Remaining()); + + EXPECT_TRUE(source_.GetValue(&value)); + EXPECT_EQ(uint32_t(0xFEDCBA98), value); + EXPECT_EQ(size_t(2), source_.Remaining()); + + EXPECT_FALSE(source_.GetValue(&value)); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetValueAggregate) { + ValueType value = {}; + EXPECT_TRUE(source_.GetValue(&value)); + EXPECT_EQ(uint32_t(0x76543210), value.a); + EXPECT_EQ(uint32_t(0xBA98), value.b); + EXPECT_EQ(size_t(4), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetRegion) { + ConstBufferView region; + EXPECT_TRUE(source_.GetRegion(0, ®ion)); + EXPECT_EQ(bytes_.size(), source_.Remaining()); + EXPECT_TRUE(region.empty()); + + EXPECT_TRUE(source_.GetRegion(2, ®ion)); + EXPECT_EQ(size_t(2), region.size()); + EXPECT_EQ(vec({0x10, 0x32}), vec(region.begin(), region.end())); + EXPECT_EQ(size_t(8), source_.Remaining()); + + EXPECT_FALSE(source_.GetRegion(bytes_.size(), ®ion)); + EXPECT_EQ(size_t(8), source_.Remaining()); + // |region| is left untouched. 
+ EXPECT_EQ(vec({0x10, 0x32}), vec(region.begin(), region.end())); + EXPECT_EQ(size_t(2), region.size()); +} + +TEST_F(BufferSourceTest, GetPointerIntegral) { + const uint32_t* ptr = source_.GetPointer<uint32_t>(); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0x76543210), *ptr); + EXPECT_EQ(size_t(6), source_.Remaining()); + + ptr = source_.GetPointer<uint32_t>(); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0xFEDCBA98), *ptr); + EXPECT_EQ(size_t(2), source_.Remaining()); + + EXPECT_EQ(nullptr, source_.GetPointer<uint32_t>()); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetPointerAggregate) { + const ValueType* ptr = source_.GetPointer<ValueType>(); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0x76543210), ptr->a); + EXPECT_EQ(uint32_t(0xBA98), ptr->b); + EXPECT_EQ(size_t(4), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetArrayIntegral) { + EXPECT_EQ(nullptr, source_.GetArray<uint32_t>(3)); + + const uint32_t* ptr = source_.GetArray<uint32_t>(2); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0x76543210), ptr[0]); + EXPECT_EQ(uint32_t(0xFEDCBA98), ptr[1]); + EXPECT_EQ(size_t(2), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetArrayAggregate) { + const ValueType* ptr = source_.GetArray<ValueType>(2); + EXPECT_EQ(nullptr, ptr); + + ptr = source_.GetArray<ValueType>(1); + + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(uint32_t(0x76543210), ptr[0].a); + EXPECT_EQ(uint32_t(0xBA98), ptr[0].b); + EXPECT_EQ(size_t(4), source_.Remaining()); +} + +TEST_F(BufferSourceTest, GetUleb128) { + using size_type = BufferSource::size_type; + // Result = {success, value, bytes_consumed}. + using Result = std::tuple<bool, uint32_t, size_type>; + + constexpr uint32_t kUnInit = 0xCCCCCCCC; // Arbitrary value. 
+ constexpr Result kBad{false, kUnInit, 0U}; + + auto run = [](const std::string hex_string) -> Result { + std::vector<uint8_t> bytes = ParseHexString(hex_string); + BufferSource source(ConstBufferView{bytes.data(), bytes.size()}); + BufferSource::iterator base = source.begin(); + // Initialize |value| to |kUnInit| to ensure no write on failure. + uint32_t value = kUnInit; + bool success = source.GetUleb128(&value); + return {success, value, source.begin() - base}; + }; + + auto good = [](uint32_t value, size_type bytes_consumed) -> Result { + return Result{true, value, bytes_consumed}; + }; + + EXPECT_EQ(good(0x0U, 1U), run("00")); + EXPECT_EQ(good(0x20U, 1U), run("20")); + EXPECT_EQ(good(0x42U, 1U), run("42")); + EXPECT_EQ(good(0x7FU, 1U), run("7F")); + EXPECT_EQ(kBad, run("80")); // Out of data. + EXPECT_EQ(good(0x0U, 2U), run("80 00")); // Redundant code. + EXPECT_EQ(good(0x80U, 2U), run("80 01")); + EXPECT_EQ(good(0x7FU, 2U), run("FF 00")); // Redundant (unsigned). + EXPECT_EQ(good(0x3FFFU, 2U), run("FF 7F")); + EXPECT_EQ(good(0x0U, 1U), run("00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80")); // Out of data. + EXPECT_EQ(kBad, run("F1 88")); // Out of data. + EXPECT_EQ(good(0x0U, 3U), run("80 80 00")); // Redundant code. + EXPECT_EQ(good(0x4000U, 3U), run("80 80 01")); + EXPECT_EQ(good(0x00100000U, 3U), run("80 80 40")); + EXPECT_EQ(good(0x001FFFFFU, 3U), run("FF FF 7F")); + EXPECT_EQ(good(0x0U, 1U), run("00 00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80 80")); // Out of data. + EXPECT_EQ(kBad, run("AB CD EF")); // Out of data. + EXPECT_EQ(good(0x0U, 4U), run("80 80 80 00")); // Redundant code. + EXPECT_EQ(good(0x00100000U, 4U), run("80 80 C0 00")); + EXPECT_EQ(good(0x00200000U, 4U), run("80 80 80 01")); + EXPECT_EQ(good(0x08000000U, 4U), run("80 80 80 40")); + EXPECT_EQ(good(0x001FC07FU, 4U), run("FF 80 FF 00")); + EXPECT_EQ(good(0x0U, 5U), run("80 80 80 80 00")); // Redundant code. 
+ EXPECT_EQ(good(0x10000000U, 5U), run("80 80 80 80 01")); + EXPECT_EQ(good(0x10204081U, 5U), run("81 81 81 81 01")); + EXPECT_EQ(good(0x7FFFFFFFU, 5U), run("FF FF FF FF 07")); + EXPECT_EQ(good(0x80000000U, 5U), run("80 80 80 80 08")); + EXPECT_EQ(good(0xFFFFFFFFU, 5U), run("FF FF FF FF 0F")); + EXPECT_EQ(kBad, run("FF FF FF FF 80")); // Too long / out of data. + EXPECT_EQ(good(0x0FFFFFFFU, 5U), run("FF FF FF FF 10")); // "1" discarded. + EXPECT_EQ(good(0x00000000U, 5U), run("80 80 80 80 20")); // "2" discarded. + EXPECT_EQ(good(0xA54A952AU, 5U), run("AA AA AA AA 7A")); // "7" discarded. + EXPECT_EQ(kBad, run("FF FF FF FF FF 00")); // Too long. +} + +TEST_F(BufferSourceTest, GetSleb128) { + using size_type = BufferSource::size_type; + // Result = {success, value, bytes_consumed}. + using Result = std::tuple<bool, int32_t, size_type>; + + constexpr int32_t kUnInit = 0xCCCCCCCC; // Arbitrary value. + constexpr Result kBad{false, kUnInit, 0U}; + + auto run = [](const std::string hex_string) -> Result { + std::vector<uint8_t> bytes = ParseHexString(hex_string); + BufferSource source(ConstBufferView{bytes.data(), bytes.size()}); + BufferSource::iterator base = source.begin(); + // Initialize |value| to |kUnInit| to ensure no write on failure. + int32_t value = kUnInit; + bool success = source.GetSleb128(&value); + return {success, value, source.begin() - base}; + }; + + auto good = [](int32_t value, size_type bytes_consumed) -> Result { + return Result{true, value, bytes_consumed}; + }; + + EXPECT_EQ(good(0x0, 1U), run("00")); + EXPECT_EQ(good(0x20U, 1U), run("20")); + EXPECT_EQ(good(-0x3E, 1U), run("42")); + EXPECT_EQ(good(-0x1, 1U), run("7F")); + EXPECT_EQ(kBad, run("80")); // Out of data. + EXPECT_EQ(good(0x0, 2U), run("80 00")); // Redundant code. + EXPECT_EQ(good(0x80, 2U), run("80 01")); + EXPECT_EQ(good(0x7F, 2U), run("FF 00")); // Not redudnant. + EXPECT_EQ(good(-0x1, 2U), run("FF 7F")); // Redundant code. 
+ EXPECT_EQ(good(0x0, 1U), run("00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80")); // Out of data. + EXPECT_EQ(kBad, run("F1 88")); // Out of data. + EXPECT_EQ(good(0x0, 3U), run("80 80 00")); // Redundant code. + EXPECT_EQ(good(0x4000, 3U), run("80 80 01")); + EXPECT_EQ(good(-0x100000, 3U), run("80 80 40")); + EXPECT_EQ(good(-0x1, 3U), run("FF FF 7F")); // Redundant code. + EXPECT_EQ(good(0x0, 1U), run("00 00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80 80")); // Out of data. + EXPECT_EQ(kBad, run("AB CD EF")); // Out of data. + EXPECT_EQ(good(0x0, 4U), run("80 80 80 00")); // Redundant code. + EXPECT_EQ(good(0x00100000, 4U), run("80 80 C0 00")); + EXPECT_EQ(good(0x00200000, 4U), run("80 80 80 01")); + EXPECT_EQ(good(-static_cast<int32_t>(0x08000000), 4U), run("80 80 80 40")); + EXPECT_EQ(good(0x001FC07F, 4U), run("FF 80 FF 00")); + EXPECT_EQ(good(0x0, 5U), run("80 80 80 80 00")); // Redundant code. + EXPECT_EQ(good(0x10000000, 5U), run("80 80 80 80 01")); + EXPECT_EQ(good(0x10204081, 5U), run("81 81 81 81 01")); + EXPECT_EQ(good(0x7FFFFFFF, 5U), run("FF FF FF FF 07")); + EXPECT_EQ(good(-static_cast<int32_t>(0x80000000), 5U), run("80 80 80 80 08")); + EXPECT_EQ(good(-0x1, 5U), run("FF FF FF FF 0F")); // Redundant code. + EXPECT_EQ(kBad, run("FF FF FF FF 80")); // Too long / out of data. + EXPECT_EQ(good(0x0FFFFFFF, 5U), run("FF FF FF FF 10")); // "1" discarded. + EXPECT_EQ(good(0x00000000, 5U), run("80 80 80 80 20")); // "2" discarded. + EXPECT_EQ(good(-0x5AB56AD6, 5U), run("AA AA AA AA 7A")); // "7" discarded. + EXPECT_EQ(kBad, run("FF FF FF FF FF 00")); // Too long. +} + +TEST_F(BufferSourceTest, SkipLeb128) { + using size_type = BufferSource::size_type; + // Result = {success, value, bytes_consumed}. 
+ using Result = std::tuple<bool, size_type>; + + constexpr Result kBad{false, 0U}; + + auto run = [](const std::string hex_string) -> Result { + std::vector<uint8_t> bytes = ParseHexString(hex_string); + BufferSource source(ConstBufferView{bytes.data(), bytes.size()}); + BufferSource::iterator base = source.begin(); + bool success = source.SkipLeb128(); + return {success, source.begin() - base}; + }; + + auto good = [](size_type bytes_consumed) -> Result { + return Result{true, bytes_consumed}; + }; + + EXPECT_EQ(good(1U), run("00")); + EXPECT_EQ(good(1U), run("20")); + EXPECT_EQ(good(1U), run("42")); + EXPECT_EQ(good(1U), run("7F")); + EXPECT_EQ(kBad, run("80")); // Out of data. + EXPECT_EQ(good(2U), run("80 00")); // Redundant code. + EXPECT_EQ(good(2U), run("80 01")); + EXPECT_EQ(good(2U), run("FF 00")); // Redundant (unsigned). + EXPECT_EQ(good(2U), run("FF 7F")); + EXPECT_EQ(good(1U), run("00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80")); // Out of data. + EXPECT_EQ(kBad, run("F1 88")); // Out of data. + EXPECT_EQ(good(3U), run("80 80 00")); // Redundant code. + EXPECT_EQ(good(3U), run("80 80 01")); + EXPECT_EQ(good(3U), run("80 80 40")); + EXPECT_EQ(good(3U), run("FF FF 7F")); + EXPECT_EQ(good(1U), run("00 00 80")); // Only reads byte 0. + EXPECT_EQ(kBad, run("80 80 80")); // Out of data. + EXPECT_EQ(kBad, run("AB CD EF")); // Out of data. + EXPECT_EQ(good(4U), run("80 80 80 00")); // Redundant code. + EXPECT_EQ(good(4U), run("80 80 C0 00")); + EXPECT_EQ(good(4U), run("80 80 80 01")); + EXPECT_EQ(good(4U), run("80 80 80 40")); + EXPECT_EQ(good(4U), run("FF 80 FF 00")); + EXPECT_EQ(good(5U), run("80 80 80 80 00")); // Redundant code. + EXPECT_EQ(good(5U), run("80 80 80 80 01")); + EXPECT_EQ(good(5U), run("81 81 81 81 01")); + EXPECT_EQ(good(5U), run("FF FF FF FF 07")); + EXPECT_EQ(good(5U), run("80 80 80 80 08")); + EXPECT_EQ(good(5U), run("FF FF FF FF 0F")); + EXPECT_EQ(kBad, run("FF FF FF FF 80")); // Too long / out of data. 
+ EXPECT_EQ(good(5U), run("FF FF FF FF 10")); // "1" discarded. + EXPECT_EQ(good(5U), run("80 80 80 80 20")); // "2" discarded. + EXPECT_EQ(good(5U), run("AA AA AA AA 7A")); // "7" discarded. + EXPECT_EQ(kBad, run("FF FF FF FF FF 00")); // Too long. +} + +} // namespace zucchini diff --git a/buffer_view.h b/buffer_view.h new file mode 100644 index 0000000..661e3c3 --- /dev/null +++ b/buffer_view.h @@ -0,0 +1,217 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_ +#define COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <type_traits> + +#include "base/check_op.h" +#include "components/zucchini/algorithm.h" + +namespace zucchini { + +// Describes a region within a buffer, with starting offset and size. +struct BufferRegion { + // The region data are stored as |offset| and |size|, but often it is useful + // to represent it as an interval [lo(), hi()) = [offset, offset + size). + size_t lo() const { return offset; } + size_t hi() const { return offset + size; } + + // Returns whether the Region fits in |[0, container_size)|. Special case: + // a size-0 region starting at |container_size| fits. + bool FitsIn(size_t container_size) const { + return offset <= container_size && container_size - offset >= size; + } + + // Returns |v| clipped to the inclusive range |[lo(), hi()]|. + size_t InclusiveClamp(size_t v) const { + return zucchini::InclusiveClamp(v, lo(), hi()); + } + + // Region data use size_t to match BufferViewBase::size_type, to make it + // convenient to index into buffer view. + size_t offset; + size_t size; +}; + +namespace internal { + +// TODO(huangs): Rename to BasicBufferView. +// BufferViewBase should not be used directly; it is an implementation used for +// both BufferView and MutableBufferView. 
+template <class T> +class BufferViewBase { + public: + using value_type = T; + using reference = T&; + using pointer = T*; + using iterator = T*; + using const_iterator = typename std::add_const<T>::type*; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + static BufferViewBase FromRange(iterator first, iterator last) { + DCHECK_GE(last, first); + BufferViewBase ret; + ret.first_ = first; + ret.last_ = last; + return ret; + } + + BufferViewBase() = default; + + BufferViewBase(iterator first, size_type size) + : first_(first), last_(first_ + size) { + DCHECK_GE(last_, first_); + } + + template <class U> + BufferViewBase(const BufferViewBase<U>& that) + : first_(that.begin()), last_(that.end()) {} + + template <class U> + BufferViewBase(BufferViewBase<U>&& that) + : first_(that.begin()), last_(that.end()) {} + + BufferViewBase(const BufferViewBase&) = default; + BufferViewBase& operator=(const BufferViewBase&) = default; + + // Iterators + + iterator begin() const { return first_; } + iterator end() const { return last_; } + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + // Capacity + + bool empty() const { return first_ == last_; } + size_type size() const { return last_ - first_; } + + // Returns whether the buffer is large enough to cover |region|. + bool covers(const BufferRegion& region) const { + return region.FitsIn(size()); + } + + // Returns whether the buffer is large enough to cover an array starting at + // |offset| with |num| elements, each taking |elt_size| bytes. + bool covers_array(size_t offset, size_t num, size_t elt_size) { + DCHECK_GT(elt_size, 0U); + // Use subtraction and division to avoid overflow. + return offset <= size() && (size() - offset) / elt_size >= num; + } + + // Element access + + // Returns the raw value at specified location |pos|. + // If |pos| is not within the range of the buffer, the process is terminated. 
+ reference operator[](size_type pos) const { + CHECK_LT(pos, size()); + return first_[pos]; + } + + // Returns a sub-buffer described by |region|. + BufferViewBase operator[](BufferRegion region) const { + DCHECK_LE(region.offset, size()); + DCHECK_LE(region.size, size() - region.offset); + return {begin() + region.offset, region.size}; + } + + template <class U> + const U& read(size_type pos) const { + // TODO(huangs): Use can_access<U>(pos) after fixing can_access(). + CHECK_LE(sizeof(U), size()); + CHECK_LE(pos, size() - sizeof(U)); + return *reinterpret_cast<const U*>(begin() + pos); + } + + template <class U> + void write(size_type pos, const U& value) { + // TODO(huangs): Use can_access<U>(pos) after fixing can_access(). + CHECK_LE(sizeof(U), size()); + CHECK_LE(pos, size() - sizeof(U)); + *reinterpret_cast<U*>(begin() + pos) = value; + } + + // Returns a mutable reference to an object type U whose raw storage starts + // at location |pos|. + template <class U> + U& modify(size_type pos) { + // TODO(huangs): Use can_access<U>(pos) after fixing can_access(). + CHECK_LE(sizeof(U), size()); + CHECK_LE(pos, size() - sizeof(U)); + return *reinterpret_cast<U*>(begin() + pos); + } + + template <class U> + bool can_access(size_type pos) const { + return pos < size() && size() - pos >= sizeof(U); + } + + // Returns a BufferRegion describing the full view, with offset = 0. If the + // BufferViewBase is derived from another, this does *not* return the + // original region used for its definition (hence "local"). + BufferRegion local_region() const { return BufferRegion{0, size()}; } + + bool equals(BufferViewBase other) const { + return size() == other.size() && std::equal(begin(), end(), other.begin()); + } + + // Modifiers + + void shrink(size_type new_size) { + DCHECK_LE(first_ + new_size, last_); + last_ = first_ + new_size; + } + + // Moves the start of the view forward by n bytes. 
+ void remove_prefix(size_type n) { + DCHECK_LE(n, size()); + first_ += n; + } + + // Moves the start of the view to |it|, which is in range [begin(), end()). + void seek(iterator it) { + DCHECK_GE(it, begin()); + DCHECK_LE(it, end()); + first_ = it; + } + + // Given |origin| that contains |*this|, minimally increase |first_| (possibly + // by 0) so that |first_ <= last_|, and |first_ - origin.first_| is a multiple + // of |alignment|. On success, updates |first_| and returns true. Otherwise + // returns false. + bool AlignOn(BufferViewBase origin, size_type alignment) { + DCHECK_GT(alignment, 0U); + DCHECK_LE(origin.first_, first_); + DCHECK_GE(origin.last_, last_); + size_type aligned_size = + AlignCeil(static_cast<size_type>(first_ - origin.first_), alignment); + if (aligned_size > static_cast<size_type>(last_ - origin.first_)) + return false; + first_ = origin.first_ + aligned_size; + return true; + } + + private: + iterator first_ = nullptr; + iterator last_ = nullptr; +}; + +} // namespace internal + +// Classes to encapsulate a contiguous sequence of raw data, without owning the +// encapsulated memory regions. These are intended to be used as value types. + +using ConstBufferView = internal::BufferViewBase<const uint8_t>; +using MutableBufferView = internal::BufferViewBase<uint8_t>; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_BUFFER_VIEW_H_ diff --git a/buffer_view_unittest.cc b/buffer_view_unittest.cc new file mode 100644 index 0000000..30170d7 --- /dev/null +++ b/buffer_view_unittest.cc @@ -0,0 +1,298 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 

#include "components/zucchini/buffer_view.h"

#include <stddef.h>
#include <stdint.h>

#include <iterator>
#include <type_traits>
#include <vector>

#include "base/test/gtest_util.h"
#include "components/zucchini/test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace zucchini {

class BufferViewTest : public testing::Test {
 protected:
  // Some tests might modify this.
  std::vector<uint8_t> bytes_ = ParseHexString("10 32 54 76 98 BA DC FE 10 00");
};

TEST_F(BufferViewTest, Size) {
  for (size_t len = 0; len <= bytes_.size(); ++len) {
    EXPECT_EQ(len, ConstBufferView(bytes_.data(), len).size());
    EXPECT_EQ(len, MutableBufferView(bytes_.data(), len).size());
  }
}

TEST_F(BufferViewTest, Empty) {
  // Empty view.
  EXPECT_TRUE(ConstBufferView(bytes_.data(), 0).empty());
  EXPECT_TRUE(MutableBufferView(bytes_.data(), 0).empty());

  for (size_t len = 1; len <= bytes_.size(); ++len) {
    EXPECT_FALSE(ConstBufferView(bytes_.data(), len).empty());
    EXPECT_FALSE(MutableBufferView(bytes_.data(), len).empty());
  }
}

TEST_F(BufferViewTest, FromRange) {
  constexpr size_t kSize = 10;
  uint8_t raw_data[kSize] = {0x10, 0x32, 0x54, 0x76, 0x98,
                             0xBA, 0xDC, 0xFE, 0x10, 0x00};
  ConstBufferView buffer =
      ConstBufferView::FromRange(std::begin(raw_data), std::end(raw_data));
  EXPECT_EQ(bytes_.size(), buffer.size());
  EXPECT_EQ(std::begin(raw_data), buffer.begin());

  MutableBufferView mutable_buffer =
      MutableBufferView::FromRange(std::begin(raw_data), std::end(raw_data));
  EXPECT_EQ(bytes_.size(), mutable_buffer.size());
  EXPECT_EQ(std::begin(raw_data), mutable_buffer.begin());

  // Reversed ranges are caught by DCHECK in debug builds.
  EXPECT_DCHECK_DEATH(
      ConstBufferView::FromRange(std::end(raw_data), std::begin(raw_data)));

  EXPECT_DCHECK_DEATH(MutableBufferView::FromRange(std::begin(raw_data) + 1,
                                                   std::begin(raw_data)));
}

TEST_F(BufferViewTest, Subscript) {
  ConstBufferView view(bytes_.data(), bytes_.size());

  EXPECT_EQ(0x10, view[0]);
  static_assert(!std::is_assignable<decltype(view[0]), uint8_t>::value,
                "BufferView values should not be mutable.");

  MutableBufferView mutable_view(bytes_.data(), bytes_.size());

  EXPECT_EQ(bytes_.data(), &mutable_view[0]);
  mutable_view[0] = 42;
  EXPECT_EQ(42, mutable_view[0]);
}

TEST_F(BufferViewTest, SubRegion) {
  ConstBufferView view(bytes_.data(), bytes_.size());

  ConstBufferView sub_view = view[{2, 4}];
  EXPECT_EQ(view.begin() + 2, sub_view.begin());
  EXPECT_EQ(size_t(4), sub_view.size());
}

TEST_F(BufferViewTest, Shrink) {
  ConstBufferView buffer(bytes_.data(), bytes_.size());

  buffer.shrink(bytes_.size());
  EXPECT_EQ(bytes_.size(), buffer.size());
  buffer.shrink(2);
  EXPECT_EQ(size_t(2), buffer.size());
  // shrink() can only reduce size; growing back triggers DCHECK.
  EXPECT_DCHECK_DEATH(buffer.shrink(bytes_.size()));
}

TEST_F(BufferViewTest, Read) {
  ConstBufferView buffer(bytes_.data(), bytes_.size());

  EXPECT_EQ(0x10U, buffer.read<uint8_t>(0));
  EXPECT_EQ(0x54U, buffer.read<uint8_t>(2));

  // Multi-byte reads interpret memory little-endian: bytes {10, 32} -> 0x3210.
  EXPECT_EQ(0x3210U, buffer.read<uint16_t>(0));
  EXPECT_EQ(0x7654U, buffer.read<uint16_t>(2));

  EXPECT_EQ(0x76543210U, buffer.read<uint32_t>(0));
  EXPECT_EQ(0xBA987654U, buffer.read<uint32_t>(2));

  EXPECT_EQ(0xFEDCBA9876543210ULL, buffer.read<uint64_t>(0));

  EXPECT_EQ(0x00, buffer.read<uint8_t>(9));
  EXPECT_DEATH(buffer.read<uint8_t>(10), "");

  EXPECT_EQ(0x0010FEDCU, buffer.read<uint32_t>(6));
  EXPECT_DEATH(buffer.read<uint32_t>(7), "");
}

TEST_F(BufferViewTest, Write) {
  MutableBufferView buffer(bytes_.data(), bytes_.size());

  buffer.write<uint32_t>(0, 0x01234567);
  buffer.write<uint32_t>(4, 0x89ABCDEF);
  EXPECT_EQ(ParseHexString("67 45 23 01 EF CD AB 89 10 00"),
            std::vector<uint8_t>(buffer.begin(), buffer.end()));

  buffer.write<uint8_t>(9, 0xFF);
  EXPECT_DEATH(buffer.write<uint8_t>(10, 0xFF), "");

  buffer.write<uint32_t>(6, 0xFFFFFFFF);
  EXPECT_DEATH(buffer.write<uint32_t>(7, 0xFFFFFFFF), "");
}

TEST_F(BufferViewTest, Modify) {
  struct TestStruct {
    uint32_t a;
    uint32_t b;
  };

  MutableBufferView buffer(bytes_.data(), bytes_.size());

  buffer.modify<TestStruct>(0).a = 0x01234567;
  buffer.modify<TestStruct>(0).b = 0x89ABCDEF;
  EXPECT_EQ(ParseHexString("67 45 23 01 EF CD AB 89 10 00"),
            std::vector<uint8_t>(buffer.begin(), buffer.end()));

  buffer.modify<uint8_t>(9);
  EXPECT_DEATH(buffer.modify<uint8_t>(10), "");

  buffer.modify<uint32_t>(6);
  EXPECT_DEATH(buffer.modify<uint32_t>(7), "");
}

TEST_F(BufferViewTest, CanAccess) {
  MutableBufferView buffer(bytes_.data(), bytes_.size());
  EXPECT_TRUE(buffer.can_access<uint32_t>(0));
  EXPECT_TRUE(buffer.can_access<uint32_t>(6));
  EXPECT_FALSE(buffer.can_access<uint32_t>(7));
  EXPECT_FALSE(buffer.can_access<uint32_t>(10));
  EXPECT_FALSE(buffer.can_access<uint32_t>(0xFFFFFFFFU));

  EXPECT_TRUE(buffer.can_access<uint8_t>(0));
  EXPECT_TRUE(buffer.can_access<uint8_t>(7));
  EXPECT_TRUE(buffer.can_access<uint8_t>(9));
  EXPECT_FALSE(buffer.can_access<uint8_t>(10));
  EXPECT_FALSE(buffer.can_access<uint8_t>(0xFFFFFFFF));
}

TEST_F(BufferViewTest, LocalRegion) {
  ConstBufferView view(bytes_.data(), bytes_.size());

  BufferRegion region = view.local_region();
  EXPECT_EQ(0U, region.offset);
  EXPECT_EQ(bytes_.size(), region.size);
}

TEST_F(BufferViewTest, Covers) {
  EXPECT_TRUE(ConstBufferView().covers({0, 0}));
  EXPECT_FALSE(ConstBufferView().covers({0, 1}));

  ConstBufferView view(bytes_.data(), bytes_.size());

  EXPECT_TRUE(view.covers({0, 0}));
  EXPECT_TRUE(view.covers({0, 1}));
  EXPECT_TRUE(view.covers({0, bytes_.size()}));
  EXPECT_FALSE(view.covers({0, bytes_.size() + 1}));
  EXPECT_FALSE(view.covers({1, bytes_.size()}));

  EXPECT_TRUE(view.covers({bytes_.size() - 1, 0}));
  EXPECT_TRUE(view.covers({bytes_.size() - 1, 1}));
  EXPECT_FALSE(view.covers({bytes_.size() - 1, 2}));
  EXPECT_TRUE(view.covers({bytes_.size(), 0}));
  EXPECT_FALSE(view.covers({bytes_.size(), 1}));
  EXPECT_FALSE(view.covers({bytes_.size() + 1, 0}));
  EXPECT_FALSE(view.covers({bytes_.size() + 1, 1}));

  // Extreme regions must not overflow the FitsIn() arithmetic.
  EXPECT_FALSE(view.covers({1, size_t(-1)}));
  EXPECT_FALSE(view.covers({size_t(-1), 1}));
  EXPECT_FALSE(view.covers({size_t(-1), size_t(-1)}));
}

TEST_F(BufferViewTest, CoversArray) {
  ConstBufferView view(bytes_.data(), bytes_.size());

  for (uint32_t i = 1; i <= bytes_.size(); ++i) {
    EXPECT_TRUE(view.covers_array(0, 1, i));
    EXPECT_TRUE(view.covers_array(0, i, 1));
    EXPECT_TRUE(view.covers_array(0, i, bytes_.size() / i));
    EXPECT_TRUE(view.covers_array(0, bytes_.size() / i, i));
    if (i < bytes_.size()) {
      EXPECT_TRUE(view.covers_array(i, 1, bytes_.size() - i));
      EXPECT_TRUE(view.covers_array(i, bytes_.size() - i, 1));
    }
    EXPECT_TRUE(view.covers_array(bytes_.size() - (bytes_.size() / i) * i, 1,
                                  bytes_.size() / i));
  }

  // Zero-count arrays are covered regardless of element size.
  EXPECT_TRUE(view.covers_array(0, 0, bytes_.size()));
  EXPECT_TRUE(view.covers_array(bytes_.size() - 1, 0, bytes_.size()));
  EXPECT_TRUE(view.covers_array(bytes_.size(), 0, bytes_.size()));
  EXPECT_TRUE(view.covers_array(0, 0, 0x10000));
  EXPECT_TRUE(view.covers_array(bytes_.size() - 1, 0, 0x10000));
  EXPECT_TRUE(view.covers_array(bytes_.size(), 0, 0x10000));

  EXPECT_FALSE(view.covers_array(0, 1, bytes_.size() + 1));
  EXPECT_FALSE(view.covers_array(0, 2, bytes_.size()));
  EXPECT_FALSE(view.covers_array(0, bytes_.size() + 11, 1));
  EXPECT_FALSE(view.covers_array(0, bytes_.size(), 2));
  EXPECT_FALSE(view.covers_array(1, bytes_.size(), 1));

  EXPECT_FALSE(view.covers_array(bytes_.size(), 1, 1));
  EXPECT_TRUE(view.covers_array(bytes_.size(), 0, 1));
  EXPECT_FALSE(view.covers_array(0, 0x10000, 0x10000));
}

TEST_F(BufferViewTest, Equals) {
  // Almost identical to |bytes_|, except at 2 places:  v  v
  std::vector<uint8_t> bytes2 = ParseHexString("10 32 54 76 98 AB CD FE 10 00");
  ConstBufferView view1(bytes_.data(), bytes_.size());
  ConstBufferView view2(&bytes2[0], bytes2.size());

  EXPECT_TRUE(view1.equals(view1));
  EXPECT_TRUE(view2.equals(view2));
  EXPECT_FALSE(view1.equals(view2));
  EXPECT_FALSE(view2.equals(view1));

  EXPECT_TRUE((view1[{0, 0}]).equals(view2[{0, 0}]));
  EXPECT_TRUE((view1[{0, 0}]).equals(view2[{5, 0}]));
  EXPECT_TRUE((view1[{0, 5}]).equals(view2[{0, 5}]));
  EXPECT_FALSE((view1[{0, 6}]).equals(view2[{0, 6}]));
  EXPECT_FALSE((view1[{0, 7}]).equals(view1[{0, 6}]));
  EXPECT_TRUE((view1[{5, 3}]).equals(view1[{5, 3}]));
  EXPECT_FALSE((view1[{5, 1}]).equals(view1[{5, 3}]));
  EXPECT_TRUE((view2[{0, 1}]).equals(view2[{8, 1}]));
  EXPECT_FALSE((view2[{1, 1}]).equals(view2[{8, 1}]));
}

TEST_F(BufferViewTest, AlignOn) {
  using size_type = ConstBufferView::size_type;
  ConstBufferView image(bytes_.data(), bytes_.size());
  ConstBufferView view = image;
  ASSERT_EQ(10U, view.size());

  auto get_pos = [&image, &view]() -> size_type {
    EXPECT_TRUE(view.begin() >= image.begin());  // Iterator compare.
    return static_cast<size_type>(view.begin() - image.begin());
  };

  EXPECT_EQ(0U, get_pos());
  view.remove_prefix(1U);
  EXPECT_EQ(1U, get_pos());
  view.remove_prefix(4U);
  EXPECT_EQ(5U, get_pos());

  // Align.
  EXPECT_TRUE(view.AlignOn(image, 1U));  // Trivial case.
  EXPECT_EQ(5U, get_pos());

  EXPECT_TRUE(view.AlignOn(image, 2U));
  EXPECT_EQ(6U, get_pos());
  EXPECT_TRUE(view.AlignOn(image, 2U));
  EXPECT_EQ(6U, get_pos());

  EXPECT_TRUE(view.AlignOn(image, 4U));
  EXPECT_EQ(8U, get_pos());
  EXPECT_TRUE(view.AlignOn(image, 2U));
  EXPECT_EQ(8U, get_pos());

  view.remove_prefix(1U);
  EXPECT_EQ(9U, get_pos());

  // Pos is at 9, align to 4 would yield 12, but size is 10, so this fails.
  EXPECT_FALSE(view.AlignOn(image, 4U));
  EXPECT_EQ(9U, get_pos());
  EXPECT_TRUE(view.AlignOn(image, 2U));
  EXPECT_EQ(10U, get_pos());
}

}  // namespace zucchini

// ==== crc32.cc ====

// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/crc32.h" + +#include <array> + +#include "base/check_op.h" + +namespace zucchini { + +namespace { + +std::array<uint32_t, 256> MakeCrc32Table() { + constexpr uint32_t kCrc32Poly = 0xEDB88320; + + std::array<uint32_t, 256> crc32Table; + for (uint32_t i = 0; i < 256; ++i) { + uint32_t r = i; + for (int j = 0; j < 8; ++j) + r = (r >> 1) ^ (kCrc32Poly & ~((r & 1) - 1)); + crc32Table[i] = r; + } + return crc32Table; +} + +} // namespace + +// Minimalistic CRC-32 implementation for Zucchini usage. Adapted from LZMA SDK +// (found at third_party/lzma_sdk/7zCrc.c), which is public domain. +uint32_t CalculateCrc32(const uint8_t* first, const uint8_t* last) { + DCHECK_GE(last, first); + + static const std::array<uint32_t, 256> kCrc32Table = MakeCrc32Table(); + + uint32_t ret = 0xFFFFFFFF; + for (; first != last; ++first) + ret = kCrc32Table[(ret ^ *first) & 0xFF] ^ (ret >> 8); + return ret ^ 0xFFFFFFFF; +} + +} // namespace zucchini @@ -0,0 +1,17 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_CRC32_H_ +#define COMPONENTS_ZUCCHINI_CRC32_H_ + +#include <stdint.h> + +namespace zucchini { + +// Calculates CRC-32 of the given range [|first|, |last|). +uint32_t CalculateCrc32(const uint8_t* first, const uint8_t* last); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_CRC32_H_ diff --git a/crc32_unittest.cc b/crc32_unittest.cc new file mode 100644 index 0000000..5ec85a8 --- /dev/null +++ b/crc32_unittest.cc @@ -0,0 +1,47 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 

#include "components/zucchini/crc32.h"

#include <stdint.h>

#include <iterator>

#include "base/test/gtest_util.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace zucchini {

// Fixture data: 10 bytes ending in 0x00, so the "single byte of 0" and
// "excluding 0 at end" cases below are meaningful.
constexpr uint8_t bytes[] = {0x10, 0x32, 0x54, 0x76, 0x98,
                             0xBA, 0xDC, 0xFE, 0x10, 0x00};

TEST(Crc32Test, All) {
  // Results can be verified with any CRC-32 calculator found online.

  // Empty region.
  EXPECT_EQ(0x00000000U, CalculateCrc32(std::begin(bytes), std::begin(bytes)));

  // Single byte.
  EXPECT_EQ(0xCFB5FFE9U,
            CalculateCrc32(std::begin(bytes), std::begin(bytes) + 1));

  // Same byte (0x10) appearing at different location.
  EXPECT_EQ(0xCFB5FFE9U,
            CalculateCrc32(std::begin(bytes) + 8, std::begin(bytes) + 9));

  // Single byte of 0.
  EXPECT_EQ(0xD202EF8DU,
            CalculateCrc32(std::begin(bytes) + 9, std::end(bytes)));

  // Whole region.
  EXPECT_EQ(0xA86FD7D6U, CalculateCrc32(std::begin(bytes), std::end(bytes)));

  // Whole region excluding 0 at end.
  EXPECT_EQ(0x0762F38BU,
            CalculateCrc32(std::begin(bytes), std::begin(bytes) + 9));

  // Reversed range (last < first) triggers the DCHECK in debug builds.
  EXPECT_DCHECK_DEATH(CalculateCrc32(std::begin(bytes) + 1, std::begin(bytes)));
}

}  // namespace zucchini

// ==== disassembler.cc ====

// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/disassembler.h"

#include "base/check_op.h"

namespace zucchini {

/******** EmptyReferenceReader ********/

// Always reports end-of-stream: no references are produced.
absl::optional<Reference> EmptyReferenceReader::GetNext() {
  return absl::nullopt;
}

/******** EmptyReferenceWriter ********/

// Intentionally a no-op: references handed in are discarded.
void EmptyReferenceWriter::PutNext(Reference /* reference */) {}

/******** ReferenceGroup ********/

std::unique_ptr<ReferenceReader> ReferenceGroup::GetReader(
    offset_t lower,
    offset_t upper,
    Disassembler* disasm) const {
  DCHECK_LE(lower, upper);
  DCHECK_LE(upper, disasm->size());
  return (disasm->*reader_factory_)(lower, upper);
}

std::unique_ptr<ReferenceReader> ReferenceGroup::GetReader(
    Disassembler* disasm) const {
  // Full-image read: delegate to the factory over [0, size).
  return (disasm->*reader_factory_)(0, static_cast<offset_t>(disasm->size()));
}

std::unique_ptr<ReferenceWriter> ReferenceGroup::GetWriter(
    MutableBufferView image,
    Disassembler* disasm) const {
  // |image| must alias the exact buffer that |disasm| originally parsed.
  DCHECK_EQ(image.begin(), disasm->image().begin());
  DCHECK_EQ(image.size(), disasm->size());
  return (disasm->*writer_factory_)(image);
}

/******** Disassembler ********/

Disassembler::Disassembler(int num_equivalence_iterations)
    : num_equivalence_iterations_(num_equivalence_iterations) {}

Disassembler::~Disassembler() = default;

}  // namespace zucchini

// ==== disassembler.h ====

// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
#define COMPONENTS_ZUCCHINI_DISASSEMBLER_H_

#include <stddef.h>

#include <memory>
#include <string>
#include <vector>

#include "components/zucchini/buffer_view.h"
#include "components/zucchini/image_utils.h"

namespace zucchini {

// A vacuous ReferenceReader that produces no references.
class EmptyReferenceReader : public ReferenceReader {
 public:
  absl::optional<Reference> GetNext() override;
};

// A vacuous EmptyReferenceWriter that does not write.
class EmptyReferenceWriter : public ReferenceWriter {
 public:
  void PutNext(Reference reference) override;
};

// Disassembler needs to be declared before ReferenceGroup because the latter
// contains member pointers based on the former, and we use a compiler flag,
// -fcomplete-member-pointers, which enforces that member pointer base types are
// complete. This flag helps prevent us from running into problems in the
// Microsoft C++ ABI (see https://crbug.com/847724).

class ReferenceGroup;

// A Disassembler is used to encapsulate architecture specific operations, to:
// - Describe types of references found in the architecture using traits.
// - Extract references contained in an image file.
// - Correct target for some references.
class Disassembler {
 public:
  // Attempts to parse |image| and create an architecture-specific
  // Disassembler, as determined by DIS, which is inherited from Disassembler.
  // Returns an instance of DIS if successful, and null otherwise.
  template <class DIS>
  static std::unique_ptr<DIS> Make(ConstBufferView image) {
    auto disasm = std::make_unique<DIS>();
    if (!disasm->Parse(image))
      return nullptr;
    return disasm;
  }

  Disassembler(const Disassembler&) = delete;
  const Disassembler& operator=(const Disassembler&) = delete;
  virtual ~Disassembler();

  // Returns the type of executable handled by the Disassembler.
  virtual ExecutableType GetExeType() const = 0;

  // Returns a more detailed description of the executable type.
  virtual std::string GetExeTypeString() const = 0;

  // Creates and returns a vector that contains all groups of references.
  // Groups must be aggregated by pool.
  virtual std::vector<ReferenceGroup> MakeReferenceGroups() const = 0;

  ConstBufferView image() const { return image_; }
  size_t size() const { return image_.size(); }

  int num_equivalence_iterations() const { return num_equivalence_iterations_; }

 protected:
  explicit Disassembler(int num_equivalence_iterations);

  // Parses |image| and initializes internal states. Returns true on success.
  // This must be called once and before any other operation.
  virtual bool Parse(ConstBufferView image) = 0;

  // Raw image data. After Parse(), a Disassembler should shrink this to contain
  // only the portion containing the executable file it recognizes.
  ConstBufferView image_;

  // The number of iterations to run for equivalence map generation. This should
  // roughly be the max length of reference indirection chains.
  int num_equivalence_iterations_;
};

// A ReferenceGroup is associated with a specific |type| and has convenience
// methods to obtain readers and writers for that type. A ReferenceGroup does
// not store references; it is a lightweight class that communicates with the
// disassembler to operate on them.
class ReferenceGroup {
 public:
  // Member function pointer used to obtain a ReferenceReader.
  using ReaderFactory = std::unique_ptr<ReferenceReader> (
      Disassembler::*)(offset_t lower, offset_t upper);

  // Member function pointer used to obtain a ReferenceWriter.
  using WriterFactory = std::unique_ptr<ReferenceWriter> (Disassembler::*)(
      MutableBufferView image);

  // RefinedReaderFactory and RefinedWriterFactory don't have to be identical
  // to ReaderFactory and WriterFactory, but they must be convertible. As a
  // result, they can be pointer to member function of a derived Disassembler.
  template <class RefinedReaderFactory, class RefinedWriterFactory>
  ReferenceGroup(ReferenceTypeTraits traits,
                 RefinedReaderFactory reader_factory,
                 RefinedWriterFactory writer_factory)
      : traits_(traits),
        reader_factory_(static_cast<ReaderFactory>(reader_factory)),
        writer_factory_(static_cast<WriterFactory>(writer_factory)) {}

  // Returns a reader for all references in the binary.
  // Invalidates any other writer or reader previously obtained for |disasm|.
  std::unique_ptr<ReferenceReader> GetReader(Disassembler* disasm) const;

  // Returns a reader for references whose bytes are entirely contained in
  // |[lower, upper)|.
  // Invalidates any other writer or reader previously obtained for |disasm|.
  std::unique_ptr<ReferenceReader> GetReader(offset_t lower,
                                             offset_t upper,
                                             Disassembler* disasm) const;

  // Returns a writer for references in |image|, assuming that |image| was the
  // same one initially parsed by |disasm|.
  // Invalidates any other writer or reader previously obtained for |disasm|.
  std::unique_ptr<ReferenceWriter> GetWriter(MutableBufferView image,
                                             Disassembler* disasm) const;

  // Returns traits describing the reference type.
  const ReferenceTypeTraits& traits() const { return traits_; }

  // Shorthand for traits().width.
  offset_t width() const { return traits().width; }

  // Shorthand for traits().type_tag.
  TypeTag type_tag() const { return traits().type_tag; }

  // Shorthand for traits().pool_tag.
  PoolTag pool_tag() const { return traits().pool_tag; }

 private:
  ReferenceTypeTraits traits_;
  ReaderFactory reader_factory_ = nullptr;
  WriterFactory writer_factory_ = nullptr;
};

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_DISASSEMBLER_H_

// ==== disassembler_dex.cc ====

// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/disassembler_dex.h"

#include <stddef.h>
#include <stdlib.h>

#include <algorithm>
#include <cctype>
#include <cmath>
#include <iterator>
#include <set>
#include <utility>

#include "base/bind.h"
#include "base/callback.h"
#include "base/logging.h"
#include "base/numerics/checked_math.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/stringprintf.h"
#include "components/zucchini/buffer_source.h"
#include "components/zucchini/buffer_view.h"
#include "components/zucchini/io_utils.h"
#include "third_party/abseil-cpp/absl/types/optional.h"

namespace zucchini {

namespace {

// A DEX item specified by an offset, if absent, has a sentinel value of 0 since
// 0 is never a valid item offset (it points to magic at start of DEX).
constexpr offset_t kDexSentinelOffset = 0U;

// A DEX item specified by an index, if absent, has a sentinel value of
// NO_INDEX = 0xFFFFFFFF. This is represented as an offset_t for uniformity.
constexpr offset_t kDexSentinelIndexAsOffset = 0xFFFFFFFFU;

static_assert(kDexSentinelIndexAsOffset != kInvalidOffset,
              "Sentinel should not be confused with invalid offset.");

// Size of a Dalvik instruction unit. Need to cast to signed int because
// sizeof() gives size_t, which dominates when operated on ptrdiff_t, then
// wrecks havoc for base::checked_cast<int16_t>().
constexpr int kInstrUnitSize = static_cast<int>(sizeof(uint16_t));

// Checks if |offset| is byte aligned to 32 bits or 4 bytes.
bool Is32BitAligned(offset_t offset) {
  return offset % 4 == 0;
}

// Returns a lower bound for the size of an item of type |type_item_code|.
// - For fixed-length items (e.g., kTypeFieldIdItem) this is the exact size.
// - For variant-length items (e.g., kTypeCodeItem), returns a value that is
//   known to be less than the item length (e.g., header size).
+// - For items not handled by this function, returns 1 for sanity check. +size_t GetItemBaseSize(uint16_t type_item_code) { + switch (type_item_code) { + case dex::kTypeStringIdItem: + return sizeof(dex::StringIdItem); + case dex::kTypeTypeIdItem: + return sizeof(dex::TypeIdItem); + case dex::kTypeProtoIdItem: + return sizeof(dex::ProtoIdItem); + case dex::kTypeFieldIdItem: + return sizeof(dex::FieldIdItem); + case dex::kTypeMethodIdItem: + return sizeof(dex::MethodIdItem); + case dex::kTypeClassDefItem: + return sizeof(dex::ClassDefItem); + // No need to handle dex::kTypeMapList. + case dex::kTypeTypeList: + return sizeof(uint32_t); // Variable-length. + case dex::kTypeAnnotationSetRefList: + return sizeof(uint32_t); // Variable-length. + case dex::kTypeAnnotationSetItem: + return sizeof(uint32_t); // Variable-length. + case dex::kTypeCodeItem: + return sizeof(dex::CodeItem); // Variable-length. + case dex::kTypeAnnotationsDirectoryItem: + return sizeof(dex::AnnotationsDirectoryItem); // Variable-length. + default: + return 1U; // Unhandled item. For sanity check assume size >= 1. + } +} + +/******** CodeItemParser ********/ + +// A parser to extract successive code items from a DEX image whose header has +// been parsed. +class CodeItemParser { + public: + using size_type = BufferSource::size_type; + + explicit CodeItemParser(ConstBufferView image) : image_(image) {} + + // Initializes the parser, returns true on success and false on error. + bool Init(const dex::MapItem& code_map_item) { + // Sanity check to quickly fail if |code_map_item.offset| or + // |code_map_item.size| is too large. This is a heuristic because code item + // sizes need to be parsed (sizeof(dex::CodeItem) is a lower bound). 
+ if (!image_.covers_array(code_map_item.offset, code_map_item.size, + sizeof(dex::CodeItem))) { + return false; + } + source_ = std::move(BufferSource(image_).Skip(code_map_item.offset)); + return true; + } + + // Extracts the header of the next code item, and skips the variable-length + // data. Returns the offset of the code item if successful. Otherwise returns + // kInvalidOffset, and thereafter the parser becomes valid. For reference, + // here's a pseudo-struct of a complete code item: + // + // struct code_item { + // // 4-byte aligned here. + // // 16-byte header defined (dex::CodeItem). + // uint16_t registers_size; + // uint16_t ins_size; + // uint16_t outs_size; + // uint16_t tries_size; + // uint32_t debug_info_off; + // uint32_t insns_size; + // + // // Variable-length data follow. + // uint16_t insns[insns_size]; // Instruction bytes. + // uint16_t padding[(tries_size > 0 && insns_size % 2 == 1) ? 1 : 0]; + // + // if (tries_size > 0) { + // // 4-byte aligned here. + // struct try_item { // dex::TryItem. + // uint32_t start_addr; + // uint16_t insn_count; + // uint16_t handler_off; + // } tries[tries_size]; + // + // struct encoded_catch_handler_list { + // uleb128 handlers_size; + // struct encoded_catch_handler { + // sleb128 encoded_catch_handler_size; + // struct encoded_type_addr_pair { + // uleb128 type_idx; + // uleb128 addr; + // } handlers[abs(encoded_catch_handler_size)]; + // if (encoded_catch_handler_size <= 0) { + // uleb128 catch_all_addr; + // } + // } handlers_list[handlers_size]; + // } handlers_group; // Confusingly called "handlers" in DEX doc. + // } + // + // // Padding to 4-bytes align next code_item *only if more exist*. + // } + offset_t GetNext() { + // Read header CodeItem. 
+ if (!source_.AlignOn(image_, 4U)) + return kInvalidOffset; + const offset_t code_item_offset = + base::checked_cast<offset_t>(source_.begin() - image_.begin()); + const auto* code_item = source_.GetPointer<const dex::CodeItem>(); + if (!code_item) + return kInvalidOffset; + DCHECK(Is32BitAligned(code_item_offset)); + + // TODO(huangs): Fail if |code_item->insns_size == 0| (Constraint A1). + // Skip instruction bytes. + if (!source_.GetArray<uint16_t>(code_item->insns_size)) + return kInvalidOffset; + // Skip padding if present. + if (code_item->tries_size > 0 && !source_.AlignOn(image_, 4U)) + return kInvalidOffset; + + // Skip tries[] and handlers_group to arrive at the next code item. Parsing + // is nontrivial due to use of uleb128 / sleb128. + if (code_item->tries_size > 0) { + // Skip (try_item) tries[]. + if (!source_.GetArray<dex::TryItem>(code_item->tries_size)) + return kInvalidOffset; + + // Skip handlers_group. + uint32_t handlers_size = 0; + if (!source_.GetUleb128(&handlers_size)) + return kInvalidOffset; + // Sanity check to quickly reject excessively large |handlers_size|. + if (source_.Remaining() < static_cast<size_type>(handlers_size)) + return kInvalidOffset; + + // Skip (encoded_catch_handler) handlers_list[]. + for (uint32_t k = 0; k < handlers_size; ++k) { + int32_t encoded_catch_handler_size = 0; + if (!source_.GetSleb128(&encoded_catch_handler_size)) + return kInvalidOffset; + const size_type abs_size = std::abs(encoded_catch_handler_size); + if (source_.Remaining() < abs_size) // Sanity check. + return kInvalidOffset; + // Skip (encoded_type_addr_pair) handlers[]. + for (size_type j = 0; j < abs_size; ++j) { + if (!source_.SkipLeb128() || !source_.SkipLeb128()) + return kInvalidOffset; + } + // Skip catch_all_addr. + if (encoded_catch_handler_size <= 0) { + if (!source_.SkipLeb128()) + return kInvalidOffset; + } + } + } + // Success! 
|code_item->insns_size| is validated, but its content is still + // considered unsafe and requires validation. + return code_item_offset; + } + + // Given |code_item_offset| that points to the start of a valid code item in + // |image|, returns |insns| bytes as ConstBufferView. + static ConstBufferView GetCodeItemInsns(ConstBufferView image, + offset_t code_item_offset) { + BufferSource source(BufferSource(image).Skip(code_item_offset)); + const auto* code_item = source.GetPointer<const dex::CodeItem>(); + DCHECK(code_item); + BufferRegion insns{0, code_item->insns_size * kInstrUnitSize}; + DCHECK(source.covers(insns)); + return source[insns]; + } + + private: + ConstBufferView image_; + BufferSource source_; +}; + +/******** InstructionParser ********/ + +// A class that successively reads |code_item| for Dalvik instructions, which +// are found at |insns|, spanning |insns_size| uint16_t "units". These units +// store instructions followed by optional non-instruction "payload". Finding +// payload boundary requires parsing: On finding an instruction that uses (and +// points to) payload, the boundary is updated. +class InstructionParser { + public: + struct Value { + offset_t instr_offset; + const dex::Instruction* instr = nullptr; // null for unknown instructions. + }; + + // Returns pointer to DEX Instruction data for |opcode|, or null if |opcode| + // is unknown. An internal initialize-on-first-use table is used for fast + // lookup. 
+ const dex::Instruction* FindDalvikInstruction(uint8_t opcode) { + static bool is_init = false; + static const dex::Instruction* instruction_table[256]; + if (!is_init) { + is_init = true; + std::fill(std::begin(instruction_table), std::end(instruction_table), + nullptr); + for (const dex::Instruction& instr : dex::kByteCode) { + std::fill(instruction_table + instr.opcode, + instruction_table + instr.opcode + instr.variant, &instr); + } + } + return instruction_table[opcode]; + } + + InstructionParser() = default; + + InstructionParser(ConstBufferView image, offset_t base_offset) + : image_begin_(image.begin()), + insns_(CodeItemParser::GetCodeItemInsns(image, base_offset)), + payload_boundary_(insns_.end()) {} + + // Reads the next instruction. On success, makes the data read available via + // value() and returns true. Otherwise (done or found error) returns false. + bool ReadNext() { + // Do not scan past payload boundary. + if (insns_.begin() >= payload_boundary_) + return false; + + const offset_t instr_offset = + base::checked_cast<offset_t>(insns_.begin() - image_begin_); + const uint8_t op = insns_.read<uint8_t>(0); + const dex::Instruction* instr = FindDalvikInstruction(op); + + // Stop on finding unknown instructions. ODEX files might trigger this. + if (!instr) { + LOG(WARNING) << "Unknown Dalvik instruction detected at " + << AsHex<8>(instr_offset) << "."; + return false; + } + + const int instr_length_units = instr->layout; + const size_t instr_length_bytes = instr_length_units * kInstrUnitSize; + if (insns_.size() < instr_length_bytes) + return false; + + // Handle instructions with variable-length data payload (31t). + if (instr->opcode == 0x26 || // fill-array-data + instr->opcode == 0x2B || // packed-switch + instr->opcode == 0x2C) { // sparse-switch + const int32_t unsafe_payload_rel_units = insns_.read<int32_t>(2); + // Payload must be in current code item, after current instruction. 
+ if (unsafe_payload_rel_units < instr_length_units || + static_cast<uint32_t>(unsafe_payload_rel_units) >= + insns_.size() / kInstrUnitSize) { + LOG(WARNING) << "Invalid payload found."; + return false; + } + // Update boundary between instructions and payload. + const ConstBufferView::const_iterator payload_it = + insns_.begin() + unsafe_payload_rel_units * kInstrUnitSize; + payload_boundary_ = std::min(payload_boundary_, payload_it); + } + + insns_.remove_prefix(instr_length_bytes); + value_ = {instr_offset, instr}; + return true; + } + + const Value& value() const { return value_; } + + private: + ConstBufferView::const_iterator image_begin_; + ConstBufferView insns_; + ConstBufferView::const_iterator payload_boundary_; + Value value_; +}; + +/******** InstructionReferenceReader ********/ + +// A class to visit |code_items|, parse instructions, and emit embedded +// References of a type determined by |filter_| and |mapper_|. Only References +// located in |[lo, hi)| are emitted. |lo| and |hi| are assumed to never +// straddle the body of a Reference. +class InstructionReferenceReader : public ReferenceReader { + public: + // A function that takes a parsed Dalvik instruction and decides whether it + // contains a specific type of Reference. If true, then returns the Reference + // location. Otherwise returns kInvalidOffset. + using Filter = + base::RepeatingCallback<offset_t(const InstructionParser::Value&)>; + // A function that takes Reference location from |filter_| to extract the + // stored target. If valid, returns it. Otherwise returns kInvalidOffset. 
+ using Mapper = base::RepeatingCallback<offset_t(offset_t)>; + + InstructionReferenceReader(ConstBufferView image, + offset_t lo, + offset_t hi, + const std::vector<offset_t>& code_item_offsets, + Filter&& filter, + Mapper&& mapper) + : image_(image), + lo_(lo), + hi_(hi), + end_it_(code_item_offsets.end()), + filter_(std::move(filter)), + mapper_(std::move(mapper)) { + const auto begin_it = code_item_offsets.begin(); + // Use binary search to find the code item that contains |lo_|. + auto comp = [](offset_t test_offset, offset_t code_item_offset) { + return test_offset < code_item_offset; + }; + cur_it_ = std::upper_bound(begin_it, end_it_, lo_, comp); + if (cur_it_ != begin_it) + --cur_it_; + parser_ = InstructionParser(image_, *cur_it_); + } + + // ReferenceReader: + absl::optional<Reference> GetNext() override { + while (true) { + while (parser_.ReadNext()) { + const auto& v = parser_.value(); + DCHECK_NE(v.instr, nullptr); + if (v.instr_offset >= hi_) + return absl::nullopt; + const offset_t location = filter_.Run(v); + if (location == kInvalidOffset || location < lo_) + continue; + // The general check is |location + reference_width > hi_|. However, by + // assumption |hi_| and |lo_| do not straddle the body of a Reference. + // So |reference_width| is unneeded. 
+ if (location >= hi_) + return absl::nullopt; + offset_t target = mapper_.Run(location); + if (target != kInvalidOffset) + return Reference{location, target}; + else + LOG(WARNING) << "Invalid target at " << AsHex<8>(location) << "."; + } + ++cur_it_; + if (cur_it_ == end_it_) + return absl::nullopt; + parser_ = InstructionParser(image_, *cur_it_); + } + } + + private: + const ConstBufferView image_; + const offset_t lo_; + const offset_t hi_; + const std::vector<offset_t>::const_iterator end_it_; + const Filter filter_; + const Mapper mapper_; + std::vector<offset_t>::const_iterator cur_it_; + InstructionParser parser_; +}; + +/******** ItemReferenceReader ********/ + +// A class to visit fixed-size item elements (determined by |item_size|) and +// emit a "member variable of interest" (MVI, determined by |rel_location| and +// |mapper|) as Reference. Only MVIs lying in |[lo, hi)| are emitted. |lo| and +// |hi| are assumed to never straddle the body of a Reference. +class ItemReferenceReader : public ReferenceReader { + public: + // A function that takes an MVI's location and emit its target offset. + using Mapper = base::RepeatingCallback<offset_t(offset_t)>; + + // |item_size| is the size of a fixed-size item. |rel_location| is the + // relative location of MVI from the start of the item containing it. 
+ ItemReferenceReader(offset_t lo, + offset_t hi, + const dex::MapItem& map_item, + size_t item_size, + size_t rel_location, + Mapper&& mapper) + : hi_(hi), + item_base_offset_(base::checked_cast<offset_t>(map_item.offset)), + num_items_(base::checked_cast<uint32_t>(map_item.size)), + item_size_(base::checked_cast<uint32_t>(item_size)), + rel_location_(base::checked_cast<uint32_t>(rel_location)), + mapper_(std::move(mapper)) { + static_assert(sizeof(decltype(map_item.offset)) <= sizeof(offset_t), + "map_item.offset too large."); + static_assert(sizeof(decltype(map_item.size)) <= sizeof(offset_t), + "map_item.size too large."); + if (!item_base_offset_) { + // Empty item: Assign |cur_idx| to |num_items_| to skip everything. + cur_idx_ = num_items_; + } else if (lo < item_base_offset_) { + cur_idx_ = 0; + } else if (lo < OffsetOfIndex(num_items_)) { + cur_idx_ = (lo - item_base_offset_) / item_size_; + // Fine-tune: Advance if |lo| lies beyond the MVI. + if (lo > OffsetOfIndex(cur_idx_) + rel_location_) + ++cur_idx_; + } else { + cur_idx_ = num_items_; + } + } + + // ReferenceReader: + absl::optional<Reference> GetNext() override { + while (cur_idx_ < num_items_) { + const offset_t item_offset = OffsetOfIndex(cur_idx_); + const offset_t location = item_offset + rel_location_; + // The general check is |location + reference_width > hi_|. However, by + // assumption |hi_| and |lo_| do not straddle the body of a Reference. So + // |reference_width| is unneeded. + if (location >= hi_) + break; + const offset_t target = mapper_.Run(location); + + // kDexSentinelOffset (0) may appear for the following: + // - ProtoIdItem: parameters_off. + // - ClassDefItem: interfaces_off, annotations_off, class_data_off, + // static_values_off. + // - AnnotationsDirectoryItem: class_annotations_off. + // - AnnotationSetRefItem: annotations_off. + // kDexSentinelIndexAsOffset (0xFFFFFFFF) may appear for the following: + // - ClassDefItem: superclass_idx, source_file_idx. 
+ if (target == kDexSentinelOffset || target == kDexSentinelIndexAsOffset) { + ++cur_idx_; + continue; + } + + if (target == kInvalidOffset) { + LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << "."; + break; + } + ++cur_idx_; + return Reference{location, target}; + } + return absl::nullopt; + } + + private: + offset_t OffsetOfIndex(uint32_t idx) { + return base::checked_cast<uint32_t>(item_base_offset_ + idx * item_size_); + } + + const offset_t hi_; + const offset_t item_base_offset_; + const uint32_t num_items_; + const uint32_t item_size_; + const uint32_t rel_location_; + const Mapper mapper_; + offset_t cur_idx_ = 0; +}; + +// Parses a flattened jagged list of lists of items that looks like: +// NTTT|NTT|NTTTT|N|NTT... +// where |N| is an uint32_t representing the number of items in each sub-list, +// and "T" is a fixed-size item (|item_width|) of type "T". On success, stores +// the offset of each |T| into |item_offsets|, and returns true. Otherwise +// (e.g., on finding any structural problem) returns false. +bool ParseItemOffsets(ConstBufferView image, + const dex::MapItem& map_item, + size_t item_width, + std::vector<offset_t>* item_offsets) { + // Sanity check: |image| should at least fit |map_item.size| copies of "N". 
+ if (!image.covers_array(map_item.offset, map_item.size, sizeof(uint32_t))) + return false; + BufferSource source = std::move(BufferSource(image).Skip(map_item.offset)); + item_offsets->clear(); + for (uint32_t i = 0; i < map_item.size; ++i) { + if (!source.AlignOn(image, 4U)) + return false; + uint32_t unsafe_size; + if (!source.GetValue<uint32_t>(&unsafe_size)) + return false; + DCHECK(Is32BitAligned( + base::checked_cast<offset_t>(source.begin() - image.begin()))); + if (!source.covers_array(0, unsafe_size, item_width)) + return false; + for (uint32_t j = 0; j < unsafe_size; ++j) { + item_offsets->push_back( + base::checked_cast<offset_t>(source.begin() - image.begin())); + source.Skip(item_width); + } + } + return true; +} + +// Parses AnnotationDirectoryItems of the format (using RegEx) "(AF*M*P*)*", +// where: +// A = AnnotationsDirectoryItem (contains class annotation), +// F = FieldAnnotation, +// M = MethodAnnotation, +// P = ParameterAnnotation. +// On success, stores the offsets of each class, field, method and parameter +// annotation for each item into |*_annotation_offsets|. Otherwise on finding +// structural issues returns false. +bool ParseAnnotationsDirectoryItems( + ConstBufferView image, + const dex::MapItem& annotations_directory_map_item, + std::vector<offset_t>* annotations_directory_item_offsets, + std::vector<offset_t>* field_annotation_offsets, + std::vector<offset_t>* method_annotation_offsets, + std::vector<offset_t>* parameter_annotation_offsets) { + // Sanity check: |image| should at least fit + // |annotations_directory_map_item.size| copies of "A". 
+ if (!image.covers_array(annotations_directory_map_item.offset, + annotations_directory_map_item.size, + sizeof(dex::AnnotationsDirectoryItem))) { + return false; + } + BufferSource source = std::move( + BufferSource(image).Skip(annotations_directory_map_item.offset)); + annotations_directory_item_offsets->clear(); + field_annotation_offsets->clear(); + method_annotation_offsets->clear(); + parameter_annotation_offsets->clear(); + + // Helper to process sublists. + auto parse_list = [&source, image](uint32_t unsafe_size, size_t item_width, + std::vector<offset_t>* item_offsets) { + DCHECK(Is32BitAligned( + base::checked_cast<offset_t>(source.begin() - image.begin()))); + if (!source.covers_array(0, unsafe_size, item_width)) + return false; + item_offsets->reserve(item_offsets->size() + unsafe_size); + for (uint32_t i = 0; i < unsafe_size; ++i) { + item_offsets->push_back( + base::checked_cast<offset_t>(source.begin() - image.begin())); + source.Skip(item_width); + } + return true; + }; + + annotations_directory_item_offsets->reserve( + annotations_directory_map_item.size); + for (uint32_t i = 0; i < annotations_directory_map_item.size; ++i) { + if (!source.AlignOn(image, 4U)) + return false; + // Parse header. + annotations_directory_item_offsets->push_back( + base::checked_cast<offset_t>(source.begin() - image.begin())); + dex::AnnotationsDirectoryItem unsafe_annotations_directory_item; + if (!source.GetValue(&unsafe_annotations_directory_item)) + return false; + // Parse sublists. 
+ if (!(parse_list(unsafe_annotations_directory_item.fields_size, + sizeof(dex::FieldAnnotation), field_annotation_offsets) && + parse_list(unsafe_annotations_directory_item.annotated_methods_size, + sizeof(dex::MethodAnnotation), + method_annotation_offsets) && + parse_list( + unsafe_annotations_directory_item.annotated_parameters_size, + sizeof(dex::ParameterAnnotation), + parameter_annotation_offsets))) { + return false; + } + } + return true; +} + +/******** CachedItemListReferenceReader ********/ + +// A class that takes sorted |item_offsets|, and emits all member variable of +// interest (MVIs) that fall inside |[lo, hi)|. The MVI of each item has +// location of |rel_location| from item offset, and has target extracted with +// |mapper| (which performs validation). By the "atomicity assumption", +// [|lo, hi)| never cut across an MVI. +class CachedItemListReferenceReader : public ReferenceReader { + public: + // A function that takes an MVI's location and emit its target offset. + using Mapper = base::RepeatingCallback<offset_t(offset_t)>; + + CachedItemListReferenceReader(offset_t lo, + offset_t hi, + uint32_t rel_location, + const std::vector<offset_t>& item_offsets, + Mapper&& mapper) + : hi_(hi), + rel_location_(rel_location), + end_it_(item_offsets.cend()), + mapper_(mapper) { + cur_it_ = std::upper_bound(item_offsets.cbegin(), item_offsets.cend(), lo); + // Adding |rel_location_| is necessary as references can be offset from the + // start of the item. + if (cur_it_ != item_offsets.begin() && *(cur_it_ - 1) + rel_location_ >= lo) + --cur_it_; + } + CachedItemListReferenceReader(const CachedItemListReferenceReader&) = delete; + const CachedItemListReferenceReader& operator=( + const CachedItemListReferenceReader&) = delete; + + // ReferenceReader: + absl::optional<Reference> GetNext() override { + while (cur_it_ < end_it_) { + const offset_t location = *cur_it_ + rel_location_; + if (location >= hi_) // Check is simplified by atomicity assumption. 
+ break; + const offset_t target = mapper_.Run(location); + if (target == kInvalidOffset) { + LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << "."; + break; + } + ++cur_it_; + + // kDexSentinelOffset is a sentinel for; + // - AnnotationsDirectoryItem: class_annotations_off + if (target == kDexSentinelOffset) + continue; + return Reference{location, target}; + } + return absl::nullopt; + } + + private: + const offset_t hi_; + const uint32_t rel_location_; + const std::vector<offset_t>::const_iterator end_it_; + const Mapper mapper_; + std::vector<offset_t>::const_iterator cur_it_; +}; + +// Reads an INT index at |location| in |image| and translates the index to the +// offset of a fixed-size item specified by |target_map_item| and +// |target_item_size|. Returns the target offset if valid, or kInvalidOffset +// otherwise. This is compatible with +// CachedReferenceListReferenceReader::Mapper, +// InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper. +template <typename INT> +static offset_t ReadTargetIndex(ConstBufferView image, + const dex::MapItem& target_map_item, + size_t target_item_size, + offset_t location) { + static_assert(sizeof(INT) <= sizeof(offset_t), + "INT may not fit into offset_t."); + const offset_t unsafe_idx = image.read<INT>(location); + // kDexSentinalIndexAsOffset (0xFFFFFFFF) is a sentinel for + // - ClassDefItem: superclass_idx, source_file_idx. + if (unsafe_idx == kDexSentinelIndexAsOffset) + return unsafe_idx; + if (unsafe_idx >= target_map_item.size) + return kInvalidOffset; + return target_map_item.offset + + base::checked_cast<offset_t>(unsafe_idx * target_item_size); +} + +// Reads uint32_t value in |image| at (valid) |location| and checks whether it +// is a safe offset of a fixed-size item. Returns the target offset (possibly a +// sentinel) if valid, or kInvalidOffset otherwise. 
This is compatible with +// CachedReferenceListReferenceReader::Mapper, +// InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper. +static offset_t ReadTargetOffset32(ConstBufferView image, offset_t location) { + const offset_t unsafe_target = + static_cast<offset_t>(image.read<uint32_t>(location)); + // Skip and don't validate kDexSentinelOffset as it is indicative of an + // empty reference. + if (unsafe_target == kDexSentinelOffset) + return unsafe_target; + + // TODO(huangs): Check that |unsafe_target| is within the correct data + // section. + if (unsafe_target >= image.size()) + return kInvalidOffset; + return unsafe_target; +} + +/******** ReferenceWriterAdaptor ********/ + +// A ReferenceWriter that adapts a callback that performs type-specific +// Reference writes. +class ReferenceWriterAdaptor : public ReferenceWriter { + public: + using Writer = base::RepeatingCallback<void(Reference, MutableBufferView)>; + + ReferenceWriterAdaptor(MutableBufferView image, Writer&& writer) + : image_(image), writer_(std::move(writer)) {} + + // ReferenceWriter: + void PutNext(Reference ref) override { writer_.Run(ref, image_); } + + private: + MutableBufferView image_; + Writer writer_; +}; + +// Helper that's compatible with ReferenceWriterAdaptor::Writer. +// Given that |ref.target| points to the start of a fixed size DEX item (e.g., +// FieldIdItem), translates |ref.target| to item index, and writes the result to +// |ref.location| as |INT|. +template <typename INT> +static void WriteTargetIndex(const dex::MapItem& target_map_item, + size_t target_item_size, + Reference ref, + MutableBufferView image) { + const size_t unsafe_idx = + (ref.target - target_map_item.offset) / target_item_size; + // Verify that index is within bound. + if (unsafe_idx >= target_map_item.size) { + LOG(ERROR) << "Target index out of bounds at: " << AsHex<8>(ref.location) + << "."; + return; + } + // Verify that |ref.target| points to start of item. 
+ DCHECK_EQ(ref.target, target_map_item.offset + unsafe_idx * target_item_size); + image.write<INT>(ref.location, base::checked_cast<INT>(unsafe_idx)); +} + +// Buffer for ReadDexHeader() to optionally return results. +struct ReadDexHeaderResults { + BufferSource source; + const dex::HeaderItem* header; + int dex_version; +}; + +// Returns whether |image| points to a DEX file. If this is a possibility and +// |opt_results| is not null, then uses it to pass extracted data to enable +// further parsing. +bool ReadDexHeader(ConstBufferView image, ReadDexHeaderResults* opt_results) { + // This part needs to be fairly efficient since it may be called many times. + BufferSource source(image); + const dex::HeaderItem* header = source.GetPointer<dex::HeaderItem>(); + if (!header) + return false; + if (header->magic[0] != 'd' || header->magic[1] != 'e' || + header->magic[2] != 'x' || header->magic[3] != '\n' || + header->magic[7] != '\0') { + return false; + } + + // Magic matches: More detailed tests can be conducted. + int dex_version = 0; + for (int i = 4; i < 7; ++i) { + if (!isdigit(header->magic[i])) + return false; + dex_version = dex_version * 10 + (header->magic[i] - '0'); + } + + // Only support DEX versions 35 and 37. + // TODO(huangs): Handle version 38. + if (dex_version != 35 && dex_version != 37) + return false; + + if (header->file_size > image.size() || + header->file_size < sizeof(dex::HeaderItem) || + header->map_off < sizeof(dex::HeaderItem)) { + return false; + } + + if (opt_results) + *opt_results = {source, header, dex_version}; + return true; +} + +} // namespace + +/******** DisassemblerDex ********/ + +DisassemblerDex::DisassemblerDex() : Disassembler(4) {} + +DisassemblerDex::~DisassemblerDex() = default; + +// static. 
+bool DisassemblerDex::QuickDetect(ConstBufferView image) { + return ReadDexHeader(image, nullptr); +} + +ExecutableType DisassemblerDex::GetExeType() const { + return kExeTypeDex; +} + +std::string DisassemblerDex::GetExeTypeString() const { + return base::StringPrintf("DEX (version %d)", dex_version_); +} + +std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const { + // Must follow DisassemblerDex::ReferenceType order. Initialized on first use. + return { + {{4, TypeTag(kTypeIdToDescriptorStringId), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadTypeIdToDescriptorStringId32, + &DisassemblerDex::MakeWriteStringId32}, + {{4, TypeTag(kProtoIdToShortyStringId), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadProtoIdToShortyStringId32, + &DisassemblerDex::MakeWriteStringId32}, + {{4, TypeTag(kFieldIdToNameStringId), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadFieldToNameStringId32, + &DisassemblerDex::MakeWriteStringId32}, + {{4, TypeTag(kMethodIdToNameStringId), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadMethodIdToNameStringId32, + &DisassemblerDex::MakeWriteStringId32}, + {{4, TypeTag(kClassDefToSourceFileStringId), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadClassDefToSourceFileStringId32, + &DisassemblerDex::MakeWriteStringId32}, + {{2, TypeTag(kCodeToStringId16), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadCodeToStringId16, + &DisassemblerDex::MakeWriteStringId16}, + {{4, TypeTag(kCodeToStringId32), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadCodeToStringId32, + &DisassemblerDex::MakeWriteStringId32}, + {{4, TypeTag(kProtoIdToReturnTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadProtoIdToReturnTypeId32, + &DisassemblerDex::MakeWriteTypeId32}, + {{2, TypeTag(kFieldIdToClassTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadFieldToClassTypeId16, + &DisassemblerDex::MakeWriteTypeId16}, + {{2, TypeTag(kFieldIdToTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadFieldToTypeId16, + 
&DisassemblerDex::MakeWriteTypeId16}, + {{2, TypeTag(kMethodIdToClassTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadMethodIdToClassTypeId16, + &DisassemblerDex::MakeWriteTypeId16}, + {{4, TypeTag(kClassDefToClassTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadClassDefToClassTypeId32, + &DisassemblerDex::MakeWriteTypeId32}, + {{4, TypeTag(kClassDefToSuperClassTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadClassDefToSuperClassTypeId32, + &DisassemblerDex::MakeWriteTypeId32}, + {{2, TypeTag(kTypeListToTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadTypeListToTypeId16, + &DisassemblerDex::MakeWriteTypeId16}, + {{2, TypeTag(kCodeToTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadCodeToTypeId16, + &DisassemblerDex::MakeWriteTypeId16}, + {{2, TypeTag(kMethodIdToProtoId), PoolTag(kProtoId)}, + &DisassemblerDex::MakeReadMethodIdToProtoId16, + &DisassemblerDex::MakeWriteProtoId16}, + {{2, TypeTag(kCodeToFieldId), PoolTag(kFieldId)}, + &DisassemblerDex::MakeReadCodeToFieldId16, + &DisassemblerDex::MakeWriteFieldId16}, + {{4, TypeTag(kAnnotationsDirectoryToFieldId), PoolTag(kFieldId)}, + &DisassemblerDex::MakeReadAnnotationsDirectoryToFieldId32, + &DisassemblerDex::MakeWriteFieldId32}, + {{2, TypeTag(kCodeToMethodId), PoolTag(kMethodId)}, + &DisassemblerDex::MakeReadCodeToMethodId16, + &DisassemblerDex::MakeWriteMethodId16}, + {{4, TypeTag(kAnnotationsDirectoryToMethodId), PoolTag(kMethodId)}, + &DisassemblerDex::MakeReadAnnotationsDirectoryToMethodId32, + &DisassemblerDex::MakeWriteMethodId32}, + {{4, TypeTag(kAnnotationsDirectoryToParameterMethodId), + PoolTag(kMethodId)}, + &DisassemblerDex::MakeReadAnnotationsDirectoryToParameterMethodId32, + &DisassemblerDex::MakeWriteMethodId32}, + {{4, TypeTag(kProtoIdToParametersTypeList), PoolTag(kTypeList)}, + &DisassemblerDex::MakeReadProtoIdToParametersTypeList, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kClassDefToInterfacesTypeList), PoolTag(kTypeList)}, + 
&DisassemblerDex::MakeReadClassDefToInterfacesTypeList, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kAnnotationsDirectoryToParameterAnnotationSetRef), + PoolTag(kAnnotationSetRefList)}, + &DisassemblerDex:: + MakeReadAnnotationsDirectoryToParameterAnnotationSetRef, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kAnnotationSetRefListToAnnotationSet), + PoolTag(kAnnotionSet)}, + &DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kAnnotationsDirectoryToClassAnnotationSet), + PoolTag(kAnnotionSet)}, + &DisassemblerDex::MakeReadAnnotationsDirectoryToClassAnnotationSet, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kAnnotationsDirectoryToFieldAnnotationSet), + PoolTag(kAnnotionSet)}, + &DisassemblerDex::MakeReadAnnotationsDirectoryToFieldAnnotationSet, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kAnnotationsDirectoryToMethodAnnotationSet), + PoolTag(kAnnotionSet)}, + &DisassemblerDex::MakeReadAnnotationsDirectoryToMethodAnnotationSet, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kClassDefToClassData), PoolTag(kClassData)}, + &DisassemblerDex::MakeReadClassDefToClassData, + &DisassemblerDex::MakeWriteAbs32}, + {{1, TypeTag(kCodeToRelCode8), PoolTag(kCode)}, + &DisassemblerDex::MakeReadCodeToRelCode8, + &DisassemblerDex::MakeWriteRelCode8}, + {{2, TypeTag(kCodeToRelCode16), PoolTag(kCode)}, + &DisassemblerDex::MakeReadCodeToRelCode16, + &DisassemblerDex::MakeWriteRelCode16}, + {{4, TypeTag(kCodeToRelCode32), PoolTag(kCode)}, + &DisassemblerDex::MakeReadCodeToRelCode32, + &DisassemblerDex::MakeWriteRelCode32}, + {{4, TypeTag(kStringIdToStringData), PoolTag(kStringData)}, + &DisassemblerDex::MakeReadStringIdToStringData, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kAnnotationSetToAnnotation), PoolTag(kAnnotation)}, + &DisassemblerDex::MakeReadAnnotationSetToAnnotation, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kClassDefToStaticValuesEncodedArray), + 
PoolTag(kEncodedArray)}, + &DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray, + &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kClassDefToAnnotationDirectory), + PoolTag(kAnnotationsDirectory)}, + &DisassemblerDex::MakeReadClassDefToAnnotationDirectory, + &DisassemblerDex::MakeWriteAbs32}, + }; +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadStringIdToStringData( + offset_t lo, + offset_t hi) { + // dex::StringIdItem::string_data_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<ItemReferenceReader>( + lo, hi, string_map_item_, sizeof(dex::StringIdItem), + offsetof(dex::StringIdItem, string_data_off), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadTypeIdToDescriptorStringId32(offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::TypeIdItem::descriptor_idx)>, image_, + string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, type_map_item_, sizeof(dex::TypeIdItem), + offsetof(dex::TypeIdItem, descriptor_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadProtoIdToShortyStringId32(offset_t lo, offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::ProtoIdItem::shorty_idx)>, image_, + string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem), + offsetof(dex::ProtoIdItem, shorty_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadProtoIdToReturnTypeId32(offset_t lo, offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::ProtoIdItem::return_type_idx)>, image_, + type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem), + offsetof(dex::ProtoIdItem, 
return_type_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadProtoIdToParametersTypeList(offset_t lo, offset_t hi) { + // dex::ProtoIdItem::parameters_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<ItemReferenceReader>( + lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem), + offsetof(dex::ProtoIdItem, parameters_off), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToClassTypeId16( + offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::FieldIdItem::class_idx)>, image_, + type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, field_map_item_, sizeof(dex::FieldIdItem), + offsetof(dex::FieldIdItem, class_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToTypeId16( + offset_t lo, + offset_t hi) { + auto mapper = + base::BindRepeating(ReadTargetIndex<decltype(dex::FieldIdItem::type_idx)>, + image_, type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, field_map_item_, sizeof(dex::FieldIdItem), + offsetof(dex::FieldIdItem, type_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToNameStringId32( + offset_t lo, + offset_t hi) { + auto mapper = + base::BindRepeating(ReadTargetIndex<decltype(dex::FieldIdItem::name_idx)>, + image_, string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, field_map_item_, sizeof(dex::FieldIdItem), + offsetof(dex::FieldIdItem, name_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadMethodIdToClassTypeId16(offset_t lo, offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::MethodIdItem::class_idx)>, image_, + type_map_item_, sizeof(dex::TypeIdItem)); + return 
std::make_unique<ItemReferenceReader>( + lo, hi, method_map_item_, sizeof(dex::MethodIdItem), + offsetof(dex::MethodIdItem, class_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadMethodIdToProtoId16( + offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::MethodIdItem::proto_idx)>, image_, + proto_map_item_, sizeof(dex::ProtoIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, method_map_item_, sizeof(dex::MethodIdItem), + offsetof(dex::MethodIdItem, proto_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadMethodIdToNameStringId32(offset_t lo, offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::MethodIdItem::name_idx)>, image_, + string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, method_map_item_, sizeof(dex::MethodIdItem), + offsetof(dex::MethodIdItem, name_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadClassDefToClassTypeId32(offset_t lo, offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::ClassDefItem::superclass_idx)>, image_, + type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem), + offsetof(dex::ClassDefItem, class_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadClassDefToSuperClassTypeId32(offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::ClassDefItem::superclass_idx)>, image_, + type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem), + offsetof(dex::ClassDefItem, superclass_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> 
+DisassemblerDex::MakeReadClassDefToInterfacesTypeList(offset_t lo, + offset_t hi) { + // dex::ClassDefItem::interfaces_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<ItemReferenceReader>( + lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem), + offsetof(dex::ClassDefItem, interfaces_off), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadClassDefToSourceFileStringId32(offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::ClassDefItem::source_file_idx)>, image_, + string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem), + offsetof(dex::ClassDefItem, source_file_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadClassDefToAnnotationDirectory(offset_t lo, + offset_t hi) { + // dex::ClassDefItem::annotations_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<ItemReferenceReader>( + lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem), + offsetof(dex::ClassDefItem, annotations_off), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadClassDefToClassData( + offset_t lo, + offset_t hi) { + // dex::ClassDefItem::class_data_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<ItemReferenceReader>( + lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem), + offsetof(dex::ClassDefItem, class_data_off), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray(offset_t lo, + offset_t hi) { + // dex::ClassDefItem::static_values_off mapper. 
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<ItemReferenceReader>( + lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem), + offsetof(dex::ClassDefItem, static_values_off), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadTypeListToTypeId16( + offset_t lo, + offset_t hi) { + auto mapper = + base::BindRepeating(ReadTargetIndex<decltype(dex::TypeItem::type_idx)>, + image_, type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::TypeItem, type_idx), type_list_offsets_, + std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationSetToAnnotation(offset_t lo, offset_t hi) { + // dex::AnnotationOffItem::annotation_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::AnnotationOffItem, annotation_off), + annotation_set_offsets_, std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet(offset_t lo, + offset_t hi) { + // dex::AnnotationSetRefItem::annotations_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::AnnotationSetRefItem, annotations_off), + annotation_set_ref_list_offsets_, std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationsDirectoryToClassAnnotationSet(offset_t lo, + offset_t hi) { + // dex::AnnotationsDirectoryItem::class_annotations_off mapper. 
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::AnnotationsDirectoryItem, class_annotations_off), + annotations_directory_item_offsets_, std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationsDirectoryToFieldId32(offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::FieldAnnotation::field_idx)>, image_, + field_map_item_, sizeof(dex::FieldIdItem)); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::FieldAnnotation, field_idx), + annotations_directory_item_field_annotation_offsets_, std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationsDirectoryToFieldAnnotationSet(offset_t lo, + offset_t hi) { + // dex::FieldAnnotation::annotations_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::FieldAnnotation, annotations_off), + annotations_directory_item_field_annotation_offsets_, std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationsDirectoryToMethodId32(offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::MethodAnnotation::method_idx)>, image_, + method_map_item_, sizeof(dex::MethodIdItem)); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::MethodAnnotation, method_idx), + annotations_directory_item_method_annotation_offsets_, std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationsDirectoryToMethodAnnotationSet( + offset_t lo, + offset_t hi) { + // dex::MethodAnnotation::annotations_off mapper. 
+ auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::MethodAnnotation, annotations_off), + annotations_directory_item_method_annotation_offsets_, std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationsDirectoryToParameterMethodId32( + offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::ParameterAnnotation::method_idx)>, image_, + method_map_item_, sizeof(dex::MethodIdItem)); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::ParameterAnnotation, method_idx), + annotations_directory_item_parameter_annotation_offsets_, + std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationsDirectoryToParameterAnnotationSetRef( + offset_t lo, + offset_t hi) { + // dex::ParameterAnnotation::annotations_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::ParameterAnnotation, annotations_off), + annotations_directory_item_parameter_annotation_offsets_, + std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x1A)) { // const-string + // BBBB from e.g., const-string vAA, string@BBBB. 
+ return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = + base::BindRepeating(ReadTargetIndex<uint16_t>, image_, string_map_item_, + sizeof(dex::StringIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId32( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x1B)) { // const-string/jumbo + // BBBBBBBB from e.g., const-string/jumbo vAA, string@BBBBBBBB. + return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = + base::BindRepeating(ReadTargetIndex<uint32_t>, image_, string_map_item_, + sizeof(dex::StringIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToTypeId16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x1C || // const-class + value.instr->opcode == 0x1F || // check-cast + value.instr->opcode == 0x20 || // instance-of + value.instr->opcode == 0x22 || // new-instance + value.instr->opcode == 0x23 || // new-array + value.instr->opcode == 0x24 || // filled-new-array + value.instr->opcode == 0x25)) { // filled-new-array/range + // BBBB from e.g., const-class vAA, type@BBBB. 
+ return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating(ReadTargetIndex<uint16_t>, image_, + type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToFieldId16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x52 || // iinstanceop (iget-*, iput-*) + value.instr->opcode == 0x60)) { // sstaticop (sget-*, sput-*) + // CCCC from e.g., iget vA, vB, field@CCCC. + return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating(ReadTargetIndex<uint16_t>, image_, + field_map_item_, sizeof(dex::FieldIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToMethodId16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x6E || // invoke-kind + value.instr->opcode == 0x74)) { // invoke-kind/range + // BBBB from e.g., invoke-virtual {vC, vD, vE, vF, vG}, meth@BBBB. 
+ return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = + base::BindRepeating(ReadTargetIndex<uint16_t>, image_, method_map_item_, + sizeof(dex::MethodIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode8( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::t && + value.instr->opcode == 0x28) { // goto + // +AA from e.g., goto +AA. + return value.instr_offset + 1; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating( + [](DisassemblerDex* dis, offset_t location) { + // Address is relative to the current instruction, which begins 1 unit + // before |location|. This needs to be subtracted out. Also, store as + // int32_t so |unsafe_delta - 1| won't underflow! + int32_t unsafe_delta = dis->image_.read<int8_t>(location); + offset_t unsafe_target = static_cast<offset_t>( + location + (unsafe_delta - 1) * kInstrUnitSize); + // TODO(huangs): Check that |unsafe_target| stays within code item. + return unsafe_target; + }, + base::Unretained(this)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::t && + (value.instr->opcode == 0x29 || // goto/16 + value.instr->opcode == 0x32 || // if-test + value.instr->opcode == 0x38)) { // if-testz + // +AAAA from e.g., goto/16 +AAAA. 
+ return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating( + [](DisassemblerDex* dis, offset_t location) { + // Address is relative to the current instruction, which begins 1 unit + // before |location|. This needs to be subtracted out. Also, store as + // int32_t so |unsafe_delta - 1| won't underflow! + int32_t unsafe_delta = dis->image_.read<int16_t>(location); + offset_t unsafe_target = static_cast<offset_t>( + location + (unsafe_delta - 1) * kInstrUnitSize); + // TODO(huangs): Check that |unsafe_target| stays within code item. + return unsafe_target; + }, + base::Unretained(this)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode32( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::t && + (value.instr->opcode == 0x26 || // fill-array-data + value.instr->opcode == 0x2A || // goto/32 + value.instr->opcode == 0x2B || // packed-switch + value.instr->opcode == 0x2C)) { // sparse-switch + // +BBBBBBBB from e.g., fill-array-data vAA, +BBBBBBBB. + // +AAAAAAAA from e.g., goto/32 +AAAAAAAA. + return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating( + [](DisassemblerDex* dis, offset_t location) { + // Address is relative to the current instruction, which begins 1 unit + // before |location|. This needs to be subtracted out. Use int64_t to + // avoid underflow and overflow. + int64_t unsafe_delta = dis->image_.read<int32_t>(location); + int64_t unsafe_target = location + (unsafe_delta - 1) * kInstrUnitSize; + + // TODO(huangs): Check that |unsafe_target| stays within code item. 
+ offset_t checked_unsafe_target = + static_cast<offset_t>(base::CheckedNumeric<offset_t>(unsafe_target) + .ValueOrDefault(kInvalidOffset)); + return checked_unsafe_target < kOffsetBound ? checked_unsafe_target + : kInvalidOffset; + }, + base::Unretained(this)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId16( + MutableBufferView image) { + auto writer = base::BindRepeating( + WriteTargetIndex<uint16_t>, string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId32( + MutableBufferView image) { + auto writer = base::BindRepeating( + WriteTargetIndex<uint32_t>, string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId16( + MutableBufferView image) { + auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, type_map_item_, + sizeof(dex::TypeIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId32( + MutableBufferView image) { + auto writer = base::BindRepeating(WriteTargetIndex<uint32_t>, type_map_item_, + sizeof(dex::TypeIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteProtoId16( + MutableBufferView image) { + auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, proto_map_item_, + sizeof(dex::ProtoIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId16( + MutableBufferView image) { + auto writer = 
base::BindRepeating(WriteTargetIndex<uint16_t>, field_map_item_, + sizeof(dex::FieldIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId32( + MutableBufferView image) { + auto writer = base::BindRepeating(WriteTargetIndex<uint32_t>, field_map_item_, + sizeof(dex::FieldIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId16( + MutableBufferView image) { + auto writer = base::BindRepeating( + WriteTargetIndex<uint16_t>, method_map_item_, sizeof(dex::MethodIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId32( + MutableBufferView image) { + auto writer = base::BindRepeating( + WriteTargetIndex<uint32_t>, method_map_item_, sizeof(dex::MethodIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode8( + MutableBufferView image) { + auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) { + ptrdiff_t unsafe_byte_diff = + static_cast<ptrdiff_t>(ref.target) - ref.location; + DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize); + // |delta| is relative to start of instruction, which is 1 unit before + // |ref.location|. The subtraction above removed too much, so +1 to fix. 
+ base::CheckedNumeric<int8_t> delta((unsafe_byte_diff / kInstrUnitSize) + 1); + if (!delta.IsValid()) { + LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << "."; + return; + } + image.write<int8_t>(ref.location, delta.ValueOrDie()); + }); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode16( + MutableBufferView image) { + auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) { + ptrdiff_t unsafe_byte_diff = + static_cast<ptrdiff_t>(ref.target) - ref.location; + DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize); + // |delta| is relative to start of instruction, which is 1 unit before + // |ref.location|. The subtraction above removed too much, so +1 to fix. + base::CheckedNumeric<int16_t> delta((unsafe_byte_diff / kInstrUnitSize) + + 1); + if (!delta.IsValid()) { + LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << "."; + return; + } + image.write<int16_t>(ref.location, delta.ValueOrDie()); + }); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode32( + MutableBufferView image) { + auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) { + ptrdiff_t unsafe_byte_diff = + static_cast<ptrdiff_t>(ref.target) - ref.location; + DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize); + // |delta| is relative to start of instruction, which is 1 unit before + // |ref.location|. The subtraction above removed too much, so +1 to fix. 
+ base::CheckedNumeric<int32_t> delta((unsafe_byte_diff / kInstrUnitSize) + + 1); + if (!delta.IsValid()) { + LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << "."; + return; + } + image.write<int32_t>(ref.location, delta.ValueOrDie()); + }); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteAbs32( + MutableBufferView image) { + auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) { + image.write<uint32_t>(ref.location, ref.target); + }); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +bool DisassemblerDex::Parse(ConstBufferView image) { + image_ = image; + return ParseHeader(); +} + +bool DisassemblerDex::ParseHeader() { + ReadDexHeaderResults results; + if (!ReadDexHeader(image_, &results)) + return false; + + header_ = results.header; + dex_version_ = results.dex_version; + BufferSource source = results.source; + + // DEX header contains file size, so use it to resize |image_| right away. + image_.shrink(header_->file_size); + + // Read map list. This is not a fixed-size array, so instead of reading + // MapList directly, read |MapList::size| first, then visit elements in + // |MapList::list|. + static_assert( + offsetof(dex::MapList, list) == sizeof(decltype(dex::MapList::size)), + "MapList size error."); + source = std::move(BufferSource(image_).Skip(header_->map_off)); + decltype(dex::MapList::size) list_size = 0; + if (!source.GetValue(&list_size) || list_size > dex::kMaxItemListSize) + return false; + const auto* item_list = source.GetArray<const dex::MapItem>(list_size); + if (!item_list) + return false; + + // Read and validate map list, ensuring that required item types are present. + // - GetItemBaseSize() should have an entry for each item. + // - dex::kTypeCodeItem is actually not required; it's possible to have a DEX + // file with classes that have no code. 
However, this is unlikely to appear + // in application, so for simplicity we require DEX files to have code. + std::set<uint16_t> required_item_types = { + dex::kTypeStringIdItem, dex::kTypeTypeIdItem, dex::kTypeProtoIdItem, + dex::kTypeFieldIdItem, dex::kTypeMethodIdItem, dex::kTypeClassDefItem, + dex::kTypeTypeList, dex::kTypeCodeItem, + }; + for (offset_t i = 0; i < list_size; ++i) { + const dex::MapItem* item = &item_list[i]; + // Reject unreasonably large |item->size|. + size_t item_size = GetItemBaseSize(item->type); + // Confusing name: |item->size| is actually the number of items. + if (!image_.covers_array(item->offset, item->size, item_size)) + return false; + if (!map_item_map_.insert(std::make_pair(item->type, item)).second) + return false; // A given type must appear at most once. + required_item_types.erase(item->type); + } + // TODO(huangs): Replace this with guards throughout file. + if (!required_item_types.empty()) + return false; + + // Make local copies of main map items. + string_map_item_ = *map_item_map_[dex::kTypeStringIdItem]; + type_map_item_ = *map_item_map_[dex::kTypeTypeIdItem]; + proto_map_item_ = *map_item_map_[dex::kTypeProtoIdItem]; + field_map_item_ = *map_item_map_[dex::kTypeFieldIdItem]; + method_map_item_ = *map_item_map_[dex::kTypeMethodIdItem]; + class_def_map_item_ = *map_item_map_[dex::kTypeClassDefItem]; + type_list_map_item_ = *map_item_map_[dex::kTypeTypeList]; + code_map_item_ = *map_item_map_[dex::kTypeCodeItem]; + + // The following types are optional and may not be present in every DEX file. 
+ if (map_item_map_.count(dex::kTypeAnnotationSetRefList)) { + annotation_set_ref_list_map_item_ = + *map_item_map_[dex::kTypeAnnotationSetRefList]; + } + if (map_item_map_.count(dex::kTypeAnnotationSetItem)) + annotation_set_map_item_ = *map_item_map_[dex::kTypeAnnotationSetItem]; + if (map_item_map_.count(dex::kTypeAnnotationsDirectoryItem)) { + annotations_directory_map_item_ = + *map_item_map_[dex::kTypeAnnotationsDirectoryItem]; + } + + // Iteratively parse variable length lists, annotations directory items, and + // code items blocks. Any failure would indicate invalid DEX. Success + // indicates that no structural problem is found. However, contained + // references data read from parsed items still require validation. + if (!(ParseItemOffsets(image_, type_list_map_item_, sizeof(dex::TypeItem), + &type_list_offsets_) && + ParseItemOffsets(image_, annotation_set_ref_list_map_item_, + sizeof(dex::AnnotationSetRefItem), + &annotation_set_ref_list_offsets_) && + ParseItemOffsets(image_, annotation_set_map_item_, + sizeof(dex::AnnotationOffItem), + &annotation_set_offsets_) && + ParseAnnotationsDirectoryItems( + image_, annotations_directory_map_item_, + &annotations_directory_item_offsets_, + &annotations_directory_item_field_annotation_offsets_, + &annotations_directory_item_method_annotation_offsets_, + &annotations_directory_item_parameter_annotation_offsets_))) { + return false; + } + CodeItemParser code_item_parser(image_); + if (!code_item_parser.Init(code_map_item_)) + return false; + code_item_offsets_.resize(code_map_item_.size); + for (size_t i = 0; i < code_map_item_.size; ++i) { + const offset_t code_item_offset = code_item_parser.GetNext(); + if (code_item_offset == kInvalidOffset) + return false; + code_item_offsets_[i] = code_item_offset; + } + // DEX files are required to have parsable code items. 
+ return !code_item_offsets_.empty(); +} + +} // namespace zucchini diff --git a/disassembler_dex.h b/disassembler_dex.h new file mode 100644 index 0000000..2038a3c --- /dev/null +++ b/disassembler_dex.h @@ -0,0 +1,273 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_DEX_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_DEX_H_ + +#include <stdint.h> + +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/type_dex.h" + +namespace zucchini { + +// For consistency, let "canonical order" of DEX data types be the order defined +// in https://source.android.com/devices/tech/dalvik/dex-format "Type Codes" +// section. + +class DisassemblerDex : public Disassembler { + public: + // Pools follow canonical order. + enum ReferencePool : uint8_t { + kStringId, + kTypeId, + kProtoId, + kFieldId, + kMethodId, + // kClassDef, // Unused + // kCallSiteId, // Unused + // kMethodHandle, // Unused + kTypeList, + kAnnotationSetRefList, + kAnnotionSet, + kClassData, + kCode, + kStringData, + kAnnotation, + kEncodedArray, + kAnnotationsDirectory, + // kCallSite, // Unused + kNumPools + }; + + // Types are grouped and ordered by target ReferencePool. This is required by + // Zucchini-apply, which visits references by type order and sequentially + // handles pools in the same order. Type-pool association is established in + // MakeReferenceGroups(), and verified by a unit test. 
+ enum ReferenceType : uint8_t { + kTypeIdToDescriptorStringId, // kStringId + kProtoIdToShortyStringId, + kFieldIdToNameStringId, + kMethodIdToNameStringId, + kClassDefToSourceFileStringId, + kCodeToStringId16, + kCodeToStringId32, + + kProtoIdToReturnTypeId, // kTypeId + kFieldIdToClassTypeId, + kFieldIdToTypeId, + kMethodIdToClassTypeId, + kClassDefToClassTypeId, + kClassDefToSuperClassTypeId, + kTypeListToTypeId, + kCodeToTypeId, + + kMethodIdToProtoId, // kProtoId + + kCodeToFieldId, // kFieldId + kAnnotationsDirectoryToFieldId, + + kCodeToMethodId, // kMethodId + kAnnotationsDirectoryToMethodId, + kAnnotationsDirectoryToParameterMethodId, + + kProtoIdToParametersTypeList, // kTypeList + kClassDefToInterfacesTypeList, + + kAnnotationsDirectoryToParameterAnnotationSetRef, // kAnnotationSetRef, + + kAnnotationSetRefListToAnnotationSet, // kAnnotationSet, + kAnnotationsDirectoryToClassAnnotationSet, + kAnnotationsDirectoryToFieldAnnotationSet, + kAnnotationsDirectoryToMethodAnnotationSet, + + kClassDefToClassData, // kClassData + + kCodeToRelCode8, // kCode + kCodeToRelCode16, + kCodeToRelCode32, + + kStringIdToStringData, // kStringData + + kAnnotationSetToAnnotation, // kAnnotation + + kClassDefToStaticValuesEncodedArray, // kEncodedArrayItem + + kClassDefToAnnotationDirectory, // kAnnotationsDirectory + + // Intentionally ignored references (never appeared in test corpus). + // kMethodHandleToFieldId, + // kMethodHandleToMethodId, + // kCallSiteIdToCallSite, + + kNumTypes + }; + + DisassemblerDex(); + DisassemblerDex(const DisassemblerDex&) = delete; + const DisassemblerDex& operator=(const DisassemblerDex&) = delete; + ~DisassemblerDex() override; + + // Applies quick checks to determine if |image| *may* point to the start of an + // executable. Returns true on success. 
+ static bool QuickDetect(ConstBufferView image); + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector<ReferenceGroup> MakeReferenceGroups() const override; + + // Functions that return reference readers. These follow canonical order of + // *locations* (unlike targets for ReferenceType). This allows functions with + // similar parsing logic to appear togeter. + std::unique_ptr<ReferenceReader> MakeReadStringIdToStringData(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadTypeIdToDescriptorStringId32( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadProtoIdToShortyStringId32( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadProtoIdToReturnTypeId32(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadProtoIdToParametersTypeList( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadFieldToClassTypeId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadFieldToTypeId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadFieldToNameStringId32(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadMethodIdToClassTypeId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadMethodIdToProtoId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadMethodIdToNameStringId32( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadClassDefToClassTypeId32(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadClassDefToSuperClassTypeId32( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadClassDefToInterfacesTypeList( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadClassDefToSourceFileStringId32( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadClassDefToAnnotationDirectory( + offset_t lo, 
+ offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadClassDefToClassData(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadClassDefToStaticValuesEncodedArray( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadTypeListToTypeId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadAnnotationSetToAnnotation( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadAnnotationSetRefListToAnnotationSet( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> + MakeReadAnnotationsDirectoryToClassAnnotationSet(offset_t lo, offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadAnnotationsDirectoryToFieldId32( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> + MakeReadAnnotationsDirectoryToFieldAnnotationSet(offset_t lo, offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadAnnotationsDirectoryToMethodId32( + offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> + MakeReadAnnotationsDirectoryToMethodAnnotationSet(offset_t lo, offset_t hi); + std::unique_ptr<ReferenceReader> + MakeReadAnnotationsDirectoryToParameterMethodId32(offset_t lo, offset_t hi); + std::unique_ptr<ReferenceReader> + MakeReadAnnotationsDirectoryToParameterAnnotationSetRef(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadCodeToStringId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadCodeToStringId32(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadCodeToTypeId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadCodeToFieldId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadCodeToMethodId16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadCodeToRelCode8(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadCodeToRelCode16(offset_t lo, + offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadCodeToRelCode32(offset_t 
lo, + offset_t hi); + + // Functions that return reference writers. Different readers may share a + // common writer. Therefore these loosely follow canonical order of locations, + std::unique_ptr<ReferenceWriter> MakeWriteStringId16(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteStringId32(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteTypeId16(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteTypeId32(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteProtoId16(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteFieldId16(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteFieldId32(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteMethodId16(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteMethodId32(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteRelCode8(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteRelCode16(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteRelCode32(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image); + + private: + friend Disassembler; + using MapItemMap = std::map<uint16_t, const dex::MapItem*>; + + // Disassembler: + bool Parse(ConstBufferView image) override; + + bool ParseHeader(); + + const dex::HeaderItem* header_ = nullptr; + int dex_version_ = 0; + MapItemMap map_item_map_ = {}; + dex::MapItem string_map_item_ = {}; + dex::MapItem type_map_item_ = {}; + dex::MapItem proto_map_item_ = {}; + dex::MapItem field_map_item_ = {}; + dex::MapItem method_map_item_ = {}; + dex::MapItem class_def_map_item_ = {}; + dex::MapItem type_list_map_item_ = {}; + dex::MapItem code_map_item_ = {}; + + // Optionally supported (not all DEX files have these). 
+ dex::MapItem annotation_set_ref_list_map_item_ = {}; + dex::MapItem annotation_set_map_item_ = {}; + dex::MapItem annotations_directory_map_item_ = {}; + + // Sorted list of offsets of parsed items in |image_|. + std::vector<offset_t> code_item_offsets_; + std::vector<offset_t> type_list_offsets_; + std::vector<offset_t> annotation_set_ref_list_offsets_; + std::vector<offset_t> annotation_set_offsets_; + std::vector<offset_t> annotations_directory_item_offsets_; + std::vector<offset_t> annotations_directory_item_field_annotation_offsets_; + std::vector<offset_t> annotations_directory_item_method_annotation_offsets_; + std::vector<offset_t> + annotations_directory_item_parameter_annotation_offsets_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_DEX_H_ diff --git a/disassembler_dex_unittest.cc b/disassembler_dex_unittest.cc new file mode 100644 index 0000000..04fe6eb --- /dev/null +++ b/disassembler_dex_unittest.cc @@ -0,0 +1,51 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_dex.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <set> +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +template <typename T> +size_t CountDistinct(const std::vector<T>& v) { + return std::set<T>(v.begin(), v.end()).size(); +} + +} // namespace + +// Ensures that ReferenceGroups from DisassemblerDex::MakeReferenceGroups() +// cover each non-sentinel element in ReferenceType in order, exactly once. Also +// ensures that the ReferenceType elements are grouped by ReferencePool, and +// listed in increasing order. 
+TEST(DisassemblerDexTest, ReferenceGroups) { + std::vector<uint32_t> pool_list; + std::vector<uint32_t> type_list; + DisassemblerDex dis; + for (ReferenceGroup group : dis.MakeReferenceGroups()) { + pool_list.push_back(static_cast<uint32_t>(group.pool_tag().value())); + type_list.push_back(static_cast<uint32_t>(group.type_tag().value())); + } + + // Check ReferenceByte coverage. + constexpr size_t kNumTypes = DisassemblerDex::kNumTypes; + EXPECT_EQ(kNumTypes, type_list.size()); + EXPECT_EQ(kNumTypes, CountDistinct(type_list)); + EXPECT_TRUE(std::is_sorted(type_list.begin(), type_list.end())); + + // Check that ReferenceType elements are grouped by ReferencePool. Note that + // repeats can occur, and pools can be skipped. + EXPECT_TRUE(std::is_sorted(pool_list.begin(), pool_list.end())); +} + +} // namespace zucchini diff --git a/disassembler_elf.cc b/disassembler_elf.cc new file mode 100644 index 0000000..94dc12a --- /dev/null +++ b/disassembler_elf.cc @@ -0,0 +1,855 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_elf.h" + +#include <stddef.h> + +#include <utility> + +#include "base/logging.h" +#include "base/numerics/checked_math.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/abs32_utils.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/arm_utils.h" +#include "components/zucchini/buffer_source.h" + +namespace zucchini { + +namespace { + +constexpr uint64_t kElfImageBase = 0; +constexpr size_t kSizeBound = 0x7FFF0000; + +// Threshold value for heuristics to detect THUMB2 code. +constexpr double kAArch32BitCondAlwaysDensityThreshold = 0.4; + +// Bit fields for JudgeSection() return value. +enum SectionJudgement : int { + // Bit: Section does not invalidate ELF, but may or may not be useful. 
+ SECTION_BIT_SAFE = 1 << 0, + // Bit: Section useful for AddressTranslator, to map between offsets and RVAs. + SECTION_BIT_USEFUL_FOR_ADDRESS_TRANSLATOR = 1 << 1, + // Bit: Section useful for |offset_bound|, to estimate ELF size. + SECTION_BIT_USEFUL_FOR_OFFSET_BOUND = 1 << 2, + // Bit: Section potentially useful for pointer extraction. + SECTION_BIT_MAYBE_USEFUL_FOR_POINTERS = 1 << 3, + + // The following are verdicts from combining bits, to improve semantics. + // Default value: A section is malformed and invalidates ELF. + SECTION_IS_MALFORMED = 0, + // Section does not invalidate ELF, but is also not used for anything. + SECTION_IS_USELESS = SECTION_BIT_SAFE, +}; + +// Decides how a section affects ELF parsing, and returns a bit field composed +// from SectionJudgement values. +template <class TRAITS> +int JudgeSection(size_t image_size, const typename TRAITS::Elf_Shdr* section) { + // BufferRegion uses |size_t| this can be 32-bit in some cases. For Elf64 + // |sh_addr|, |sh_offset| and |sh_size| are 64-bit this can result in + // overflows in the subsequent validation steps. + if (!base::IsValueInRangeForNumericType<size_t>(section->sh_addr) || + !base::IsValueInRangeForNumericType<size_t>(section->sh_offset) || + !base::IsValueInRangeForNumericType<size_t>(section->sh_size)) { + return SECTION_IS_MALFORMED; + } + + // Examine RVA range: Reject if numerical overflow may happen. + if (!BufferRegion{static_cast<size_t>(section->sh_addr), + static_cast<size_t>(section->sh_size)} + .FitsIn(kSizeBound)) + return SECTION_IS_MALFORMED; + + // Examine offset range: If section takes up |image| data then be stricter. + size_t offset_bound = + (section->sh_type == elf::SHT_NOBITS) ? kSizeBound : image_size; + if (!BufferRegion{static_cast<size_t>(section->sh_offset), + static_cast<size_t>(section->sh_size)} + .FitsIn(offset_bound)) + return SECTION_IS_MALFORMED; + + // Empty sections don't contribute to offset-RVA mapping. 
For consistency, it
  // should also not affect |offset_bounds|.
  if (section->sh_size == 0)
    return SECTION_IS_USELESS;

  // Sections with |sh_addr == 0| are ignored because these tend to be
  // duplicates (which can cause problems for lookup) and are uninteresting.
  // For consistency, they should also not affect |offset_bounds|.
  if (section->sh_addr == 0)
    return SECTION_IS_USELESS;

  if (section->sh_type == elf::SHT_NOBITS) {
    // Special case for .tbss sections: These should be ignored because they may
    // have an offset-RVA mapping that doesn't match other sections.
    if (section->sh_flags & elf::SHF_TLS)
      return SECTION_IS_USELESS;

    // Section is useful for offset-RVA translation, but does not affect
    // |offset_bounds| since it can have large virtual size (e.g., .bss).
    return SECTION_BIT_SAFE | SECTION_BIT_USEFUL_FOR_ADDRESS_TRANSLATOR;
  }

  return SECTION_BIT_SAFE | SECTION_BIT_USEFUL_FOR_ADDRESS_TRANSLATOR |
         SECTION_BIT_USEFUL_FOR_OFFSET_BOUND |
         SECTION_BIT_MAYBE_USEFUL_FOR_POINTERS;
}

// Determines whether |section| is a reloc section.
template <class TRAITS>
bool IsRelocSection(const typename TRAITS::Elf_Shdr& section) {
  DCHECK_GT(section.sh_size, 0U);
  if (section.sh_type == elf::SHT_REL) {
    // Also validate |section.sh_entsize|, which gets used later.
    return section.sh_entsize == sizeof(typename TRAITS::Elf_Rel);
  }
  if (section.sh_type == elf::SHT_RELA)
    return section.sh_entsize == sizeof(typename TRAITS::Elf_Rela);
  return false;
}

// Determines whether |section| is a section with executable code.
template <class TRAITS>
bool IsExecSection(const typename TRAITS::Elf_Shdr& section) {
  DCHECK_GT(section.sh_size, 0U);
  // Executable code lives in SHT_PROGBITS sections that carry SHF_EXECINSTR.
  return section.sh_type == elf::SHT_PROGBITS &&
         (section.sh_flags & elf::SHF_EXECINSTR) != 0;
}

}  // namespace

/******** Elf32Traits ********/

// Out-of-line definitions for the static constexpr trait members (required
// for ODR-use under pre-C++17 rules).

// static
constexpr Bitness Elf32Traits::kBitness;
constexpr elf::FileClass Elf32Traits::kIdentificationClass;

/******** Elf32IntelTraits ********/

// static
constexpr ExecutableType Elf32IntelTraits::kExeType;
const char Elf32IntelTraits::kExeTypeString[] = "ELF x86";
constexpr elf::MachineArchitecture Elf32IntelTraits::kMachineValue;
constexpr uint32_t Elf32IntelTraits::kRelType;

/******** ElfAArch32Traits ********/

// static
constexpr ExecutableType ElfAArch32Traits::kExeType;
const char ElfAArch32Traits::kExeTypeString[] = "ELF ARM";
constexpr elf::MachineArchitecture ElfAArch32Traits::kMachineValue;
constexpr uint32_t ElfAArch32Traits::kRelType;

/******** Elf64Traits ********/

// static
constexpr Bitness Elf64Traits::kBitness;
constexpr elf::FileClass Elf64Traits::kIdentificationClass;

/******** Elf64IntelTraits ********/

// static
constexpr ExecutableType Elf64IntelTraits::kExeType;
const char Elf64IntelTraits::kExeTypeString[] = "ELF x64";
constexpr elf::MachineArchitecture Elf64IntelTraits::kMachineValue;
constexpr uint32_t Elf64IntelTraits::kRelType;

/******** ElfAArch64Traits ********/

// static
constexpr ExecutableType ElfAArch64Traits::kExeType;
const char ElfAArch64Traits::kExeTypeString[] = "ELF ARM64";
constexpr elf::MachineArchitecture ElfAArch64Traits::kMachineValue;
constexpr uint32_t ElfAArch64Traits::kRelType;

/******** DisassemblerElf ********/

// Applies a sequence of cheap ELF header checks; a |true| result means |image|
// *may* be a supported ELF file (per the declaration comment in the header).
// static.
template <class TRAITS>
bool DisassemblerElf<TRAITS>::QuickDetect(ConstBufferView image) {
  BufferSource source(image);

  // Do not consume the bytes for the magic value, as they are part of the
  // header.
  if (!source.CheckNextBytes({0x7F, 'E', 'L', 'F'}))
    return false;

  auto* header = source.GetPointer<typename Traits::Elf_Ehdr>();
  if (!header)
    return false;  // Not enough data for a full ELF header.

  // File class (32- vs. 64-bit) must match |Traits|.
  if (header->e_ident[elf::EI_CLASS] != Traits::kIdentificationClass)
    return false;

  if (header->e_ident[elf::EI_DATA] != 1)  // Only ELFDATA2LSB is supported.
    return false;

  // Only executables (ET_EXEC) and shared objects (ET_DYN) are accepted.
  if (header->e_type != elf::ET_EXEC && header->e_type != elf::ET_DYN)
    return false;

  if (header->e_version != 1 || header->e_ident[elf::EI_VERSION] != 1)
    return false;

  // Machine architecture must match the concrete disassembler's traits.
  if (header->e_machine != supported_architecture())
    return false;

  // Section header entries must have the size this parser expects; otherwise
  // the section table would be misread later.
  if (header->e_shentsize != sizeof(typename Traits::Elf_Shdr))
    return false;

  return true;
}

template <class TRAITS>
DisassemblerElf<TRAITS>::~DisassemblerElf() = default;

// Returns the architecture-specific executable type constant from |Traits|.
template <class TRAITS>
ExecutableType DisassemblerElf<TRAITS>::GetExeType() const {
  return Traits::kExeType;
}

// Returns a human-readable executable type name, e.g., "ELF x86".
template <class TRAITS>
std::string DisassemblerElf<TRAITS>::GetExeTypeString() const {
  return Traits::kExeTypeString;
}

// |num_equivalence_iterations_| = 2 for reloc -> abs32.
+template <class TRAITS> +DisassemblerElf<TRAITS>::DisassemblerElf() : Disassembler(2) {} + +template <class TRAITS> +bool DisassemblerElf<TRAITS>::Parse(ConstBufferView image) { + image_ = image; + if (!ParseHeader()) + return false; + ParseSections(); + return true; +} + +template <class TRAITS> +std::unique_ptr<ReferenceReader> DisassemblerElf<TRAITS>::MakeReadRelocs( + offset_t lo, + offset_t hi) { + DCHECK_LE(lo, hi); + DCHECK_LE(hi, image_.size()); + + if (reloc_section_dims_.empty()) + return std::make_unique<EmptyReferenceReader>(); + + return std::make_unique<RelocReaderElf>( + image_, Traits::kBitness, reloc_section_dims_, + supported_relocation_type(), lo, hi, translator_); +} + +template <class TRAITS> +std::unique_ptr<ReferenceWriter> DisassemblerElf<TRAITS>::MakeWriteRelocs( + MutableBufferView image) { + return std::make_unique<RelocWriterElf>(image, Traits::kBitness, translator_); +} + +template <class TRAITS> +bool DisassemblerElf<TRAITS>::ParseHeader() { + BufferSource source(image_); + // Ensure any offsets will fit within the |image_|'s bounds. + if (!base::IsValueInRangeForNumericType<offset_t>(image_.size())) + return false; + + // Ensures |header_| is valid later on. 
+ if (!QuickDetect(image_)) + return false; + + header_ = source.GetPointer<typename Traits::Elf_Ehdr>(); + + sections_count_ = header_->e_shnum; + source = std::move(BufferSource(image_).Skip(header_->e_shoff)); + sections_ = source.GetArray<typename Traits::Elf_Shdr>(sections_count_); + if (!sections_) + return false; + offset_t section_table_end = + base::checked_cast<offset_t>(source.begin() - image_.begin()); + + segments_count_ = header_->e_phnum; + source = std::move(BufferSource(image_).Skip(header_->e_phoff)); + segments_ = source.GetArray<typename Traits::Elf_Phdr>(segments_count_); + if (!segments_) + return false; + offset_t segment_table_end = + base::checked_cast<offset_t>(source.begin() - image_.begin()); + + // Check string section -- even though we've stopped using them. + elf::Elf32_Half string_section_id = header_->e_shstrndx; + if (string_section_id >= sections_count_) + return false; + size_t section_names_size = sections_[string_section_id].sh_size; + if (section_names_size > 0) { + // If nonempty, then last byte of string section must be null. + const char* section_names = nullptr; + source = std::move( + BufferSource(image_).Skip(sections_[string_section_id].sh_offset)); + section_names = source.GetArray<char>(section_names_size); + if (!section_names || section_names[section_names_size - 1] != '\0') + return false; + } + + // Establish bound on encountered offsets. + offset_t offset_bound = std::max(section_table_end, segment_table_end); + + // Visits |segments_| to get estimate on |offset_bound|. + for (const typename Traits::Elf_Phdr* segment = segments_; + segment != segments_ + segments_count_; ++segment) { + // |image_.covers()| is a sufficient check except when size_t is 32 bit and + // parsing ELF64. In such cases a value-in-range check is needed on the + // segment. This fixes crbug/1035603. 
    offset_t segment_end;
    base::CheckedNumeric<offset_t> checked_segment_end = segment->p_offset;
    checked_segment_end += segment->p_filesz;
    if (!checked_segment_end.AssignIfValid(&segment_end) ||
        !image_.covers({static_cast<size_t>(segment->p_offset),
                        static_cast<size_t>(segment->p_filesz)})) {
      return false;
    }
    offset_bound = std::max(offset_bound, segment_end);
  }

  // Visit and validate each section; add address translation data to |units|.
  std::vector<AddressTranslator::Unit> units;
  units.reserve(sections_count_);
  section_judgements_.reserve(sections_count_);

  for (int i = 0; i < sections_count_; ++i) {
    const typename Traits::Elf_Shdr* section = &sections_[i];
    int judgement = JudgeSection<Traits>(image_.size(), section);
    section_judgements_.push_back(judgement);
    if ((judgement & SECTION_BIT_SAFE) == 0)
      return false;

    uint32_t sh_size = base::checked_cast<uint32_t>(section->sh_size);
    offset_t sh_offset = base::checked_cast<offset_t>(section->sh_offset);
    rva_t sh_addr = base::checked_cast<rva_t>(section->sh_addr);
    if ((judgement & SECTION_BIT_USEFUL_FOR_ADDRESS_TRANSLATOR) != 0) {
      // Store mappings between RVA and offset.
      units.push_back({sh_offset, sh_size, sh_addr, sh_size});
    }
    if ((judgement & SECTION_BIT_USEFUL_FOR_OFFSET_BOUND) != 0) {
      offset_t section_end = base::checked_cast<offset_t>(sh_offset + sh_size);
      offset_bound = std::max(offset_bound, section_end);
    }
  }

  // Initialize |translator_| for offset-RVA translations. Any inconsistency
  // (e.g., 2 offsets correspond to the same RVA) would invalidate the ELF file.
+ if (translator_.Initialize(std::move(units)) != AddressTranslator::kSuccess) + return false; + + DCHECK_LE(offset_bound, image_.size()); + image_.shrink(offset_bound); + return true; +} + +template <class TRAITS> +void DisassemblerElf<TRAITS>::ExtractInterestingSectionHeaders() { + DCHECK(reloc_section_dims_.empty()); + DCHECK(exec_headers_.empty()); + for (elf::Elf32_Half i = 0; i < sections_count_; ++i) { + const typename Traits::Elf_Shdr* section = sections_ + i; + if ((section_judgements_[i] & SECTION_BIT_MAYBE_USEFUL_FOR_POINTERS) != 0) { + if (IsRelocSection<Traits>(*section)) + reloc_section_dims_.emplace_back(*section); + else if (IsExecSection<Traits>(*section)) + exec_headers_.push_back(section); + } + } + auto comp = [](const typename Traits::Elf_Shdr* a, + const typename Traits::Elf_Shdr* b) { + return a->sh_offset < b->sh_offset; + }; + std::sort(reloc_section_dims_.begin(), reloc_section_dims_.end()); + std::sort(exec_headers_.begin(), exec_headers_.end(), comp); +} + +template <class TRAITS> +void DisassemblerElf<TRAITS>::GetAbs32FromRelocSections() { + constexpr int kAbs32Width = Traits::kVAWidth; + DCHECK(abs32_locations_.empty()); + + // Read reloc targets to get preliminary abs32 locations. + std::unique_ptr<ReferenceReader> relocs = MakeReadRelocs(0, offset_t(size())); + for (auto ref = relocs->GetNext(); ref.has_value(); ref = relocs->GetNext()) + abs32_locations_.push_back(ref->target); + + std::sort(abs32_locations_.begin(), abs32_locations_.end()); + + // Abs32 references must have targets translatable to offsets. Remove those + // that are unable to do so. + size_t num_untranslatable = + RemoveUntranslatableAbs32(image_, {Traits::kBitness, kElfImageBase}, + translator_, &abs32_locations_); + LOG_IF(WARNING, num_untranslatable) << "Removed " << num_untranslatable + << " untranslatable abs32 references."; + + // Abs32 reference bodies must not overlap. If found, simply remove them. 
+ size_t num_overlapping = + RemoveOverlappingAbs32Locations(kAbs32Width, &abs32_locations_); + LOG_IF(WARNING, num_overlapping) + << "Removed " << num_overlapping + << " abs32 references with overlapping bodies."; + + abs32_locations_.shrink_to_fit(); +} + +template <class TRAITS> +void DisassemblerElf<TRAITS>::GetRel32FromCodeSections() { + for (const typename Traits::Elf_Shdr* section : exec_headers_) + ParseExecSection(*section); + PostProcessRel32(); +} + +template <class TRAITS> +void DisassemblerElf<TRAITS>::ParseSections() { + ExtractInterestingSectionHeaders(); + GetAbs32FromRelocSections(); + GetRel32FromCodeSections(); +} + +/******** DisassemblerElfIntel ********/ + +template <class TRAITS> +DisassemblerElfIntel<TRAITS>::DisassemblerElfIntel() = default; + +template <class TRAITS> +DisassemblerElfIntel<TRAITS>::~DisassemblerElfIntel() = default; + +template <class TRAITS> +std::vector<ReferenceGroup> DisassemblerElfIntel<TRAITS>::MakeReferenceGroups() + const { + return { + {ReferenceTypeTraits{sizeof(TRAITS::Elf_Rel::r_offset), TypeTag(kReloc), + PoolTag(kReloc)}, + &DisassemblerElfIntel<TRAITS>::MakeReadRelocs, + &DisassemblerElfIntel<TRAITS>::MakeWriteRelocs}, + {ReferenceTypeTraits{Traits::kVAWidth, TypeTag(kAbs32), PoolTag(kAbs32)}, + &DisassemblerElfIntel<TRAITS>::MakeReadAbs32, + &DisassemblerElfIntel<TRAITS>::MakeWriteAbs32}, + // N.B.: Rel32 |width| is 4 bytes, even for x64. + {ReferenceTypeTraits{4, TypeTag(kRel32), PoolTag(kRel32)}, + &DisassemblerElfIntel<TRAITS>::MakeReadRel32, + &DisassemblerElfIntel<TRAITS>::MakeWriteRel32}}; +} + +template <class TRAITS> +void DisassemblerElfIntel<TRAITS>::ParseExecSection( + const typename TRAITS::Elf_Shdr& section) { + constexpr int kAbs32Width = Traits::kVAWidth; + + // |this->| is needed to access protected members of templated base class. To + // reduce noise, use local references for these. 
+ ConstBufferView& image_ = this->image_; + const AddressTranslator& translator_ = this->translator_; + auto& abs32_locations_ = this->abs32_locations_; + + // Range of values was ensured in ParseHeader(). + rva_t start_rva = base::checked_cast<rva_t>(section.sh_addr); + rva_t end_rva = base::checked_cast<rva_t>(start_rva + section.sh_size); + + AddressTranslator::RvaToOffsetCache target_rva_checker(translator_); + + ConstBufferView region(image_.begin() + section.sh_offset, section.sh_size); + Abs32GapFinder gap_finder(image_, region, abs32_locations_, kAbs32Width); + typename TRAITS::Rel32FinderUse rel_finder(image_, translator_); + // Iterate over gaps between abs32 references, to avoid collision. + while (gap_finder.FindNext()) { + rel_finder.SetRegion(gap_finder.GetGap()); + while (rel_finder.FindNext()) { + auto rel32 = rel_finder.GetRel32(); + if (target_rva_checker.IsValid(rel32.target_rva) && + (rel32.can_point_outside_section || + (start_rva <= rel32.target_rva && rel32.target_rva < end_rva))) { + rel_finder.Accept(); + rel32_locations_.push_back(rel32.location); + } + } + } +} + +template <class TRAITS> +void DisassemblerElfIntel<TRAITS>::PostProcessRel32() { + rel32_locations_.shrink_to_fit(); + std::sort(rel32_locations_.begin(), rel32_locations_.end()); +} + +template <class TRAITS> +std::unique_ptr<ReferenceReader> DisassemblerElfIntel<TRAITS>::MakeReadAbs32( + offset_t lo, + offset_t hi) { + // TODO(huangs): Don't use Abs32RvaExtractorWin32 here; use new class that + // caters to different ELF architectures. 
+ Abs32RvaExtractorWin32 abs_rva_extractor( + this->image_, AbsoluteAddress(TRAITS::kBitness, kElfImageBase), + this->abs32_locations_, lo, hi); + return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor), + this->translator_); +} + +template <class TRAITS> +std::unique_ptr<ReferenceWriter> DisassemblerElfIntel<TRAITS>::MakeWriteAbs32( + MutableBufferView image) { + return std::make_unique<Abs32WriterWin32>( + image, AbsoluteAddress(TRAITS::kBitness, kElfImageBase), + this->translator_); +} + +template <class TRAITS> +std::unique_ptr<ReferenceReader> DisassemblerElfIntel<TRAITS>::MakeReadRel32( + offset_t lo, + offset_t hi) { + return std::make_unique<Rel32ReaderX86>(this->image_, lo, hi, + &rel32_locations_, this->translator_); +} + +template <class TRAITS> +std::unique_ptr<ReferenceWriter> DisassemblerElfIntel<TRAITS>::MakeWriteRel32( + MutableBufferView image) { + return std::make_unique<Rel32WriterX86>(image, this->translator_); +} + +// Explicit instantiation for supported classes. +template class DisassemblerElfIntel<Elf32IntelTraits>; +template class DisassemblerElfIntel<Elf64IntelTraits>; +template bool DisassemblerElf<Elf32IntelTraits>::QuickDetect( + ConstBufferView image); +template bool DisassemblerElf<Elf64IntelTraits>::QuickDetect( + ConstBufferView image); + +/******** DisassemblerElfArm ********/ + +template <class Traits> +DisassemblerElfArm<Traits>::DisassemblerElfArm() = default; + +template <class Traits> +DisassemblerElfArm<Traits>::~DisassemblerElfArm() = default; + +template <class Traits> +bool DisassemblerElfArm<Traits>::IsTargetOffsetInExecSection( + offset_t offset) const { + // Executable sections can appear in large numbers in .o files and in + // pathological cases. Since this function may be called for each reference + // candidate, linear search may be too slow (so use binary search). 
+ return IsTargetOffsetInElfSectionList(this->exec_headers_, offset); +} + +template <class Traits> +void DisassemblerElfArm<Traits>::ParseExecSection( + const typename Traits::Elf_Shdr& section) { + ConstBufferView& image_ = this->image_; + const AddressTranslator& translator_ = this->translator_; + auto& abs32_locations_ = this->abs32_locations_; + + ConstBufferView region(image_.begin() + section.sh_offset, section.sh_size); + Abs32GapFinder gap_finder(image_, region, abs32_locations_, Traits::kVAWidth); + std::unique_ptr<typename Traits::Rel32FinderUse> rel_finder = + MakeRel32Finder(section); + AddressTranslator::RvaToOffsetCache rva_to_offset(translator_); + while (gap_finder.FindNext()) { + rel_finder->SetRegion(gap_finder.GetGap()); + while (rel_finder->FindNext()) { + auto rel32 = rel_finder->GetRel32(); + offset_t target_offset = rva_to_offset.Convert(rel32.target_rva); + if (target_offset != kInvalidOffset) { + // For robustness, reject illegal offsets, which can arise from, e.g., + // misidentify ARM vs. THUMB2 mode, or even misidentifying data as code! + if (IsTargetOffsetInExecSection(target_offset)) { + rel_finder->Accept(); + rel32_locations_table_[rel32.type].push_back(rel32.location); + } + } + } + } +} + +template <class Traits> +void DisassemblerElfArm<Traits>::PostProcessRel32() { + for (int type = 0; type < AArch32Rel32Translator::NUM_ADDR_TYPE; ++type) { + std::sort(rel32_locations_table_[type].begin(), + rel32_locations_table_[type].end()); + rel32_locations_table_[type].shrink_to_fit(); + } +} + +template <class Traits> +std::unique_ptr<ReferenceReader> DisassemblerElfArm<Traits>::MakeReadAbs32( + offset_t lo, + offset_t hi) { + // TODO(huangs): Reconcile the use of Win32-specific classes in ARM code! 
+ Abs32RvaExtractorWin32 abs_rva_extractor(this->image_, + AbsoluteAddress(Traits::kBitness, 0), + this->abs32_locations_, lo, hi); + return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor), + this->translator_); +} + +template <class Traits> +std::unique_ptr<ReferenceWriter> DisassemblerElfArm<Traits>::MakeWriteAbs32( + MutableBufferView image) { + return std::make_unique<Abs32WriterWin32>( + image, AbsoluteAddress(Traits::kBitness, 0), this->translator_); +} + +/******** DisassemblerElfAArch32 ********/ + +DisassemblerElfAArch32::DisassemblerElfAArch32() = default; +DisassemblerElfAArch32::~DisassemblerElfAArch32() = default; + +std::vector<ReferenceGroup> DisassemblerElfAArch32::MakeReferenceGroups() + const { + return { + {ReferenceTypeTraits{sizeof(Traits::Elf_Rel::r_offset), + TypeTag(AArch32ReferenceType::kReloc), + PoolTag(ArmReferencePool::kPoolReloc)}, + &DisassemblerElfAArch32::MakeReadRelocs, + &DisassemblerElfAArch32::MakeWriteRelocs}, + {ReferenceTypeTraits{Traits::kVAWidth, + TypeTag(AArch32ReferenceType::kAbs32), + PoolTag(ArmReferencePool::kPoolAbs32)}, + &DisassemblerElfAArch32::MakeReadAbs32, + &DisassemblerElfAArch32::MakeWriteAbs32}, + {ReferenceTypeTraits{4, TypeTag(AArch32ReferenceType::kRel32_A24), + PoolTag(ArmReferencePool::kPoolRel32)}, + &DisassemblerElfAArch32::MakeReadRel32A24, + &DisassemblerElfAArch32::MakeWriteRel32A24}, + {ReferenceTypeTraits{2, TypeTag(AArch32ReferenceType::kRel32_T8), + PoolTag(ArmReferencePool::kPoolRel32)}, + &DisassemblerElfAArch32::MakeReadRel32T8, + &DisassemblerElfAArch32::MakeWriteRel32T8}, + {ReferenceTypeTraits{2, TypeTag(AArch32ReferenceType::kRel32_T11), + PoolTag(ArmReferencePool::kPoolRel32)}, + &DisassemblerElfAArch32::MakeReadRel32T11, + &DisassemblerElfAArch32::MakeWriteRel32T11}, + {ReferenceTypeTraits{4, TypeTag(AArch32ReferenceType::kRel32_T20), + PoolTag(ArmReferencePool::kPoolRel32)}, + &DisassemblerElfAArch32::MakeReadRel32T20, + &DisassemblerElfAArch32::MakeWriteRel32T20}, + 
{ReferenceTypeTraits{4, TypeTag(AArch32ReferenceType::kRel32_T24), + PoolTag(ArmReferencePool::kPoolRel32)}, + &DisassemblerElfAArch32::MakeReadRel32T24, + &DisassemblerElfAArch32::MakeWriteRel32T24}, + }; +} + +std::unique_ptr<DisassemblerElfAArch32::Traits::Rel32FinderUse> +DisassemblerElfAArch32::MakeRel32Finder( + const typename Traits::Elf_Shdr& section) { + return std::make_unique<Rel32FinderAArch32>(image_, translator_, + IsExecSectionThumb2(section)); +} + +bool DisassemblerElfAArch32::IsExecSectionThumb2( + const typename Traits::Elf_Shdr& section) const { + // ARM mode requires 4-byte alignment. + if (section.sh_addr % 4 != 0 || section.sh_size % 4 != 0) + return true; + const uint8_t* first = image_.begin() + section.sh_offset; + const uint8_t* end = first + section.sh_size; + // Each instruction in 32-bit ARM (little-endian) looks like + // ?? ?? ?? X?, + // where X specifies conditional execution. X = 0xE represents AL = "ALways + // execute", and tends to appear very often. We use this as our main indicator + // to discern 32-bit ARM mode from THUMB2 mode. + size_t num = 0; + size_t den = 0; + for (const uint8_t* cur = first; cur < end; cur += 4) { + // |cur[3]| is within bounds because |end - cur| is a multiple of 4. 
+ uint8_t maybe_cond = cur[3] & 0xF0; + if (maybe_cond == 0xE0) + ++num; + ++den; + } + + if (den > 0) { + LOG(INFO) << "Section scan: " << num << " / " << den << " => " + << base::StringPrintf("%.2f", num * 100.0 / den) << "%"; + } + return num < den * kAArch32BitCondAlwaysDensityThreshold; +} + +std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32A24( + offset_t lower, + offset_t upper) { + return std::make_unique< + Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_A24>>( + translator_, image_, + rel32_locations_table_[AArch32Rel32Translator::ADDR_A24], lower, upper); +} + +std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32A24( + MutableBufferView image) { + return std::make_unique< + Rel32WriterArm<AArch32Rel32Translator::AddrTraits_A24>>(translator_, + image); +} + +std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32T8( + offset_t lower, + offset_t upper) { + return std::make_unique< + Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_T8>>( + translator_, image_, + rel32_locations_table_[AArch32Rel32Translator::ADDR_T8], lower, upper); +} + +std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32T8( + MutableBufferView image) { + return std::make_unique< + Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T8>>(translator_, + image); +} + +std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32T11( + offset_t lower, + offset_t upper) { + return std::make_unique< + Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_T11>>( + translator_, image_, + rel32_locations_table_[AArch32Rel32Translator::ADDR_T11], lower, upper); +} + +std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32T11( + MutableBufferView image) { + return std::make_unique< + Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T11>>(translator_, + image); +} + +std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32T20( + offset_t lower, + offset_t upper) { + return std::make_unique< 
+ Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_T20>>( + translator_, image_, + rel32_locations_table_[AArch32Rel32Translator::ADDR_T20], lower, upper); +} + +std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32T20( + MutableBufferView image) { + return std::make_unique< + Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T20>>(translator_, + image); +} + +std::unique_ptr<ReferenceReader> DisassemblerElfAArch32::MakeReadRel32T24( + offset_t lower, + offset_t upper) { + return std::make_unique< + Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_T24>>( + translator_, image_, + rel32_locations_table_[AArch32Rel32Translator::ADDR_T24], lower, upper); +} + +std::unique_ptr<ReferenceWriter> DisassemblerElfAArch32::MakeWriteRel32T24( + MutableBufferView image) { + return std::make_unique< + Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T24>>(translator_, + image); +} + +/******** DisassemblerElfAArch64 ********/ + +DisassemblerElfAArch64::DisassemblerElfAArch64() = default; + +DisassemblerElfAArch64::~DisassemblerElfAArch64() = default; + +std::vector<ReferenceGroup> DisassemblerElfAArch64::MakeReferenceGroups() + const { + return { + {ReferenceTypeTraits{sizeof(Traits::Elf_Rel::r_offset), + TypeTag(AArch64ReferenceType::kReloc), + PoolTag(ArmReferencePool::kPoolReloc)}, + &DisassemblerElfAArch64::MakeReadRelocs, + &DisassemblerElfAArch64::MakeWriteRelocs}, + {ReferenceTypeTraits{Traits::kVAWidth, + TypeTag(AArch64ReferenceType::kAbs32), + PoolTag(ArmReferencePool::kPoolAbs32)}, + &DisassemblerElfAArch64::MakeReadAbs32, + &DisassemblerElfAArch64::MakeWriteAbs32}, + {ReferenceTypeTraits{4, TypeTag(AArch64ReferenceType::kRel32_Immd14), + PoolTag(ArmReferencePool::kPoolRel32)}, + &DisassemblerElfAArch64::MakeReadRel32Immd14, + &DisassemblerElfAArch64::MakeWriteRel32Immd14}, + {ReferenceTypeTraits{4, TypeTag(AArch64ReferenceType::kRel32_Immd19), + PoolTag(ArmReferencePool::kPoolRel32)}, + &DisassemblerElfAArch64::MakeReadRel32Immd19, + 
&DisassemblerElfAArch64::MakeWriteRel32Immd19}, + {ReferenceTypeTraits{4, TypeTag(AArch64ReferenceType::kRel32_Immd26), + PoolTag(ArmReferencePool::kPoolRel32)}, + &DisassemblerElfAArch64::MakeReadRel32Immd26, + &DisassemblerElfAArch64::MakeWriteRel32Immd26}, + }; +} + +std::unique_ptr<DisassemblerElfAArch64::Traits::Rel32FinderUse> +DisassemblerElfAArch64::MakeRel32Finder( + const typename Traits::Elf_Shdr& section) { + return std::make_unique<Rel32FinderAArch64>(image_, translator_); +} + +std::unique_ptr<ReferenceReader> DisassemblerElfAArch64::MakeReadRel32Immd14( + offset_t lower, + offset_t upper) { + return std::make_unique< + Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd14>>( + translator_, this->image_, + rel32_locations_table_[AArch64Rel32Translator::ADDR_IMMD14], lower, + upper); +} + +std::unique_ptr<ReferenceWriter> DisassemblerElfAArch64::MakeWriteRel32Immd14( + MutableBufferView image) { + return std::make_unique< + Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd14>>(translator_, + image); +} + +std::unique_ptr<ReferenceReader> DisassemblerElfAArch64::MakeReadRel32Immd19( + offset_t lower, + offset_t upper) { + return std::make_unique< + Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd19>>( + translator_, this->image_, + rel32_locations_table_[AArch64Rel32Translator::ADDR_IMMD19], lower, + upper); +} + +std::unique_ptr<ReferenceWriter> DisassemblerElfAArch64::MakeWriteRel32Immd19( + MutableBufferView image) { + return std::make_unique< + Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd19>>(translator_, + image); +} + +std::unique_ptr<ReferenceReader> DisassemblerElfAArch64::MakeReadRel32Immd26( + offset_t lower, + offset_t upper) { + return std::make_unique< + Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd26>>( + translator_, this->image_, + rel32_locations_table_[AArch64Rel32Translator::ADDR_IMMD26], lower, + upper); +} + +std::unique_ptr<ReferenceWriter> DisassemblerElfAArch64::MakeWriteRel32Immd26( + 
MutableBufferView image) { + return std::make_unique< + Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd26>>(translator_, + image); +} + +// Explicit instantiation for supported classes. +template class DisassemblerElfArm<ElfAArch32Traits>; +template class DisassemblerElfArm<ElfAArch64Traits>; +template bool DisassemblerElf<ElfAArch32Traits>::QuickDetect( + ConstBufferView image); +template bool DisassemblerElf<ElfAArch64Traits>::QuickDetect( + ConstBufferView image); + +} // namespace zucchini diff --git a/disassembler_elf.h b/disassembler_elf.h new file mode 100644 index 0000000..0bd11a6 --- /dev/null +++ b/disassembler_elf.h @@ -0,0 +1,383 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_ELF_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_ELF_H_ + +#include <stdint.h> + +#include <algorithm> +#include <deque> +#include <memory> +#include <string> +#include <vector> + +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/rel32_finder.h" +#include "components/zucchini/rel32_utils.h" +#include "components/zucchini/reloc_elf.h" +#include "components/zucchini/type_elf.h" + +namespace zucchini { + +struct ArmReferencePool { + enum : uint8_t { + kPoolReloc, + kPoolAbs32, + kPoolRel32, + }; +}; + +struct AArch32ReferenceType { + enum : uint8_t { + kReloc, // kPoolReloc + + kAbs32, // kPoolAbs32 + + kRel32_A24, // kPoolRel32 + kRel32_T8, + kRel32_T11, + kRel32_T20, + kRel32_T24, + + kTypeCount + }; +}; + +struct AArch64ReferenceType { + enum : uint8_t { + kReloc, // kPoolReloc + + kAbs32, // kPoolAbs32 + + kRel32_Immd14, // kPoolRel32 + kRel32_Immd19, + kRel32_Immd26, + + kTypeCount + }; +}; + +struct Elf32Traits { + static constexpr 
Bitness kBitness = kBit32;
  static constexpr elf::FileClass kIdentificationClass = elf::ELFCLASS32;
  using Elf_Shdr = elf::Elf32_Shdr;
  using Elf_Phdr = elf::Elf32_Phdr;
  using Elf_Ehdr = elf::Elf32_Ehdr;
  using Elf_Rel = elf::Elf32_Rel;
  using Elf_Rela = elf::Elf32_Rela;
};

// Architecture-specific definitions.

// Traits for 32-bit x86 ELF executables.
struct Elf32IntelTraits : public Elf32Traits {
  static constexpr ExecutableType kExeType = kExeTypeElfX86;
  static const char kExeTypeString[];
  static constexpr elf::MachineArchitecture kMachineValue = elf::EM_386;
  static constexpr uint32_t kRelType = elf::R_386_RELATIVE;
  enum : uint32_t { kVAWidth = 4 };
  using Rel32FinderUse = Rel32FinderX86;
};

// Traits for AArch32 (ARM 32-bit) ELF executables.
struct ElfAArch32Traits : public Elf32Traits {
  static constexpr ExecutableType kExeType = kExeTypeElfAArch32;
  static const char kExeTypeString[];
  static constexpr elf::MachineArchitecture kMachineValue = elf::EM_ARM;
  static constexpr uint32_t kRelType = elf::R_ARM_RELATIVE;
  enum : uint32_t { kVAWidth = 4 };
  using ArmReferenceType = AArch32ReferenceType;
  using Rel32FinderUse = Rel32FinderAArch32;
};

// Traits shared by all 64-bit ELF files: names the 64-bit ELF structures.
struct Elf64Traits {
  static constexpr Bitness kBitness = kBit64;
  static constexpr elf::FileClass kIdentificationClass = elf::ELFCLASS64;
  using Elf_Shdr = elf::Elf64_Shdr;
  using Elf_Phdr = elf::Elf64_Phdr;
  using Elf_Ehdr = elf::Elf64_Ehdr;
  using Elf_Rel = elf::Elf64_Rel;
  using Elf_Rela = elf::Elf64_Rela;
};

// Architecture-specific definitions.
// Traits for 64-bit x86 ELF executables.
struct Elf64IntelTraits : public Elf64Traits {
  static constexpr ExecutableType kExeType = kExeTypeElfX64;
  static const char kExeTypeString[];
  static constexpr elf::MachineArchitecture kMachineValue = elf::EM_X86_64;
  static constexpr uint32_t kRelType = elf::R_X86_64_RELATIVE;
  enum : uint32_t { kVAWidth = 8 };
  using Rel32FinderUse = Rel32FinderX64;
};

// Traits for AArch64 (ARM 64-bit) ELF executables.
struct ElfAArch64Traits : public Elf64Traits {
  static constexpr ExecutableType kExeType = kExeTypeElfAArch64;
  static const char kExeTypeString[];
  static constexpr elf::MachineArchitecture kMachineValue = elf::EM_AARCH64;
  // TODO(huangs): See if R_AARCH64_GLOB_DAT and R_AARCH64_JUMP_SLOT should be
  // used.
  static constexpr uint32_t kRelType = elf::R_AARCH64_RELATIVE;
  enum : uint32_t { kVAWidth = 8 };
  using ArmReferenceType = AArch64ReferenceType;
  using Rel32FinderUse = Rel32FinderAArch64;
};

// Decides whether target |offset| is covered by a section in |sorted_headers|.
template <class ELF_SHDR>
bool IsTargetOffsetInElfSectionList(
    const std::vector<const ELF_SHDR*>& sorted_headers,
    offset_t offset) {
  // Use binary search to search in a list of intervals, in a fashion similar to
  // AddressTranslator::OffsetToUnit().
  auto comp = [](offset_t offset, const ELF_SHDR* header) -> bool {
    return offset < header->sh_offset;
  };
  // |it| is the first section starting strictly after |offset|; the only
  // candidate that can contain |offset| is therefore the section before it.
  auto it = std::upper_bound(sorted_headers.begin(), sorted_headers.end(),
                             offset, comp);
  if (it == sorted_headers.begin())
    return false;
  --it;
  // Just check offset without worrying about width, since this is a target.
  // Not using RangeCovers() because |sh_offset| and |sh_size| can be 64-bit.
  return offset >= (*it)->sh_offset &&
         offset - (*it)->sh_offset < (*it)->sh_size;
}

// Disassembler for ELF.
template <class TRAITS>
class DisassemblerElf : public Disassembler {
 public:
  using Traits = TRAITS;
  // Applies quick checks to determine whether |image| *may* point to the start
  // of an executable.
// Returns true iff the check passes.
  static bool QuickDetect(ConstBufferView image);

  DisassemblerElf(const DisassemblerElf&) = delete;
  const DisassemblerElf& operator=(const DisassemblerElf&) = delete;
  ~DisassemblerElf() override;

  // Disassembler:
  ExecutableType GetExeType() const override;
  std::string GetExeTypeString() const override;
  std::vector<ReferenceGroup> MakeReferenceGroups() const override = 0;

  // Read/Write functions that are common among different architectures.
  std::unique_ptr<ReferenceReader> MakeReadRelocs(offset_t lo, offset_t hi);
  std::unique_ptr<ReferenceWriter> MakeWriteRelocs(MutableBufferView image);

  const AddressTranslator& translator() const { return translator_; }

 protected:
  friend Disassembler;

  DisassemblerElf();

  bool Parse(ConstBufferView image) override;

  // Returns the supported Elf_Ehdr::e_machine enum.
  static constexpr elf::MachineArchitecture supported_architecture() {
    return Traits::kMachineValue;
  }

  // Returns the type to look for in the reloc section.
  static constexpr uint32_t supported_relocation_type() {
    return Traits::kRelType;
  }

  // Performs architecture-specific parsing of an executable section, to extract
  // rel32 references.
  virtual void ParseExecSection(const typename Traits::Elf_Shdr& section) = 0;

  // Processes rel32 data after they are extracted from executable sections.
  virtual void PostProcessRel32() = 0;

  // Parses ELF header and section headers, and performs basic validation.
  // Returns whether parsing was successful.
  bool ParseHeader();

  // Extracts and stores section headers that we need.
  void ExtractInterestingSectionHeaders();

  // Parsing functions that extract references from various sections.
  void GetAbs32FromRelocSections();
  void GetRel32FromCodeSections();
  void ParseSections();

  // Main ELF header.
  const typename Traits::Elf_Ehdr* header_ = nullptr;

  // Section header table, ordered by section id.
  elf::Elf32_Half sections_count_ = 0;
  const typename Traits::Elf_Shdr* sections_ = nullptr;

  // Program header table.
  elf::Elf32_Half segments_count_ = 0;
  const typename Traits::Elf_Phdr* segments_ = nullptr;

  // Bit fields to store the role each section may play.
  std::vector<int> section_judgements_;

  // Translator between offsets and RVAs.
  AddressTranslator translator_;

  // Identity translator for abs32 translation.
  AddressTranslator identity_translator_;

  // Extracted relocation section dimensions data, sorted by file offsets.
  std::vector<SectionDimensionsElf> reloc_section_dims_;

  // Headers of executable sections, sorted by file offsets of the data each
  // header points to.
  std::vector<const typename Traits::Elf_Shdr*> exec_headers_;

  // Sorted file offsets of abs32 locations.
  std::vector<offset_t> abs32_locations_;
};

// Disassembler for ELF with Intel architectures.
template <class TRAITS>
class DisassemblerElfIntel : public DisassemblerElf<TRAITS> {
 public:
  using Traits = TRAITS;
  enum ReferenceType : uint8_t { kReloc, kAbs32, kRel32, kTypeCount };

  DisassemblerElfIntel();
  DisassemblerElfIntel(const DisassemblerElfIntel&) = delete;
  const DisassemblerElfIntel& operator=(const DisassemblerElfIntel&) = delete;
  ~DisassemblerElfIntel() override;

  // Disassembler:
  std::vector<ReferenceGroup> MakeReferenceGroups() const override;

  // DisassemblerElf:
  void ParseExecSection(const typename Traits::Elf_Shdr& section) override;
  void PostProcessRel32() override;

  // Specialized Read/Write functions.
  std::unique_ptr<ReferenceReader> MakeReadAbs32(offset_t lo, offset_t hi);
  std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image);
  std::unique_ptr<ReferenceReader> MakeReadRel32(offset_t lo, offset_t hi);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32(MutableBufferView image);

 private:
  // Sorted file offsets of rel32 locations.
  // Using std::deque to reduce peak memory footprint.
  std::deque<offset_t> rel32_locations_;
};

using DisassemblerElfX86 = DisassemblerElfIntel<Elf32IntelTraits>;
using DisassemblerElfX64 = DisassemblerElfIntel<Elf64IntelTraits>;

// Disassembler for ELF with ARM architectures.
template <class TRAITS>
class DisassemblerElfArm : public DisassemblerElf<TRAITS> {
 public:
  using Traits = TRAITS;
  DisassemblerElfArm();
  DisassemblerElfArm(const DisassemblerElfArm&) = delete;
  const DisassemblerElfArm& operator=(const DisassemblerElfArm&) = delete;
  ~DisassemblerElfArm() override;

  // Determines whether target |offset| is in an executable section.
  bool IsTargetOffsetInExecSection(offset_t offset) const;

  // Creates an architecture-specific Rel32Finder for ParseExecSection.
  virtual std::unique_ptr<typename Traits::Rel32FinderUse> MakeRel32Finder(
      const typename Traits::Elf_Shdr& section) = 0;

  // DisassemblerElf:
  void ParseExecSection(const typename Traits::Elf_Shdr& section) override;
  void PostProcessRel32() override;

  // Specialized Read/Write functions.
  std::unique_ptr<ReferenceReader> MakeReadAbs32(offset_t lo, offset_t hi);
  std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image);

 protected:
  // Sorted file offsets of rel32 locations for each rel32 address type.
  std::deque<offset_t>
      rel32_locations_table_[Traits::ArmReferenceType::kTypeCount];
};

// Disassembler for ELF with AArch32 (AKA ARM32).
class DisassemblerElfAArch32 : public DisassemblerElfArm<ElfAArch32Traits> {
 public:
  DisassemblerElfAArch32();
  DisassemblerElfAArch32(const DisassemblerElfAArch32&) = delete;
  const DisassemblerElfAArch32& operator=(const DisassemblerElfAArch32&) =
      delete;
  ~DisassemblerElfAArch32() override;

  // Disassembler:
  std::vector<ReferenceGroup> MakeReferenceGroups() const override;

  // DisassemblerElfArm:
  std::unique_ptr<typename Traits::Rel32FinderUse> MakeRel32Finder(
      const typename Traits::Elf_Shdr& section) override;

  // Under the naive assumption that an executable section is entirely ARM mode
  // or THUMB2 mode, this function implements heuristics to distinguish between
  // the two. Returns true if section is THUMB2 mode; otherwise return false.
  bool IsExecSectionThumb2(const typename Traits::Elf_Shdr& section) const;

  // Specialized Read/Write functions for different rel32 address types.
  std::unique_ptr<ReferenceReader> MakeReadRel32A24(offset_t lower,
                                                    offset_t upper);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32A24(MutableBufferView image);

  std::unique_ptr<ReferenceReader> MakeReadRel32T8(offset_t lower,
                                                   offset_t upper);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32T8(MutableBufferView image);

  std::unique_ptr<ReferenceReader> MakeReadRel32T11(offset_t lower,
                                                    offset_t upper);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32T11(MutableBufferView image);

  std::unique_ptr<ReferenceReader> MakeReadRel32T20(offset_t lower,
                                                    offset_t upper);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32T20(MutableBufferView image);

  std::unique_ptr<ReferenceReader> MakeReadRel32T24(offset_t lower,
                                                    offset_t upper);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32T24(MutableBufferView image);
};

// Disassembler for ELF with AArch64 (AKA ARM64).
class DisassemblerElfAArch64 : public DisassemblerElfArm<ElfAArch64Traits> {
 public:
  DisassemblerElfAArch64();
  DisassemblerElfAArch64(const DisassemblerElfAArch64&) = delete;
  const DisassemblerElfAArch64& operator=(const DisassemblerElfAArch64&) =
      delete;
  ~DisassemblerElfAArch64() override;

  // Disassembler:
  std::vector<ReferenceGroup> MakeReferenceGroups() const override;

  // DisassemblerElfArm:
  std::unique_ptr<typename Traits::Rel32FinderUse> MakeRel32Finder(
      const typename Traits::Elf_Shdr& section) override;

  // Specialized Read/Write functions for different rel32 address types.
  std::unique_ptr<ReferenceReader> MakeReadRel32Immd14(offset_t lower,
                                                       offset_t upper);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32Immd14(
      MutableBufferView image);

  std::unique_ptr<ReferenceReader> MakeReadRel32Immd19(offset_t lower,
                                                       offset_t upper);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32Immd19(
      MutableBufferView image);

  std::unique_ptr<ReferenceReader> MakeReadRel32Immd26(offset_t lower,
                                                       offset_t upper);
  std::unique_ptr<ReferenceWriter> MakeWriteRel32Immd26(
      MutableBufferView image);
};

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_DISASSEMBLER_ELF_H_

// ---- File boundary (new file in this patch): disassembler_elf_unittest.cc ----

// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/zucchini/disassembler_elf.h"

#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <random>
#include <string>
#include <vector>

#include "components/zucchini/test_utils.h"
#include "components/zucchini/type_elf.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace zucchini {

TEST(DisassemblerElfTest, IsTargetOffsetInElfSectionList) {
  // Minimal required fields for IsTargetOffsetInElfSectionList().
  struct FakeElfShdr {
    offset_t sh_offset;
    offset_t sh_size;
  };

  // Calls IsTargetOffsetInElfSectionList() for fixed |sorted_list|, and sweeps
  // offsets in [lo, hi). Renders results into a string consisting of '.' (not
  // in list) and '*' (in list).
  auto test = [&](const std::vector<FakeElfShdr>& sorted_list, offset_t lo,
                  offset_t hi) -> std::string {
    // Ensure |sorted_list| is indeed sorted, without overlaps.
    for (size_t i = 1; i < sorted_list.size(); ++i) {
      if (sorted_list[i].sh_offset <
          sorted_list[i - 1].sh_offset + sorted_list[i - 1].sh_size) {
        return "(Bad input)";
      }
    }
    // The interface to IsTargetOffsetInElfSectionList() takes a list of
    // pointers (since data can be casted from images), so make the conversion.
    std::vector<const FakeElfShdr*> ptr_list;
    for (const FakeElfShdr& header : sorted_list)
      ptr_list.push_back(&header);
    std::string result;
    for (offset_t offset = lo; offset < hi; ++offset) {
      result += IsTargetOffsetInElfSectionList(ptr_list, offset) ?
          '*' : '.';
    }
    return result;
  };

  EXPECT_EQ("..........", test(std::vector<FakeElfShdr>(), 0, 10));
  EXPECT_EQ("*.........", test({{0, 1}}, 0, 10));
  EXPECT_EQ("...*......", test({{3, 1}}, 0, 10));
  EXPECT_EQ("...****...", test({{3, 4}}, 0, 10));
  EXPECT_EQ("...****...", test({{10003, 4}}, 10000, 10010));
  EXPECT_EQ("...********...", test({{3, 4}, {7, 4}}, 0, 14));
  EXPECT_EQ("...****.****...", test({{3, 4}, {8, 4}}, 0, 15));
  EXPECT_EQ("...****..****...", test({{3, 4}, {9, 4}}, 0, 16));
  EXPECT_EQ("..****...*****..", test({{2, 4}, {9, 5}}, 0, 16));
  EXPECT_EQ("...***......***..", test({{3, 3}, {12, 3}}, 0, 17));

  // Many small ranges.
  EXPECT_EQ("..**.**.*.*...*.*.**...**.*.**.*..",  // (Comment strut).
            test({{2, 2},
                  {5, 2},
                  {8, 1},
                  {10, 1},
                  {14, 1},
                  {16, 1},
                  {18, 2},
                  {23, 2},
                  {26, 1},
                  {28, 2},
                  {31, 1}},
                 0, 34));
  EXPECT_EQ("..*****.****.***.**.*..",
            test({{137, 5}, {143, 4}, {148, 3}, {152, 2}, {155, 1}}, 135, 158));
  // Consecutive.
  EXPECT_EQ("..***************..",
            test({{137, 5}, {142, 4}, {146, 3}, {149, 2}, {151, 1}}, 135, 154));
  // Hover around 32 (power of 2).
  EXPECT_EQ("..*******************************..",
            test({{2002, 31}}, 2000, 2035));
  EXPECT_EQ("..********************************..",
            test({{5002, 32}}, 5000, 5036));
  EXPECT_EQ("..*********************************..",
            test({{8002, 33}}, 8000, 8037));
  // Consecutive + small gap.
  EXPECT_EQ(
      "..*****************.***********..",
      test({{9876543, 8}, {9876551, 9}, {9876561, 11}}, 9876541, 9876574));
  // Sample internal of big range.
  EXPECT_EQ("**************************************************",
            test({{100, 1000000}}, 5000, 5050));
  // Sample boundaries of big range.
  EXPECT_EQ(".........................*************************",
            test({{100, 1000000}}, 75, 125));
  EXPECT_EQ("*************************.........................",
            test({{100, 1000000}}, 1000075, 1000125));
  // 1E9 is still good.
  EXPECT_EQ(".....*.....", test({{1000000000, 1}}, 999999995, 1000000006));
}

TEST(DisassemblerElfTest, QuickDetect) {
  std::vector<uint8_t> image_data;
  ConstBufferView image;

  // Empty.
  EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
  EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));

  // Unrelated.
  image_data = ParseHexString("DE AD");
  image = {image_data.data(), image_data.size()};
  EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
  EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));

  // Only Magic.
  image_data = ParseHexString("7F 45 4C 46");
  image = {image_data.data(), image_data.size()};
  EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
  EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));

  // Only identification.
  image_data =
      ParseHexString("7F 45 4C 46 01 01 01 00 00 00 00 00 00 00 00 00");
  image = {image_data.data(), image_data.size()};
  EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
  EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));

  // Large enough, filled with zeros.
  image_data.assign(sizeof(elf::Elf32_Ehdr), 0);
  image = {image_data.data(), image_data.size()};
  EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
  EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));

  // Random.
  std::random_device rd;
  std::mt19937 gen{rd()};
  std::generate(image_data.begin(), image_data.end(), gen);
  image = {image_data.data(), image_data.size()};
  EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
  EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));

  // Typical x86 elf header.
  {
    elf::Elf32_Ehdr header = {};
    auto e_ident =
        ParseHexString("7F 45 4C 46 01 01 01 00 00 00 00 00 00 00 00 00");
    std::copy(e_ident.begin(), e_ident.end(), header.e_ident);
    header.e_type = elf::ET_EXEC;
    header.e_machine = elf::EM_386;
    header.e_version = 1;
    header.e_shentsize = sizeof(elf::Elf32_Shdr);
    ConstBufferView image(reinterpret_cast<const uint8_t*>(&header),
                          sizeof(header));
    EXPECT_TRUE(DisassemblerElfX86::QuickDetect(image));
    EXPECT_FALSE(DisassemblerElfX64::QuickDetect(image));
  }

  // Typical x64 elf header.
  {
    elf::Elf64_Ehdr header = {};
    auto e_ident =
        ParseHexString("7F 45 4C 46 02 01 01 00 00 00 00 00 00 00 00 00");
    std::copy(e_ident.begin(), e_ident.end(), header.e_ident);
    header.e_type = elf::ET_EXEC;
    header.e_machine = elf::EM_X86_64;
    header.e_version = 1;
    header.e_shentsize = sizeof(elf::Elf64_Shdr);
    ConstBufferView image(reinterpret_cast<const uint8_t*>(&header),
                          sizeof(header));
    EXPECT_FALSE(DisassemblerElfX86::QuickDetect(image));
    EXPECT_TRUE(DisassemblerElfX64::QuickDetect(image));
  }
}

}  // namespace zucchini

// ---- File boundary (new file in this patch): disassembler_no_op.cc ----

// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/disassembler_no_op.h"

namespace zucchini {

// |num_equivalence_iterations_| = 1 since no pointers are present.
DisassemblerNoOp::DisassemblerNoOp() : Disassembler(1) {}

DisassemblerNoOp::~DisassemblerNoOp() = default;

ExecutableType DisassemblerNoOp::GetExeType() const {
  return kExeTypeNoOp;
}

std::string DisassemblerNoOp::GetExeTypeString() const {
  return "(Unknown)";
}

// No references are ever extracted, so the group list is always empty.
std::vector<ReferenceGroup> DisassemblerNoOp::MakeReferenceGroups() const {
  return std::vector<ReferenceGroup>();
}

// Accepts any image unconditionally; stores the view for later use.
bool DisassemblerNoOp::Parse(ConstBufferView image) {
  image_ = image;
  return true;
}

}  // namespace zucchini

// ---- File boundary (new file in this patch): disassembler_no_op.h ----

// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_
#define COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_

#include <memory>
#include <string>
#include <vector>

#include "components/zucchini/buffer_view.h"
#include "components/zucchini/disassembler.h"
#include "components/zucchini/image_utils.h"

namespace zucchini {

// This disassembler works on any file and does not look for reference.
class DisassemblerNoOp : public Disassembler {
 public:
  DisassemblerNoOp();
  DisassemblerNoOp(const DisassemblerNoOp&) = delete;
  const DisassemblerNoOp& operator=(const DisassemblerNoOp&) = delete;
  ~DisassemblerNoOp() override;

  // Disassembler:
  ExecutableType GetExeType() const override;
  std::string GetExeTypeString() const override;
  std::vector<ReferenceGroup> MakeReferenceGroups() const override;

 private:
  friend Disassembler;

  bool Parse(ConstBufferView image) override;
};

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_DISASSEMBLER_NO_OP_H_

// ---- File boundary (new file in this patch): disassembler_win32.cc ----

// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/disassembler_win32.h"

#include <stddef.h>

#include <algorithm>

#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "components/zucchini/abs32_utils.h"
#include "components/zucchini/algorithm.h"
#include "components/zucchini/buffer_source.h"
#include "components/zucchini/rel32_finder.h"
#include "components/zucchini/rel32_utils.h"
#include "components/zucchini/reloc_win32.h"

namespace zucchini {

namespace {

// Decides whether |image| points to a Win32 PE file. If this is a possibility,
// assigns |source| to enable further parsing, and returns true. Otherwise
// leaves |source| at an undefined state and returns false.
bool ReadWin32Header(ConstBufferView image, BufferSource* source) {
  *source = BufferSource(image);

  // Check "MZ" magic of DOS header.
  if (!source->CheckNextBytes({'M', 'Z'}))
    return false;

  const auto* dos_header = source->GetPointer<pe::ImageDOSHeader>();
  // For |e_lfanew|, reject on misalignment or overlap with DOS header.
+ if (!dos_header || (dos_header->e_lfanew & 7) != 0 || + dos_header->e_lfanew < 0U + sizeof(pe::ImageDOSHeader)) { + return false; + } + // Offset to PE header is in DOS header. + *source = std::move(BufferSource(image).Skip(dos_header->e_lfanew)); + // Check 'PE\0\0' magic from PE header. + if (!source->ConsumeBytes({'P', 'E', 0, 0})) + return false; + + return true; +} + +template <class TRAITS> +const pe::ImageDataDirectory* ReadDataDirectory( + const typename TRAITS::ImageOptionalHeader* optional_header, + size_t index) { + if (index >= optional_header->number_of_rva_and_sizes) + return nullptr; + return &optional_header->data_directory[index]; +} + +// Decides whether |section| (assumed value) is a section that contains code. +template <class TRAITS> +bool IsWin32CodeSection(const pe::ImageSectionHeader& section) { + return (section.characteristics & kCodeCharacteristics) == + kCodeCharacteristics; +} + +} // namespace + +/******** Win32X86Traits ********/ + +// static +constexpr Bitness Win32X86Traits::kBitness; +constexpr ExecutableType Win32X86Traits::kExeType; +const char Win32X86Traits::kExeTypeString[] = "Windows PE x86"; + +/******** Win32X64Traits ********/ + +// static +constexpr Bitness Win32X64Traits::kBitness; +constexpr ExecutableType Win32X64Traits::kExeType; +const char Win32X64Traits::kExeTypeString[] = "Windows PE x64"; + +/******** DisassemblerWin32 ********/ + +// static. +template <class TRAITS> +bool DisassemblerWin32<TRAITS>::QuickDetect(ConstBufferView image) { + BufferSource source; + return ReadWin32Header(image, &source); +} + +// |num_equivalence_iterations_| = 2 for reloc -> abs32. 
template <class TRAITS>
DisassemblerWin32<TRAITS>::DisassemblerWin32() : Disassembler(2) {}

template <class TRAITS>
DisassemblerWin32<TRAITS>::~DisassemblerWin32() = default;

template <class TRAITS>
ExecutableType DisassemblerWin32<TRAITS>::GetExeType() const {
  return Traits::kExeType;
}

template <class TRAITS>
std::string DisassemblerWin32<TRAITS>::GetExeTypeString() const {
  return Traits::kExeTypeString;
}

// Declares the three reference kinds (reloc, abs32, rel32) and binds each to
// its reader/writer factory.
template <class TRAITS>
std::vector<ReferenceGroup> DisassemblerWin32<TRAITS>::MakeReferenceGroups()
    const {
  return {
      {ReferenceTypeTraits{2, TypeTag(kReloc), PoolTag(kReloc)},
       &DisassemblerWin32::MakeReadRelocs, &DisassemblerWin32::MakeWriteRelocs},
      {ReferenceTypeTraits{Traits::kVAWidth, TypeTag(kAbs32), PoolTag(kAbs32)},
       &DisassemblerWin32::MakeReadAbs32, &DisassemblerWin32::MakeWriteAbs32},
      {ReferenceTypeTraits{4, TypeTag(kRel32), PoolTag(kRel32)},
       &DisassemblerWin32::MakeReadRel32, &DisassemblerWin32::MakeWriteRel32},
  };
}

template <class TRAITS>
std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadRelocs(
    offset_t lo,
    offset_t hi) {
  if (!ParseAndStoreRelocBlocks())
    return std::make_unique<EmptyReferenceReader>();

  RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_,
                                       reloc_block_offsets_, lo, hi);
  CHECK_GE(image_.size(), Traits::kVAWidth);
  // Reloc targets must leave room for a full VA-width field before image end.
  offset_t offset_bound =
      base::checked_cast<offset_t>(image_.size() - Traits::kVAWidth + 1);
  return std::make_unique<RelocReaderWin32>(std::move(reloc_rva_reader),
                                            Traits::kRelocType, offset_bound,
                                            translator_);
}

template <class TRAITS>
std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadAbs32(
    offset_t lo,
    offset_t hi) {
  ParseAndStoreAbs32();
  Abs32RvaExtractorWin32 abs_rva_extractor(
      image_, {Traits::kBitness, image_base_}, abs32_locations_, lo, hi);
  return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor),
                                            translator_);
}

template <class TRAITS>
std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadRel32(
    offset_t lo,
    offset_t hi) {
  ParseAndStoreRel32();
  return std::make_unique<Rel32ReaderX86>(image_, lo, hi, &rel32_locations_,
                                          translator_);
}

template <class TRAITS>
std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteRelocs(
    MutableBufferView image) {
  if (!ParseAndStoreRelocBlocks())
    return std::make_unique<EmptyReferenceWriter>();

  return std::make_unique<RelocWriterWin32>(Traits::kRelocType, image,
                                            reloc_region_, reloc_block_offsets_,
                                            translator_);
}

template <class TRAITS>
std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteAbs32(
    MutableBufferView image) {
  return std::make_unique<Abs32WriterWin32>(
      image, AbsoluteAddress(Traits::kBitness, image_base_), translator_);
}

template <class TRAITS>
std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteRel32(
    MutableBufferView image) {
  return std::make_unique<Rel32WriterX86>(image, translator_);
}

template <class TRAITS>
bool DisassemblerWin32<TRAITS>::Parse(ConstBufferView image) {
  image_ = image;
  return ParseHeader();
}

template <class TRAITS>
bool DisassemblerWin32<TRAITS>::ParseHeader() {
  BufferSource source;

  if (!ReadWin32Header(image_, &source))
    return false;

  constexpr size_t kDataDirBase =
      offsetof(typename Traits::ImageOptionalHeader, data_directory);
  auto* coff_header = source.GetPointer<pe::ImageFileHeader>();
  if (!coff_header || coff_header->size_of_optional_header < kDataDirBase)
    return false;

  // |number_of_rva_and_sizes < kImageNumberOfDirectoryEntries| is possible. So
  // in theory, GetPointer() on ImageOptionalHeader can reach EOF for a tiny PE
  // file, causing false rejection. However, this should not occur for practical
  // cases; and rejection is okay for corner cases (e.g., from a fuzzer).
  auto* optional_header =
      source.GetPointer<typename Traits::ImageOptionalHeader>();
  if (!optional_header || optional_header->magic != Traits::kMagic)
    return false;

  // Check |optional_header->number_of_rva_and_sizes|.
  const size_t data_dir_size =
      coff_header->size_of_optional_header - kDataDirBase;
  const size_t num_data_dir = data_dir_size / sizeof(pe::ImageDataDirectory);
  if (num_data_dir != optional_header->number_of_rva_and_sizes ||
      num_data_dir * sizeof(pe::ImageDataDirectory) != data_dir_size ||
      num_data_dir > pe::kImageNumberOfDirectoryEntries) {
    return false;
  }

  base_relocation_table_ = ReadDataDirectory<Traits>(
      optional_header, pe::kIndexOfBaseRelocationTable);
  if (!base_relocation_table_)
    return false;

  image_base_ = optional_header->image_base;

  // |optional_header->size_of_image| is the size of the image when loaded into
  // memory, and not the actual size on disk.
  rva_t rva_bound = optional_header->size_of_image;
  if (rva_bound >= kRvaBound)
    return false;

  // An exclusive upper bound of all offsets used in the image. This gets
  // updated as sections get visited.
  offset_t offset_bound =
      base::checked_cast<offset_t>(source.begin() - image_.begin());

  // Extract |sections_|.
  size_t sections_count = coff_header->number_of_sections;
  auto* sections_array =
      source.GetArray<pe::ImageSectionHeader>(sections_count);
  if (!sections_array)
    return false;
  sections_.assign(sections_array, sections_array + sections_count);

  // Prepare |units| for offset-RVA translation.
  std::vector<AddressTranslator::Unit> units;
  units.reserve(sections_count);

  // Visit each section, validate, and add address translation data to |units|.
  bool has_text_section = false;
  decltype(pe::ImageSectionHeader::virtual_address) prev_virtual_address = 0;
  for (size_t i = 0; i < sections_count; ++i) {
    const pe::ImageSectionHeader& section = sections_[i];
    // Apply strict checks on section bounds.
    if (!image_.covers(
            {section.file_offset_of_raw_data, section.size_of_raw_data})) {
      return false;
    }
    if (!RangeIsBounded(section.virtual_address, section.virtual_size,
                        rva_bound)) {
      return false;
    }

    // PE sections should be sorted by RVAs. For robustness, we don't rely on
    // this, so even if unsorted we don't care. Output warning though.
    if (prev_virtual_address > section.virtual_address)
      LOG(WARNING) << "RVA anomaly found for Section " << i;
    prev_virtual_address = section.virtual_address;

    // Add |section| data for offset-RVA translation.
    units.push_back({section.file_offset_of_raw_data, section.size_of_raw_data,
                     section.virtual_address, section.virtual_size});

    offset_t end_offset =
        section.file_offset_of_raw_data + section.size_of_raw_data;
    offset_bound = std::max(end_offset, offset_bound);
    if (IsWin32CodeSection<Traits>(section))
      has_text_section = true;
  }

  if (offset_bound > image_.size())
    return false;
  if (!has_text_section)
    return false;

  // Initialize |translator_| for offset-RVA translations. Any inconsistency
  // (e.g., 2 offsets correspond to the same RVA) would invalidate the PE file.
  if (translator_.Initialize(std::move(units)) != AddressTranslator::kSuccess)
    return false;

  // Resize |image_| to include only contents claimed by sections. Note that
  // this may miss digital signatures at end of PE files, but for patching this
  // is of minor concern.
  image_.shrink(offset_bound);

  return true;
}

template <class TRAITS>
bool DisassemblerWin32<TRAITS>::ParseAndStoreRelocBlocks() {
  // Parse at most once; cached success is encoded in |reloc_region_|.
  if (has_parsed_relocs_)
    return reloc_region_.lo() != kInvalidOffset;

  has_parsed_relocs_ = true;
  DCHECK(reloc_block_offsets_.empty());

  offset_t relocs_offset =
      translator_.RvaToOffset(base_relocation_table_->virtual_address);
  size_t relocs_size = base_relocation_table_->size;
  const BufferRegion temp_reloc_region = {relocs_offset, relocs_size};

  // Reject bogus relocs.
  // It's possible to have no reloc, so this is non-fatal!
  if (relocs_offset == kInvalidOffset || !image_.covers(temp_reloc_region))
    return false;

  // Precompute offsets of all reloc blocks.
  if (!RelocRvaReaderWin32::FindRelocBlocks(image_, temp_reloc_region,
                                            &reloc_block_offsets_)) {
    return false;
  }
  // Reassign |reloc_region_| only on success.
  reloc_region_ = temp_reloc_region;
  return true;
}

template <class TRAITS>
bool DisassemblerWin32<TRAITS>::ParseAndStoreAbs32() {
  // Parse at most once.
  if (has_parsed_abs32_)
    return true;
  has_parsed_abs32_ = true;

  // Read reloc targets as preliminary abs32 locations.
  std::unique_ptr<ReferenceReader> relocs = MakeReadRelocs(0, offset_t(size()));
  for (auto ref = relocs->GetNext(); ref.has_value(); ref = relocs->GetNext())
    abs32_locations_.push_back(ref->target);

  std::sort(abs32_locations_.begin(), abs32_locations_.end());

  // Abs32 references must have targets translatable to offsets. Remove those
  // that are unable to do so.
  size_t num_untranslatable = RemoveUntranslatableAbs32(
      image_, {Traits::kBitness, image_base_}, translator_, &abs32_locations_);
  LOG_IF(WARNING, num_untranslatable) << "Removed " << num_untranslatable
                                      << " untranslatable abs32 references.";

  // Abs32 reference bodies must not overlap. If found, simply remove them.
+ size_t num_overlapping = + RemoveOverlappingAbs32Locations(Traits::kVAWidth, &abs32_locations_); + LOG_IF(WARNING, num_overlapping) + << "Removed " << num_overlapping + << " abs32 references with overlapping bodies."; + + abs32_locations_.shrink_to_fit(); + return true; +} + +template <class TRAITS> +bool DisassemblerWin32<TRAITS>::ParseAndStoreRel32() { + if (has_parsed_rel32_) + return true; + has_parsed_rel32_ = true; + + ParseAndStoreAbs32(); + + AddressTranslator::RvaToOffsetCache target_rva_checker(translator_); + + for (const pe::ImageSectionHeader& section : sections_) { + if (!IsWin32CodeSection<Traits>(section)) + continue; + + rva_t start_rva = section.virtual_address; + rva_t end_rva = start_rva + section.virtual_size; + + // |virtual_size < size_of_raw_data| is possible. In this case, disassembly + // should not proceed beyond |virtual_size|, so rel32 location RVAs remain + // translatable to file offsets. + uint32_t size_to_use = + std::min(section.virtual_size, section.size_of_raw_data); + ConstBufferView region = + image_[{section.file_offset_of_raw_data, size_to_use}]; + Abs32GapFinder gap_finder(image_, region, abs32_locations_, + Traits::kVAWidth); + typename Traits::RelFinder rel_finder(image_, translator_); + // Iterate over gaps between abs32 references, to avoid collision. + while (gap_finder.FindNext()) { + rel_finder.SetRegion(gap_finder.GetGap()); + // Heuristically detect rel32 references, store if valid. + while (rel_finder.FindNext()) { + auto rel32 = rel_finder.GetRel32(); + if (target_rva_checker.IsValid(rel32.target_rva) && + (rel32.can_point_outside_section || + (start_rva <= rel32.target_rva && rel32.target_rva < end_rva))) { + rel_finder.Accept(); + rel32_locations_.push_back(rel32.location); + } + } + } + } + rel32_locations_.shrink_to_fit(); + // |sections_| entries are usually sorted by offset, but there's no guarantee. + // So sort explicitly, to be sure. 
+ std::sort(rel32_locations_.begin(), rel32_locations_.end()); + return true; +} + +// Explicit instantiation for supported classes. +template class DisassemblerWin32<Win32X86Traits>; +template class DisassemblerWin32<Win32X64Traits>; + +} // namespace zucchini diff --git a/disassembler_win32.h b/disassembler_win32.h new file mode 100644 index 0000000..77b65ac --- /dev/null +++ b/disassembler_win32.h @@ -0,0 +1,131 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <deque> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/type_win_pe.h" + +namespace zucchini { + +class Rel32FinderX86; +class Rel32FinderX64; + +struct Win32X86Traits { + static constexpr Bitness kBitness = kBit32; + static constexpr ExecutableType kExeType = kExeTypeWin32X86; + enum : uint16_t { kMagic = 0x10B }; + enum : uint16_t { kRelocType = 3 }; + enum : uint32_t { kVAWidth = 4 }; + static const char kExeTypeString[]; + + using ImageOptionalHeader = pe::ImageOptionalHeader; + using RelFinder = Rel32FinderX86; + using Address = uint32_t; +}; + +struct Win32X64Traits { + static constexpr Bitness kBitness = kBit64; + static constexpr ExecutableType kExeType = kExeTypeWin32X64; + enum : uint16_t { kMagic = 0x20B }; + enum : uint16_t { kRelocType = 10 }; + enum : uint32_t { kVAWidth = 8 }; + static const char kExeTypeString[]; + + using ImageOptionalHeader = pe::ImageOptionalHeader64; + using RelFinder = Rel32FinderX64; + using Address = uint64_t; +}; + +template <class TRAITS> 
+class DisassemblerWin32 : public Disassembler { + public: + using Traits = TRAITS; + enum ReferenceType : uint8_t { kReloc, kAbs32, kRel32, kTypeCount }; + + // Applies quick checks to determine whether |image| *may* point to the start + // of an executable. Returns true iff the check passes. + static bool QuickDetect(ConstBufferView image); + + DisassemblerWin32(); + DisassemblerWin32(const DisassemblerWin32&) = delete; + const DisassemblerWin32& operator=(const DisassemblerWin32&) = delete; + ~DisassemblerWin32() override; + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector<ReferenceGroup> MakeReferenceGroups() const override; + + // Functions that return reader / writer for references. + std::unique_ptr<ReferenceReader> MakeReadRelocs(offset_t lo, offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadAbs32(offset_t lo, offset_t hi); + std::unique_ptr<ReferenceReader> MakeReadRel32(offset_t lo, offset_t hi); + std::unique_ptr<ReferenceWriter> MakeWriteRelocs(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteAbs32(MutableBufferView image); + std::unique_ptr<ReferenceWriter> MakeWriteRel32(MutableBufferView image); + + private: + friend Disassembler; + + // Disassembler: + bool Parse(ConstBufferView image) override; + + // Parses the file header. Returns true iff successful. + bool ParseHeader(); + + // Parsers to extract references. These are lazily called, and return whether + // parsing was successful (failures are non-fatal). + bool ParseAndStoreRelocBlocks(); + bool ParseAndStoreAbs32(); + bool ParseAndStoreRel32(); + + // In-memory copy of sections. + std::vector<pe::ImageSectionHeader> sections_; + + // Image base address to translate between RVA and VA. + typename Traits::Address image_base_ = 0; + + // Pointer to data Directory entry of the relocation table. 
+ const pe::ImageDataDirectory* base_relocation_table_ = nullptr; + + // Translator between offsets and RVAs. + AddressTranslator translator_; + + // Reference storage. + BufferRegion reloc_region_ = {kInvalidOffset, 0U}; + std::vector<offset_t> reloc_block_offsets_; + offset_t reloc_end_ = 0; + std::vector<offset_t> abs32_locations_; + // Using std::deque to reduce peak memory footprint. + std::deque<offset_t> rel32_locations_; + + // Initialization states of reference storage, used for lazy initialization. + // TODO(huangs): Investigate whether lazy initialization is useful for memory + // reduction. This is a carryover from Courgette. To be sure we should run + // experiment after Zucchini is able to do ensemble patching. + bool has_parsed_relocs_ = false; + bool has_parsed_abs32_ = false; + bool has_parsed_rel32_ = false; +}; + +using DisassemblerWin32X86 = DisassemblerWin32<Win32X86Traits>; +using DisassemblerWin32X64 = DisassemblerWin32<Win32X64Traits>; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_WIN32_H_ diff --git a/disassembler_ztf.cc b/disassembler_ztf.cc new file mode 100644 index 0000000..dfe9045 --- /dev/null +++ b/disassembler_ztf.cc @@ -0,0 +1,653 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/disassembler_ztf.h" + +#include <algorithm> +#include <cmath> +#include <iterator> +#include <limits> +#include <numeric> + +#include "base/check_op.h" +#include "base/numerics/checked_math.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/buffer_source.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +namespace { + +constexpr uint8_t kDelimiter = ','; + +constexpr int kHeaderMagicSize = 4; +constexpr int kFooterMagicSize = 5; +constexpr int kTotalMagicSize = kHeaderMagicSize + kFooterMagicSize; + +// Number of characters that aren't digits in each type of reference. +constexpr int kNumConstCharInAbs = 3; +constexpr int kNumConstCharInRel = 5; + +/******** ZtfConfig ********/ + +// For passing around metadata about the type of reference to match. +// - |digits_per_dim| is the length of the offset in lines/cols of a +// reference. +// - |open_char| is an ASCII character representing the opening char. +// - |close_char| is an ASCII character representing the closing char. +struct ZtfConfig { + uint8_t digits_per_dim; + uint8_t open_char; + uint8_t close_char; + + constexpr uint8_t abs_width() const { + return digits_per_dim * 2 + kNumConstCharInAbs; + } + + constexpr uint8_t rel_width() const { + return digits_per_dim * 2 + kNumConstCharInRel; + } + + uint8_t Width(ztf::LineCol /* lc */) const { return abs_width(); } + + uint8_t Width(ztf::DeltaLineCol /* dlc */) const { return rel_width(); } +}; + +// Creates a ZtfConfig for parsing or writing based on the desired |digits| and +// |pool|. 
+template <DisassemblerZtf::ReferencePool pool> +constexpr ZtfConfig MakeZtfConfig(uint8_t digits) { + switch (pool) { + case DisassemblerZtf::kAngles: + return ZtfConfig{digits, '<', '>'}; + case DisassemblerZtf::kBraces: + return ZtfConfig{digits, '{', '}'}; + case DisassemblerZtf::kBrackets: + return ZtfConfig{digits, '[', ']'}; + case DisassemblerZtf::kParentheses: + break; // Handled below. + } + return ZtfConfig{digits, '(', ')'}; +} + +/******** ZtfParser ********/ + +// ZtfParser is used to extract (absolute) LineCol and (relative) DeltaLineCol +// from a ZTF file, and contains various helpers for character, digits, and sign +// matching. +class ZtfParser { + public: + ZtfParser(offset_t hi, ConstBufferView image, ZtfConfig config) + : image_(image), hi_(hi), config_(config) { + DCHECK_LE(static_cast<size_t>(std::pow(10U, config_.digits_per_dim)), + ztf::kMaxDimValue); + } + + ZtfParser(const ZtfParser&) = delete; + const ZtfParser& operator=(const ZtfParser&) = delete; + + // Attempts to match an absolute reference at |offset|. If successful then + // assigns the result to |abs_lc| and returns true. Otherwise returns false. + // An absolute reference takes the form: + // <open><digits><delimiter><digits><close> + bool MatchAtOffset(offset_t offset, ztf::LineCol* abs_lc) { + if (hi_ < config_.abs_width() || offset > hi_ - config_.abs_width()) + return false; + offset_ = offset; + return MatchChar(config_.open_char) && MatchDigits(+1, &abs_lc->line) && + MatchChar(kDelimiter) && MatchDigits(+1, &abs_lc->col) && + MatchChar(config_.close_char); + } + + // Attempts to match an absolute reference at |offset|. If successful then + // assigns the result to |rel_lc| and returns true. Otherwise returns false. 
A + // relative reference takes the form: + // <open><sign><digits><delimiter><sign><digits><close> + bool MatchAtOffset(offset_t offset, ztf::DeltaLineCol* rel_dlc) { + if (hi_ < config_.rel_width() || offset > hi_ - config_.rel_width()) + return false; + offset_ = offset; + ztf::dim_t line_sign; + ztf::dim_t col_sign; + return MatchChar(config_.open_char) && MatchSign(&line_sign) && + MatchDigits(line_sign, &rel_dlc->line) && MatchChar(kDelimiter) && + MatchSign(&col_sign) && MatchDigits(col_sign, &rel_dlc->col) && + MatchChar(config_.close_char); + } + + private: + // The Match*() functions below can advance |offset_|, and return a bool to + // indicate success to allow chaining using &&. + + // Returns true if |character| is at location |offset_| in |image_| and + // increments |offset_|. + bool MatchChar(uint8_t character) { + return character == image_.read<uint8_t>(offset_++); + } + + // Looks for '+' or '-' at |offset_|. If found, stores +1 or -1 in |sign| and + // returns true. Otherwise returns false. + bool MatchSign(ztf::dim_t* sign) { + uint8_t val = image_.read<uint8_t>(offset_++); + if (val == static_cast<uint8_t>(ztf::SignChar::kMinus)) { + *sign = -1; + return true; + } + if (val == static_cast<uint8_t>(ztf::SignChar::kPlus)) { + *sign = 1; + return true; + } + return false; + } + + // Attempts to extract a number with the number of base 10 digits equal to + // |config_.digits_per_dim| from |image_| starting from |offset_|. Returns + // true and assigns the integer value to |value| if successful. + bool MatchDigits(ztf::dim_t sign, ztf::dim_t* value) { + ztf::dim_t output = 0; + for (int i = 0; i < config_.digits_per_dim; ++i) { + auto digit = image_.read<uint8_t>(offset_++); + if (digit >= '0' && digit < '0' + 10) + output = output * 10 + digit - '0'; + else + return false; + } + if (!output && sign < 0) // Disallow "-0", "-00", etc. 
+ return false; + *value = sign * output; + return true; + } + + ConstBufferView image_; + const offset_t hi_; + const ZtfConfig config_; + offset_t offset_ = 0; +}; + +/******** ZtfWriter ********/ + +// ZtfWriter is used to write references to an image. This includes writing +// the enclosing characters around the reference. +class ZtfWriter { + public: + ZtfWriter(MutableBufferView image, ZtfConfig config) + : image_(image), + config_(config), + val_bound_( + static_cast<ztf::dim_t>(std::pow(10, config_.digits_per_dim))) {} + + ZtfWriter(const ZtfWriter&) = delete; + const ZtfWriter& operator=(const ZtfWriter&) = delete; + + // Write an absolute reference |abs_ref| at |offset|. Note that references + // that would overwrite a newline are skipped as this would invalidate all + // the other reference line numbers. + void Write(offset_t offset, ztf::LineCol abs_ref) { + offset_ = offset; + if (!SafeToWriteNumber(abs_ref.line) || !SafeToWriteNumber(abs_ref.col) || + !SafeToWriteData(offset_, offset_ + config_.abs_width())) { + return; + } + WriteChar(config_.open_char); + WriteNumber(abs_ref.line); + WriteChar(kDelimiter); + WriteNumber(abs_ref.col); + WriteChar(config_.close_char); + } + + // Write a relative reference |rel_ref| at |offset|. Note that references + // that would overwrite a newline are skipped as this would invalidate all + // the other reference line numbers. + void Write(offset_t offset, ztf::DeltaLineCol rel_ref) { + offset_ = offset; + if (!SafeToWriteNumber(rel_ref.line) || !SafeToWriteNumber(rel_ref.col) || + !SafeToWriteData(offset_, offset_ + config_.rel_width())) { + return; + } + WriteChar(config_.open_char); + WriteSign(rel_ref.line); + WriteNumber(rel_ref.line); + WriteChar(kDelimiter); + WriteSign(rel_ref.col); + WriteNumber(rel_ref.col); + WriteChar(config_.close_char); + } + + private: + // Returns whether it is safe to modify bytes in |[lo, hi)| in |image_| for + // Reference correction. Failure cases are: + // - Out-of-bound writes. 
+ // - Overwriting '\n'. This is a ZTF special case since '\n' dictates file + // structure, and Reference correction should never mess with this. + bool SafeToWriteData(offset_t lo, offset_t hi) const { + DCHECK_LE(lo, hi); + // Out of bounds. + if (hi > image_.size()) + return false; + for (offset_t i = lo; i < hi; ++i) { + if (image_.read<uint8_t>(i) == '\n') + return false; + } + return true; + } + + // Checks whether it is safe to write a |val| based on + // |config_.digits_per_dim|. + bool SafeToWriteNumber(ztf::dim_t val) const { + return std::abs(val) < val_bound_; + } + + // The Write*() functions each advance |offset_| by a fixed distance. The + // caller should ensure there's enough space to write data. + + // Write |character| at |offset_| and increment |offset_|. + void WriteChar(uint8_t character) { image_.write(offset_++, character); } + + // Write the sign of |value| at |offset_| and increment |offset_|. + void WriteSign(ztf::dim_t value) { + image_.write(offset_++, + value >= 0 ? ztf::SignChar::kPlus : ztf::SignChar::kMinus); + } + + // Writes the absolute value of the number represented by |value| at |offset_| + // using zero padding to fill |config_.digits_per_dim|. + void WriteNumber(ztf::dim_t value) { + size_t size = config_.digits_per_dim + 1; + DCHECK_LE(size, kMaxDigitCount + 1); + char digits[kMaxDigitCount + 1]; // + 1 for terminator. + int len = + snprintf(digits, size, "%0*u", config_.digits_per_dim, std::abs(value)); + DCHECK_EQ(len, config_.digits_per_dim); + for (int i = 0; i < len; ++i) + image_.write(offset_++, digits[i]); + } + + MutableBufferView image_; + const ZtfConfig config_; + // Bound on numeric values, as limited by |config_.digits_per_dim|. + const ztf::dim_t val_bound_; + offset_t offset_ = 0; +}; + +// Specialization of ReferenceReader for reading text references. 
+template <typename T> +class ZtfReferenceReader : public ReferenceReader { + public: + ZtfReferenceReader(offset_t lo, + offset_t hi, + ConstBufferView image, + const ZtfTranslator& translator, + ZtfConfig config) + : offset_(lo), + hi_(hi), + translator_(translator), + config_(config), + parser_(hi_, image, config_) { + DCHECK_LE(hi_, image.size()); + } + + // Walks |offset_| from |lo| to |hi_| running |parser_|. If any matches are + // found they are returned. + absl::optional<Reference> GetNext() override { + T line_col; + for (; offset_ < hi_; ++offset_) { + if (!parser_.MatchAtOffset(offset_, &line_col)) + continue; + + auto target = ConvertToTargetOffset(offset_, line_col); + // Ignore targets that point outside the file. + if (target == kInvalidOffset) + continue; + offset_t location = offset_; + offset_ += config_.Width(line_col); + return Reference{location, target}; + } + return absl::nullopt; + } + + private: + // Converts |lc| (an absolute reference) to an offset using |translator_|. + offset_t ConvertToTargetOffset(offset_t /* location */, + ztf::LineCol lc) const { + return translator_.LineColToOffset(lc); + } + + // Converts |dlc| (a relative reference) to an offset using |translator_|. + // This requires converting the |dlc| to a ztf::LineCol to find the offset. + offset_t ConvertToTargetOffset(offset_t location, + ztf::DeltaLineCol dlc) const { + auto lc = translator_.OffsetToLineCol(location); + if (!lc.has_value()) + return kInvalidOffset; + return translator_.LineColToOffset(lc.value() + dlc); + } + + offset_t offset_; + const offset_t hi_; + const ZtfTranslator& translator_; + const ZtfConfig config_; + ZtfParser parser_; +}; + +// Specialization of ReferenceWriter for writing text references. 
+template <typename T> +class ZtfReferenceWriter : public ReferenceWriter { + public: + ZtfReferenceWriter(MutableBufferView image, + const ZtfTranslator& translator, + ZtfConfig config) + : translator_(translator), writer_(image, config) {} + + void PutNext(Reference reference) override { + T line_col; + if (!ConvertToTargetLineCol(reference, &line_col)) + return; + + writer_.Write(reference.location, line_col); + } + + private: + // Converts |reference| to an absolute reference to be stored in |out_lc|. + // Returns true on success. + bool ConvertToTargetLineCol(Reference reference, ztf::LineCol* out_lc) { + auto temp_lc = translator_.OffsetToLineCol(reference.target); + if (!temp_lc.has_value() || !translator_.IsValid(temp_lc.value())) + return false; + + *out_lc = temp_lc.value(); + return true; + } + + // Converts |reference| to a relative reference to be stored in |out_dlc|. + // Will return true on success. + bool ConvertToTargetLineCol(Reference reference, ztf::DeltaLineCol* out_dlc) { + auto location_lc = translator_.OffsetToLineCol(reference.location); + if (!location_lc.has_value()) + return false; + + auto target_lc = translator_.OffsetToLineCol(reference.target); + if (!target_lc.has_value()) + return false; + + *out_dlc = target_lc.value() - location_lc.value(); + return translator_.IsValid(reference.location, *out_dlc); + } + + const ZtfTranslator& translator_; + ZtfWriter writer_; +}; + +// Reads a text header to check for the magic string "ZTxt" at the start +// indicating the file should be treated as a Zucchini text file. +bool ReadZtfHeader(ConstBufferView image) { + BufferSource source(image); + // Reject empty images and "ZTxtxTZ\n" (missing 't'). 
+ if (source.size() < kTotalMagicSize) + return false; + if (source.size() > std::numeric_limits<offset_t>::max()) + return false; + return source.CheckNextBytes({'Z', 'T', 'x', 't'}); +} + +} // namespace + +/******** ZtfTranslator ********/ + +ZtfTranslator::ZtfTranslator() {} + +ZtfTranslator::~ZtfTranslator() = default; + +bool ZtfTranslator::Init(ConstBufferView image) { + line_starts_.clear(); + // Record the starting offset of every line in |image_| into |line_start_|. + line_starts_.push_back(0); + for (size_t i = 0; i < image.size(); ++i) { + if (image.read<uint8_t>(i) == '\n') { + // Maximum number of entries is |ztf::kMaxDimValue|, including the end + // sentinel. + if (line_starts_.size() >= ztf::kMaxDimValue) + return false; + line_starts_.push_back(base::checked_cast<offset_t>(i + 1)); + // Check that the line length is reachable from an absolute reference. + if (line_starts_.back() - *std::next(line_starts_.rbegin()) >= + ztf::kMaxDimValue) { + return false; + } + } + } + // Since the last character of ZTF file is always '\n', |line_starts_| will + // always contain the file length as the last element, which serves as a + // sentinel. 
+ CHECK_EQ(image.size(), static_cast<size_t>(line_starts_.back())); + return true; +} + +bool ZtfTranslator::IsValid(ztf::LineCol lc) const { + DCHECK(!line_starts_.empty()); + return lc.line >= 1 && lc.col >= 1 && + static_cast<offset_t>(lc.line) <= NumLines() && + static_cast<offset_t>(lc.col) <= LineLength(lc.line); +} + +bool ZtfTranslator::IsValid(offset_t offset, ztf::DeltaLineCol dlc) const { + DCHECK(!line_starts_.empty()); + auto abs_lc = OffsetToLineCol(offset); + if (!abs_lc.has_value()) + return false; + + if (!base::CheckAdd(abs_lc->line, dlc.line).IsValid() || + !base::CheckAdd(abs_lc->col, dlc.col).IsValid()) { + return false; + } + return IsValid(abs_lc.value() + dlc); +} + +offset_t ZtfTranslator::LineColToOffset(ztf::LineCol lc) const { + // Guard against out of bounds access to |line_starts_| and ensure the + // |lc| falls within the file. + DCHECK(!line_starts_.empty()); + if (!IsValid(lc)) + return kInvalidOffset; + + offset_t target = line_starts_[lc.line - 1] + lc.col - 1; + DCHECK_LT(target, line_starts_.back()); + return target; +} + +absl::optional<ztf::LineCol> ZtfTranslator::OffsetToLineCol( + offset_t offset) const { + DCHECK(!line_starts_.empty()); + // Don't place a target outside the image. 
+ if (offset >= line_starts_.back()) + return absl::nullopt; + auto it = SearchForRange(offset); + ztf::LineCol lc; + lc.line = std::distance(line_starts_.cbegin(), it) + 1; + lc.col = offset - line_starts_[lc.line - 1] + 1; + DCHECK_LE(static_cast<offset_t>(lc.col), LineLength(lc.line)); + return lc; +} + +std::vector<offset_t>::const_iterator ZtfTranslator::SearchForRange( + offset_t offset) const { + DCHECK(!line_starts_.empty()); + auto it = + std::upper_bound(line_starts_.cbegin(), line_starts_.cend(), offset); + DCHECK(it != line_starts_.cbegin()); + return --it; +} + +offset_t ZtfTranslator::LineLength(uint16_t line) const { + DCHECK_GE(line, 1); + DCHECK_LE(line, NumLines()); + return line_starts_[line] - line_starts_[line - 1]; +} + +/******** DisassemblerZtf ********/ + +// Use 2 even though reference "chaining" isn't present in ZTF as it is the +// usual case for other Disassemblers and this is meant to mimic that as closely +// as possible. +DisassemblerZtf::DisassemblerZtf() : Disassembler(2) {} + +DisassemblerZtf::~DisassemblerZtf() = default; + +// static. 
+bool DisassemblerZtf::QuickDetect(ConstBufferView image) { + return ReadZtfHeader(image); +} + +ExecutableType DisassemblerZtf::GetExeType() const { + return kExeTypeZtf; +} + +std::string DisassemblerZtf::GetExeTypeString() const { + return "Zucchini Text Format"; +} + +std::vector<ReferenceGroup> DisassemblerZtf::MakeReferenceGroups() const { + return { + {{5, TypeTag(kAnglesAbs1), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<1, kAngles>, + &DisassemblerZtf::MakeWriteAbs<1, kAngles>}, + {{7, TypeTag(kAnglesAbs2), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<2, kAngles>, + &DisassemblerZtf::MakeWriteAbs<2, kAngles>}, + {{9, TypeTag(kAnglesAbs3), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<3, kAngles>, + &DisassemblerZtf::MakeWriteAbs<3, kAngles>}, + {{7, TypeTag(kAnglesRel1), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<1, kAngles>, + &DisassemblerZtf::MakeWriteRel<1, kAngles>}, + {{9, TypeTag(kAnglesRel2), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<2, kAngles>, + &DisassemblerZtf::MakeWriteRel<2, kAngles>}, + {{11, TypeTag(kAnglesRel3), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<3, kAngles>, + &DisassemblerZtf::MakeWriteRel<3, kAngles>}, + {{5, TypeTag(kBracesAbs1), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<1, kBraces>, + &DisassemblerZtf::MakeWriteAbs<1, kBraces>}, + {{7, TypeTag(kBracesAbs2), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<2, kBraces>, + &DisassemblerZtf::MakeWriteAbs<2, kBraces>}, + {{9, TypeTag(kBracesAbs3), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<3, kBraces>, + &DisassemblerZtf::MakeWriteAbs<3, kBraces>}, + {{7, TypeTag(kBracesRel1), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<1, kBraces>, + &DisassemblerZtf::MakeWriteRel<1, kBraces>}, + {{9, TypeTag(kBracesRel2), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<2, kBraces>, + &DisassemblerZtf::MakeWriteRel<2, kBraces>}, + {{11, TypeTag(kBracesRel3), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<3, kBraces>, + 
&DisassemblerZtf::MakeWriteRel<3, kBraces>}, + {{5, TypeTag(kBracketsAbs1), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<1, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<1, kBrackets>}, + {{7, TypeTag(kBracketsAbs2), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<2, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<2, kBrackets>}, + {{9, TypeTag(kBracketsAbs3), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<3, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<3, kBrackets>}, + {{7, TypeTag(kBracketsRel1), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<1, kBrackets>, + &DisassemblerZtf::MakeWriteRel<1, kBrackets>}, + {{9, TypeTag(kBracketsRel2), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<2, kBrackets>, + &DisassemblerZtf::MakeWriteRel<2, kBrackets>}, + {{11, TypeTag(kBracketsRel3), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<3, kBrackets>, + &DisassemblerZtf::MakeWriteRel<3, kBrackets>}, + {{5, TypeTag(kParenthesesAbs1), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<1, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<1, kParentheses>}, + {{7, TypeTag(kParenthesesAbs2), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<2, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<2, kParentheses>}, + {{9, TypeTag(kParenthesesAbs3), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<3, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<3, kParentheses>}, + {{7, TypeTag(kParenthesesRel1), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<1, kParentheses>, + &DisassemblerZtf::MakeWriteRel<1, kParentheses>}, + {{9, TypeTag(kParenthesesRel2), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<2, kParentheses>, + &DisassemblerZtf::MakeWriteRel<2, kParentheses>}, + {{11, TypeTag(kParenthesesRel3), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<3, kParentheses>, + &DisassemblerZtf::MakeWriteRel<3, kParentheses>}, + }; +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> 
+std::unique_ptr<ReferenceReader> DisassemblerZtf::MakeReadAbs(offset_t lo, + offset_t hi) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceReader<ztf::LineCol>>( + lo, hi, image_, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceReader> DisassemblerZtf::MakeReadRel(offset_t lo, + offset_t hi) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceReader<ztf::DeltaLineCol>>( + lo, hi, image_, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceWriter> DisassemblerZtf::MakeWriteAbs( + MutableBufferView image) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceWriter<ztf::LineCol>>( + image, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceWriter> DisassemblerZtf::MakeWriteRel( + MutableBufferView image) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceWriter<ztf::DeltaLineCol>>( + image, translator_, MakeZtfConfig<pool>(digits)); +} + +bool DisassemblerZtf::Parse(ConstBufferView image) { + image_ = image; + if (!ReadZtfHeader(image_)) + return false; + + CHECK_GE(image_.size(), + static_cast<size_t>(kTotalMagicSize)); // Needs header and footer. + + // Find the terminating footer "txTZ\n" that indicates the end of the image. 
+ offset_t offset = 0; + for (; offset <= image_.size() - kFooterMagicSize; offset++) { + if (image_.read<uint8_t>(offset) == 't' && + image_.read<uint8_t>(offset + 1) == 'x' && + image_.read<uint8_t>(offset + 2) == 'T' && + image_.read<uint8_t>(offset + 3) == 'Z' && + image_.read<uint8_t>(offset + 4) == '\n') { + break; + } + } + + // If no footer is found before the end of the image then the parsing failed. + if (offset > image_.size() - kFooterMagicSize) + return false; + image_.shrink(offset + kFooterMagicSize); + + return translator_.Init(image_); +} + +} // namespace zucchini diff --git a/disassembler_ztf.h b/disassembler_ztf.h new file mode 100644 index 0000000..0e73c2a --- /dev/null +++ b/disassembler_ztf.h @@ -0,0 +1,201 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ + +#include <stdint.h> +#include <stdlib.h> + +#include <memory> +#include <string> +#include <vector> + +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/type_ztf.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +// Disassembler for text based files. This file format is supported for +// debugging Zucchini and is not intended for production usage. +// +// A valid Zucchini Text Format (ZTF) file is specified as follows: +// +// Header: +// The first four bytes must be - 'Z' 'T' 'x' 't' +// Footer: +// The last five bytes must be - 't' 'x' 'T' 'Z' '\n' +// (note that terminating new line is required). +// Content: +// The content can be any sequence of printable ASCII characters and new line +// (but not carriage return). This excludes the sequence that comprises the +// Footer. +// References: +// A reference is either Absolute or Relative. 
All references must begin and +// end with a pair of enclosing characters <open>, <close>. The options are: +// - Angles: '<' and '>' +// - Braces: '{' and '}' +// - Brackets: '[' and ']' +// - Parentheses: '(' and ')' +// +// A reference contains three items: +// - A line number <line> +// - A delimiter ',' <delimiter> +// - A column number <col> +// <line> and <col> may contain 1-3 digits and both must contain the same +// number of digits. If a number is too short then it can be left-padded +// with '0'. +// +// For Absolute references, <line> and <col> are 1-based (i.e. positive) +// index of line and column numbers of a character in the ZTF. This follows +// standard convention for text editors. Note that "\n" is considered to be +// part of a preceding line. +// +// <open><line><delimiter><col><close> +// +// For Relative references, <line> and <col> are integer offsets deltas of the +// target's (absolute) line and column relative to the line and column of the +// reference's first byte (i.e. <open>). Relative references have <sign> ('+' +// or '-') before <line> and <col>. For the special case of "0", "00", etc., +// <sign> must be "+". +// +// <open><sign><line><delimiter><sign><col><close> +// +// If a reference points outside the target either in writing or reading it is +// considered invalid and ignored. Similarly if it overflows a line. i.e. if a +// line is 10 characters long and a references targets character 11 of that +// line it is rejected. Lines are delimited with '\n' which is counted toward +// the line length. +// +// If a reference is to be written that would overwrite a '\n' character it is +// ignored as this would break all other line values. + +enum : size_t { kMaxDigitCount = 3 }; + +// Helper class for translating among offset_t, ztf::LineCol and +// ztf::DeltaLineCol. 
+class ZtfTranslator { + public: + ZtfTranslator(); + ZtfTranslator(const ZtfTranslator&) = delete; + const ZtfTranslator& operator=(const ZtfTranslator&) = delete; + ~ZtfTranslator(); + + // Initializes |line_starts_| with the contents of |image|. + bool Init(ConstBufferView image); + + // Checks if |lc| is a valid location in the file. + bool IsValid(ztf::LineCol lc) const; + + // Checks if |dlc| relative to |offset| is a valid location in the file. + bool IsValid(offset_t offset, ztf::DeltaLineCol dlc) const; + + // Returns the offset corresponding to |line_col| if it is valid. Otherwise + // returns |kInvalidOffset|. + offset_t LineColToOffset(ztf::LineCol line_col) const; + + // Returns the ztf::LineCol for an |offset| if it is valid. Otherwise returns + // absl::nullopt. + absl::optional<ztf::LineCol> OffsetToLineCol(offset_t offset) const; + + private: + // Returns an iterator to the range containing |offset|. Which is represented + // by the starting offset. The next element will contain the upper bound of + // the range. + std::vector<offset_t>::const_iterator SearchForRange(offset_t offset) const; + + // Returns the length of a 1-indexed line. The caller is expected to check + // that the requested line exists. + offset_t LineLength(uint16_t line) const; + + offset_t NumLines() const { + return static_cast<offset_t>(line_starts_.size() - 1); + } + + // |line_starts_| is a sorted list of each line's starting offset, along with + // the image size as the sentinel; it looks like {0, ..., image.size}. + std::vector<offset_t> line_starts_; +}; + +// Disassembler for Zucchini Text Format (ZTF). +class DisassemblerZtf : public Disassembler { + public: + // Target Pools + enum ReferencePool : uint8_t { + kAngles, // <> + kBraces, // {} + kBrackets, // [] + kParentheses // () + }; + + // Type breakdown. Should contain all permutations of ReferencePool, Abs|Rel + // and the possible number of digits (1-3). 
+ enum ReferenceType : uint8_t { + kAnglesAbs1, + kAnglesAbs2, + kAnglesAbs3, + kAnglesRel1, + kAnglesRel2, + kAnglesRel3, + kBracesAbs1, + kBracesAbs2, + kBracesAbs3, + kBracesRel1, + kBracesRel2, + kBracesRel3, + kBracketsAbs1, + kBracketsAbs2, + kBracketsAbs3, + kBracketsRel1, + kBracketsRel2, + kBracketsRel3, + kParenthesesAbs1, + kParenthesesAbs2, + kParenthesesAbs3, + kParenthesesRel1, + kParenthesesRel2, + kParenthesesRel3, + kNumTypes + }; + + DisassemblerZtf(); + DisassemblerZtf(const DisassemblerZtf&) = delete; + const DisassemblerZtf& operator=(const DisassemblerZtf&) = delete; + ~DisassemblerZtf() override; + + // Applies quick checks to determine if |image| *may* point to the start of a + // ZTF file. Returns true on success. + static bool QuickDetect(ConstBufferView image); + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector<ReferenceGroup> MakeReferenceGroups() const override; + + // Reference Readers, templated to allow configurable digit count and pool. + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceReader> MakeReadAbs(offset_t lo, offset_t hi); + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceReader> MakeReadRel(offset_t lo, offset_t hi); + + // Reference Writers, templated to allow configurable digit count and pool. 
+ template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceWriter> MakeWriteAbs(MutableBufferView image); + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceWriter> MakeWriteRel(MutableBufferView image); + + private: + friend Disassembler; + + // Disassembler: + bool Parse(ConstBufferView image) override; + + ZtfTranslator translator_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ diff --git a/disassembler_ztf_unittest.cc b/disassembler_ztf_unittest.cc new file mode 100644 index 0000000..9b53e62 --- /dev/null +++ b/disassembler_ztf_unittest.cc @@ -0,0 +1,402 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_ztf.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <map> +#include <set> +#include <utility> +#include <vector> + +#include "base/cxx17_backports.h" +#include "base/strings/string_piece.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/element_detection.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +constexpr char kNormalText[] = R"(ZTxt +Hello World! +This is an example of an absolute reference <<1,1>> +And {-01,+05} is an example of a relative ref +txTZ +TRAILING DATA)"; +// -1 to exclude null byte. +constexpr size_t kNormalTextExtraBytes = base::size("TRAILING DATA") - 1; + +constexpr char kOutOfBoundsText[] = R"(ZTxt<1,1> +Hello World! +This is an example of an OOB absolute reference <890,605> +And {-050,+100} is an example of an OOB relative ref. +but [+00,+10] is valid at least. As is (1,5). +<1, 6> and { ,1} aren't nor is {4,5] +{7,6}<1,1><2,3>{+00,+00}{004,100}[+00,+60][+000,-100]<-000,-035>(-00,-00)txTZ +)"; + +// Converts a raw string into data. 
+std::vector<uint8_t> StrToData(base::StringPiece s) { + return std::vector<uint8_t>(s.begin(), s.end()); +} + +// Compare if |a.location < b.location| as references have unique locations. +struct ReferenceCompare { + bool operator()(const Reference& a, const Reference& b) const { + return a.location < b.location; + } +}; + +using ReferenceKey = + std::pair<DisassemblerZtf::ReferencePool, DisassemblerZtf::ReferenceType>; +using ReferenceSets = + std::map<ReferenceKey, std::set<Reference, ReferenceCompare>>; + +// Write references in |refs_to_write| to |image|. Also validate the +// disassembler parses |image| such that it is of |expected_size|. +void WriteReferences(MutableBufferView image, + size_t expected_size, + const ReferenceSets& refs_to_write) { + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + std::unique_ptr<DisassemblerZtf> dis = + Disassembler::Make<DisassemblerZtf>(image); + EXPECT_TRUE(dis); + EXPECT_EQ(expected_size, dis->size()); + image.shrink(dis->size()); + auto reference_groups = dis->MakeReferenceGroups(); + for (const auto& group : reference_groups) { + auto writer = group.GetWriter(image, dis.get()); + ReferenceKey key = { + static_cast<DisassemblerZtf::ReferencePool>(group.pool_tag().value()), + static_cast<DisassemblerZtf::ReferenceType>(group.type_tag().value())}; + if (!refs_to_write.count(key)) + continue; + for (const auto& ref : refs_to_write.at(key)) + writer->PutNext(ref); + } +} + +// Read references in |refs_to_read| from |image|. Once found +// the elements are removed from |refs_to_read|. Also validate the +// disassembler parses |image| such that it is of |expected_size|. 
+void ReadReferences(ConstBufferView image, + size_t expected_size, + ReferenceSets* refs_to_read) { + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + std::unique_ptr<DisassemblerZtf> dis = + Disassembler::Make<DisassemblerZtf>(image); + EXPECT_TRUE(dis); + EXPECT_EQ(expected_size, dis->size()); + auto reference_groups = dis->MakeReferenceGroups(); + for (const auto& group : reference_groups) { + auto reader = group.GetReader(dis.get()); + ReferenceKey key = { + static_cast<DisassemblerZtf::ReferencePool>(group.pool_tag().value()), + static_cast<DisassemblerZtf::ReferenceType>(group.type_tag().value())}; + if (!refs_to_read->count(key)) { + // No elements of this pool/type pair are expected so assert that none are + // found. + auto ref = reader->GetNext(); + EXPECT_FALSE(ref.has_value()); + continue; + } + // For each reference remove it from the set if it exists, error if + // unexpected references are found. + for (auto ref = reader->GetNext(); ref.has_value(); + ref = reader->GetNext()) { + EXPECT_EQ(1UL, refs_to_read->at(key).erase(ref.value())); + } + EXPECT_EQ(0U, refs_to_read->at(key).size()); + } +} + +void TestTranslation(const ZtfTranslator& translator, + offset_t expected_location, + ztf::LineCol lc) { + // Check the lc is translated to the expected location. + EXPECT_EQ(expected_location, translator.LineColToOffset(lc)); + auto new_lc = translator.OffsetToLineCol(expected_location); + if (expected_location == kInvalidOffset) { + EXPECT_FALSE(translator.IsValid(lc)); + EXPECT_FALSE(new_lc.has_value()); + } else { + EXPECT_TRUE(translator.IsValid(lc)); + // Check that the reverse is true. |ztf::LineCol{0, 0}| is a sentinel and + // should never be valid. 
+ EXPECT_EQ(lc.line, new_lc->line); + EXPECT_EQ(lc.col, new_lc->col); + } +} + +template <typename T> +size_t CountDistinct(const std::vector<T>& v) { + return std::set<T>(v.begin(), v.end()).size(); +} + +} // namespace + +TEST(ZtfTranslatorTest, Translate) { + ztf::dim_t kMaxVal = INT16_MAX; + ztf::dim_t kMinVal = INT16_MIN; + + const std::vector<uint8_t> text(StrToData(kOutOfBoundsText)); + ConstBufferView image(text.data(), text.size()); + ZtfTranslator translator; + EXPECT_TRUE(translator.Init(image)); + + // Absolute Translations: + + // Check a bunch of invalid locations. + TestTranslation(translator, kInvalidOffset, ztf::LineCol{50, 60}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 0}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, 0}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, -1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{-1, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{-1, -1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, kMaxVal}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{kMaxVal, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, kMinVal}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{kMinVal, 1}); + + // Check the start of the file. + TestTranslation(translator, 0, ztf::LineCol{1, 1}); + TestTranslation(translator, 1, ztf::LineCol{1, 2}); + + // Check the boundary around a newline. + TestTranslation(translator, 9, ztf::LineCol{1, 10}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, 11}); + TestTranslation(translator, 10, ztf::LineCol{2, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{2, 0}); + + // Check the end of the file. 
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{8, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{7, 79}); + // Need to subtract to account for the newline. + TestTranslation(translator, text.size() - 1, ztf::LineCol{7, 78}); + TestTranslation(translator, text.size() - 2, ztf::LineCol{7, 77}); + + // Delta Validity + // - Reminder! 0 -> 1:1 + + // Common possible edge cases. + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 0})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 1})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{1, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{-1, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{-1, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, kMaxVal})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{kMaxVal, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, kMinVal})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{kMinVal, 0})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{0, kMaxVal})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{kMaxVal, 0})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{kMaxVal, kMaxVal})); + + // Newline area. + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 9})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, 10})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{-1, 0})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{1, -10})); + EXPECT_TRUE(translator.IsValid(9, ztf::DeltaLineCol{1, -9})); + + // End of file. 
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{7, 78})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{7, 77})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{6, 78})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{6, 77})); + EXPECT_FALSE(translator.IsValid(text.size() - 1, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(text.size() - 1, ztf::DeltaLineCol{1, 0})); + EXPECT_TRUE(translator.IsValid(text.size() - 2, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(text.size() - 2, ztf::DeltaLineCol{1, 0})); +} + +// Ensures that ReferenceGroups from DisassemblerZtf::MakeReferenceGroups() +// cover each non-sentinel element in ReferenceType in order, exactly once. Also +// ensures that the ReferenceType elements are grouped by ReferencePool, and +// listed in increasing order. +TEST(DisassemblerZtfTest, ReferenceGroups) { + std::vector<uint32_t> pool_list; + std::vector<uint32_t> type_list; + DisassemblerZtf dis; + for (ReferenceGroup group : dis.MakeReferenceGroups()) { + pool_list.push_back(static_cast<uint32_t>(group.pool_tag().value())); + type_list.push_back(static_cast<uint32_t>(group.type_tag().value())); + } + + // Check ReferenceByte coverage. + constexpr size_t kNumTypes = DisassemblerZtf::kNumTypes; + EXPECT_EQ(kNumTypes, type_list.size()); + EXPECT_EQ(kNumTypes, CountDistinct(type_list)); + EXPECT_TRUE(std::is_sorted(type_list.begin(), type_list.end())); + + // Check that ReferenceType elements are grouped by ReferencePool. Note that + // repeats can occur, and pools can be skipped. + EXPECT_TRUE(std::is_sorted(pool_list.begin(), pool_list.end())); +} + +TEST(DisassemblerZtfTest, BadMagic) { + // Test a case where there is no header so a disassembler cannot be created. 
+ { + const std::vector<uint8_t> text(StrToData("foobarbaz bazbarfoo")); + ConstBufferView image(text.data(), text.size()); + EXPECT_FALSE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } + // Test a case where there is no footer so a disassembler cannot be created. + { + const std::vector<uint8_t> text(StrToData("ZTxtfoobarbaz bazbarfootxTZ")); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } + // Test when the header is too short + { + const std::vector<uint8_t> text(StrToData("ZTxtxTZ\n")); + ConstBufferView image(text.data(), text.size()); + EXPECT_FALSE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } +} + +TEST(DisassemblerZtfTest, ZtfSizeBound) { + { + std::vector<uint8_t> text(StrToData("ZTxt")); + std::fill_n(std::back_inserter(text), ztf::kMaxDimValue - 2, '\n'); + text.insert(text.end(), {'t', 'x', 'T', 'Z', '\n'}); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_TRUE(Disassembler::Make<DisassemblerZtf>(image)); + } + { + std::vector<uint8_t> text(StrToData("ZTxt")); + std::fill_n(std::back_inserter(text), ztf::kMaxDimValue - 1, '\n'); + text.insert(text.end(), {'t', 'x', 'T', 'Z', '\n'}); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } +} + +// Try reading from a well formed source. 
+TEST(DisassemblerZtfTest, NormalRead) { + const std::vector<uint8_t> text(StrToData(kNormalText)); + ConstBufferView image(text.data(), text.size()); + ReferenceSets expected_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({63, 0})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({74, 27})}}, + }; + ReadReferences(image, text.size() - kNormalTextExtraBytes, &expected_map); +} + +// Try writing to a well formed source and ensure that what is read back +// reflects what was written. +TEST(DisassemblerZtfTest, NormalWrite) { + std::vector<uint8_t> mutable_text(StrToData(kNormalText)); + MutableBufferView image(mutable_text.data(), mutable_text.size()); + ReferenceSets change_map = { + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({63, 71})}}, + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel3}, + {Reference({74, 4})}}, + }; + WriteReferences(image, mutable_text.size() - kNormalTextExtraBytes, + change_map); + + // As a sanity check see if a disassembler can identify the same references. + ConstBufferView const_image(image); + ReadReferences(const_image, mutable_text.size() - kNormalTextExtraBytes, + &change_map); +} + +// Try reading from a source rife with errors. 
+TEST(DisassemblerZtfTest, ReadOutOfBoundsRefs) { + const std::vector<uint8_t> text(StrToData(kOutOfBoundsText)); + ConstBufferView image(text.data(), text.size()); + ReferenceSets expected_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({4, 0}), Reference({223, 0}), Reference({228, 12})}}, + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + {Reference({139, 149})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesAbs1}, + {Reference({218, 223})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({233, 233})}}, + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({174, 4})}}, + }; + ReadReferences(image, text.size(), &expected_map); +} + +// Try writing to a source rife with errors (malformed references or ones that +// reference non-existent locations. Some of the values written are also bad. To +// validate check if the expected set of references are read back. +TEST(DisassemblerZtfTest, WriteOutOfBoundsRefs) { + // Replace |old_val| (provided for checking) with |new_val| in |set|. + auto update_set = [](Reference old_ref, Reference new_ref, + std::set<Reference, ReferenceCompare>* set) { + auto it = set->find(old_ref); + EXPECT_NE(it, set->cend()); + EXPECT_EQ(*it, old_ref); + set->erase(it); + set->insert(new_ref); + }; + + // Replace |old_val| (provided for checking) with |new_val| in the set which + // is the value corresponding to |key| in |map|. 
+ auto update_map = + [update_set]( + ReferenceKey key, Reference old_ref, Reference new_ref, + std::map<ReferenceKey, std::set<Reference, ReferenceCompare>>* map) { + auto it = map->find(key); + EXPECT_NE(it, map->cend()); + update_set(old_ref, new_ref, &(it->second)); + }; + + std::vector<uint8_t> mutable_text(StrToData(kOutOfBoundsText)); + MutableBufferView image(mutable_text.data(), mutable_text.size()); + ReferenceSets change_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({223, 15}), Reference({228, 13})}}, + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs3}, + {Reference({4, 50})}}, // This should fail to write. + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + {Reference({139, static_cast<offset_t>( + mutable_text.size())})}}, // This should fail. + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({174, 21})}}, // This should fail. + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesAbs1}, + {Reference({218, 219})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({233, 174})}}, + }; + WriteReferences(image, mutable_text.size(), change_map); + + // As a sanity check see if a disassembler can identify the same references + // (excluding the invalid ones). 
+ change_map.erase(change_map.find( + {DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs3})); + change_map.at({DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}) + .emplace(Reference{4, 0}); + update_map({DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + Reference({139, static_cast<offset_t>(mutable_text.size())}), + Reference({139, 149}), &change_map); + update_map({DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + Reference({174, 21}), Reference({174, 4}), &change_map); + ConstBufferView const_image(image); + ReadReferences(const_image, mutable_text.size(), &change_map); +} + +} // namespace zucchini diff --git a/element_detection.cc b/element_detection.cc new file mode 100644 index 0000000..356c0d7 --- /dev/null +++ b/element_detection.cc @@ -0,0 +1,165 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/element_detection.h" + +#include <utility> + +#include "components/zucchini/buildflags.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/disassembler_no_op.h" + +#if BUILDFLAG(ENABLE_DEX) +#include "components/zucchini/disassembler_dex.h" +#endif // BUILDFLAG(ENABLE_DEX) + +#if BUILDFLAG(ENABLE_ELF) +#include "components/zucchini/disassembler_elf.h" +#endif // BUILDFLAG(ENABLE_ELF) + +#if BUILDFLAG(ENABLE_WIN) +#include "components/zucchini/disassembler_win32.h" +#endif // BUILDFLAG(ENABLE_WIN) + +#if BUILDFLAG(ENABLE_ZTF) +#include "components/zucchini/disassembler_ztf.h" +#endif // BUILDFLAG(ENABLE_ZTF) + +namespace zucchini { + +namespace { + +// Impose a minimal program size to eliminate pathological cases. 
+enum : size_t { kMinProgramSize = 16 }; + +} // namespace + +/******** Utility Functions ********/ + +std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback( + ConstBufferView image) { +#if BUILDFLAG(ENABLE_WIN) + if (DisassemblerWin32X86::QuickDetect(image)) { + auto disasm = Disassembler::Make<DisassemblerWin32X86>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } + + if (DisassemblerWin32X64::QuickDetect(image)) { + auto disasm = Disassembler::Make<DisassemblerWin32X64>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } +#endif // BUILDFLAG(ENABLE_WIN) + +#if BUILDFLAG(ENABLE_ELF) + if (DisassemblerElfX86::QuickDetect(image)) { + auto disasm = Disassembler::Make<DisassemblerElfX86>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } + + if (DisassemblerElfX64::QuickDetect(image)) { + auto disasm = Disassembler::Make<DisassemblerElfX64>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } + + if (DisassemblerElfAArch32::QuickDetect(image)) { + auto disasm = Disassembler::Make<DisassemblerElfAArch32>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } + + if (DisassemblerElfAArch64::QuickDetect(image)) { + auto disasm = Disassembler::Make<DisassemblerElfAArch64>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } +#endif // BUILDFLAG(ENABLE_ELF) + +#if BUILDFLAG(ENABLE_DEX) + if (DisassemblerDex::QuickDetect(image)) { + auto disasm = Disassembler::Make<DisassemblerDex>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } +#endif // BUILDFLAG(ENABLE_DEX) + +#if BUILDFLAG(ENABLE_ZTF) + if (DisassemblerZtf::QuickDetect(image)) { + // This disallows very short examples like "ZTxtxtZ\n" in ensemble patching. 
+ auto disasm = Disassembler::Make<DisassemblerZtf>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } +#endif // BUILDFLAG(ENABLE_ZTF) + + return nullptr; +} + +std::unique_ptr<Disassembler> MakeDisassemblerOfType(ConstBufferView image, + ExecutableType exe_type) { + switch (exe_type) { +#if BUILDFLAG(ENABLE_WIN) + case kExeTypeWin32X86: + return Disassembler::Make<DisassemblerWin32X86>(image); + case kExeTypeWin32X64: + return Disassembler::Make<DisassemblerWin32X64>(image); +#endif // BUILDFLAG(ENABLE_WIN) +#if BUILDFLAG(ENABLE_ELF) + case kExeTypeElfX86: + return Disassembler::Make<DisassemblerElfX86>(image); + case kExeTypeElfX64: + return Disassembler::Make<DisassemblerElfX64>(image); + case kExeTypeElfAArch32: + return Disassembler::Make<DisassemblerElfAArch32>(image); + case kExeTypeElfAArch64: + return Disassembler::Make<DisassemblerElfAArch64>(image); +#endif // BUILDFLAG(ENABLE_ELF) +#if BUILDFLAG(ENABLE_DEX) + case kExeTypeDex: + return Disassembler::Make<DisassemblerDex>(image); +#endif // BUILDFLAG(ENABLE_DEX) +#if BUILDFLAG(ENABLE_ZTF) + case kExeTypeZtf: + return Disassembler::Make<DisassemblerZtf>(image); +#endif // BUILDFLAG(ENABLE_ZTF) + case kExeTypeNoOp: + return Disassembler::Make<DisassemblerNoOp>(image); + default: + // If an architecture is disabled then null is handled gracefully. 
+ return nullptr; + } +} + +absl::optional<Element> DetectElementFromDisassembler(ConstBufferView image) { + std::unique_ptr<Disassembler> disasm = MakeDisassemblerWithoutFallback(image); + if (disasm) + return Element({0, disasm->size()}, disasm->GetExeType()); + return absl::nullopt; +} + +/******** ProgramScanner ********/ + +ElementFinder::ElementFinder(ConstBufferView image, ElementDetector&& detector) + : image_(image), detector_(std::move(detector)) {} + +ElementFinder::~ElementFinder() = default; + +absl::optional<Element> ElementFinder::GetNext() { + for (; pos_ < image_.size(); ++pos_) { + ConstBufferView test_image = + ConstBufferView::FromRange(image_.begin() + pos_, image_.end()); + absl::optional<Element> element = detector_.Run(test_image); + if (element) { + element->offset += pos_; + pos_ = element->EndOffset(); + return element; + } + } + return absl::nullopt; +} + +} // namespace zucchini diff --git a/element_detection.h b/element_detection.h new file mode 100644 index 0000000..856ec27 --- /dev/null +++ b/element_detection.h @@ -0,0 +1,59 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_ +#define COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_ + +#include <stddef.h> + +#include <memory> + +#include "base/callback.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +class Disassembler; + +// Attempts to detect an executable located at start of |image|. If found, +// returns the corresponding disassembler. Otherwise returns null. +std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback( + ConstBufferView image); + +// Attempts to create a disassembler corresponding to |exe_type| and initialize +// it with |image|, On failure, returns null. 
+std::unique_ptr<Disassembler> MakeDisassemblerOfType(ConstBufferView image, + ExecutableType exe_type); + +// Attempts to detect an element associated with |image| and returns it, or +// returns nullopt if no element is detected. +using ElementDetector = + base::RepeatingCallback<absl::optional<Element>(ConstBufferView image)>; + +// Implementation of ElementDetector using disassemblers. +absl::optional<Element> DetectElementFromDisassembler(ConstBufferView image); + +// A class to scan through an image and iteratively detect elements. +class ElementFinder { + public: + ElementFinder(ConstBufferView image, ElementDetector&& detector); + ElementFinder(const ElementFinder&) = delete; + const ElementFinder& operator=(const ElementFinder&) = delete; + ~ElementFinder(); + + // Scans for the next executable using |detector|. Returns the next element + // found, or nullopt if no more element can be found. + absl::optional<Element> GetNext(); + + private: + ConstBufferView image_; + ElementDetector detector_; + offset_t pos_ = 0; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ELEMENT_DETECTION_H_ diff --git a/element_detection_unittest.cc b/element_detection_unittest.cc new file mode 100644 index 0000000..319a88a --- /dev/null +++ b/element_detection_unittest.cc @@ -0,0 +1,102 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/element_detection.h" + +#include <map> +#include <vector> + +#include "base/bind.h" +#include "components/zucchini/buffer_view.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { +namespace { +// This test uses a mock archive format where regions are determined by their +// consecutive byte values rather than parsing real executables. +// +// 0 - Padding or raw data (not mapped to an executable). +// 1 - A Win32x86 executable. +// 2 - A Win32x64 executable. 
+// +// So an example archive file of; +// 0 1 1 1 0 1 1 0 0 2 2 2 2 +// contains (in order left to right): +// - One padding byte +// - Three byte Win32x86 executable +// - One padding byte +// - Two byte Win32x86 executable +// - Two padding bytes +// - Four byte Win32x64 executable + +class ElementDetectionTest : public ::testing::Test { + protected: + using ElementVector = std::vector<Element>; + using ExeTypeMap = std::map<uint8_t, ExecutableType>; + + ElementDetectionTest() + : exe_map_({{1, kExeTypeWin32X86}, {2, kExeTypeWin32X64}}) {} + + ElementVector TestElementFinder(std::vector<uint8_t> buffer) { + ConstBufferView image(buffer.data(), buffer.size()); + + ElementFinder finder( + image, + base::BindRepeating( + [](ExeTypeMap exe_map, ConstBufferView image, + ConstBufferView region) -> absl::optional<Element> { + EXPECT_GE(region.begin(), image.begin()); + EXPECT_LE(region.end(), image.end()); + EXPECT_GE(region.size(), 0U); + + if (region[0] != 0) { + offset_t length = 1; + while (length < region.size() && region[length] == region[0]) + ++length; + return Element{{0, length}, exe_map[region[0]]}; + } + return absl::nullopt; + }, + exe_map_, image)); + std::vector<Element> elements; + for (auto element = finder.GetNext(); element; element = finder.GetNext()) { + elements.push_back(*element); + } + return elements; + } + + // Translation map from mock archive bytes to actual types used in Zucchini. 
+ ExeTypeMap exe_map_; +}; + +TEST_F(ElementDetectionTest, ElementFinderEmpty) { + std::vector<uint8_t> buffer(10, 0); + ElementFinder finder( + ConstBufferView(buffer.data(), buffer.size()), + base::BindRepeating([](ConstBufferView image) -> absl::optional<Element> { + return absl::nullopt; + })); + EXPECT_EQ(absl::nullopt, finder.GetNext()); +} + +TEST_F(ElementDetectionTest, ElementFinder) { + EXPECT_EQ(ElementVector(), TestElementFinder({})); + EXPECT_EQ(ElementVector(), TestElementFinder({0, 0})); + EXPECT_EQ(ElementVector({{{0, 2}, kExeTypeWin32X86}}), + TestElementFinder({1, 1})); + EXPECT_EQ( + ElementVector({{{0, 2}, kExeTypeWin32X86}, {{2, 2}, kExeTypeWin32X64}}), + TestElementFinder({1, 1, 2, 2})); + EXPECT_EQ(ElementVector({{{1, 2}, kExeTypeWin32X86}}), + TestElementFinder({0, 1, 1, 0})); + EXPECT_EQ( + ElementVector({{{1, 2}, kExeTypeWin32X86}, {{3, 3}, kExeTypeWin32X64}}), + TestElementFinder({0, 1, 1, 2, 2, 2})); + EXPECT_EQ( + ElementVector({{{1, 2}, kExeTypeWin32X86}, {{4, 3}, kExeTypeWin32X64}}), + TestElementFinder({0, 1, 1, 0, 2, 2, 2})); +} + +} // namespace +} // namespace zucchini diff --git a/encoded_view.cc b/encoded_view.cc new file mode 100644 index 0000000..205603f --- /dev/null +++ b/encoded_view.cc @@ -0,0 +1,78 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/encoded_view.h" + +#include <algorithm> +#include <utility> + +#include "base/check_op.h" + +namespace zucchini { + +EncodedView::EncodedView(const ImageIndex& image_index) + : image_index_(image_index), pool_infos_(image_index.PoolCount()) {} +EncodedView::~EncodedView() = default; + +EncodedView::value_type EncodedView::Projection(offset_t location) const { + DCHECK_LT(location, image_index_.size()); + + // Find out what lies at |location|. 
+ TypeTag type = image_index_.LookupType(location); + + // |location| points into raw data. + if (type == kNoTypeTag) { + // The projection is the identity function on raw content. + return image_index_.GetRawValue(location); + } + + // |location| points into a Reference. + const ReferenceSet& ref_set = image_index_.refs(type); + Reference ref = ref_set.at(location); + DCHECK_GE(location, ref.location); + DCHECK_LT(location, ref.location + ref_set.width()); + + // |location| is not the first byte of the reference. + if (location != ref.location) { + // Trailing bytes of a reference are all projected to the same value. + return kReferencePaddingProjection; + } + + PoolTag pool_tag = ref_set.pool_tag(); + const auto& target_pool = ref_set.target_pool(); + + // Targets with an associated Label will use its Label index in projection. + DCHECK_EQ(target_pool.size(), pool_infos_[pool_tag.value()].labels.size()); + uint32_t label = pool_infos_[pool_tag.value()] + .labels[target_pool.KeyForOffset(ref.target)]; + + // Projection is done on (|target|, |type|), shifted by + // kBaseReferenceProjection to avoid collisions with raw content. 
+ value_type projection = label; + projection *= image_index_.TypeCount(); + projection += type.value(); + return projection + kBaseReferenceProjection; +} + +size_t EncodedView::Cardinality() const { + size_t max_width = 0; + for (const auto& pool_info : pool_infos_) + max_width = std::max(max_width, pool_info.bound); + return max_width * image_index_.TypeCount() + kBaseReferenceProjection; +} + +void EncodedView::SetLabels(PoolTag pool, + std::vector<uint32_t>&& labels, + size_t bound) { + DCHECK_EQ(labels.size(), image_index_.pool(pool).size()); + DCHECK(labels.empty() || *max_element(labels.begin(), labels.end()) < bound); + pool_infos_[pool.value()].labels = std::move(labels); + pool_infos_[pool.value()].bound = bound; +} + +EncodedView::PoolInfo::PoolInfo() = default; +EncodedView::PoolInfo::PoolInfo(PoolInfo&&) = default; +EncodedView::PoolInfo::~PoolInfo() = default; + +} // namespace zucchini diff --git a/encoded_view.h b/encoded_view.h new file mode 100644 index 0000000..864d265 --- /dev/null +++ b/encoded_view.h @@ -0,0 +1,185 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_ +#define COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <iterator> +#include <vector> + +#include "components/zucchini/image_index.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// Zucchini-gen performs semantics-aware matching: +// - Same-typed reference target in "old" and "new" can be associated. +// Associated targets are assigned an identifier called "label" (and for +// unassociated targets, label = 0). +// - EncodedView maps each offset in "old" and "new" images to a "projected +// value", which can be: +// - Raw byte value (0-255) for non-references. 
+// - Reference "projected value" (> 256) that depends on target {type, label} +// at each reference's location (byte 0). +// - Reference padding value (256) at the body of each reference (bytes 1+). +// - The projected values for "old" and "new" are used to build the equivalence +// map. + +constexpr size_t kReferencePaddingProjection = 256; +constexpr size_t kBaseReferenceProjection = 257; + +// A Range (providing begin and end iterators) that adapts ImageIndex to make +// image data appear as an Encoded Image, that is encoded data under a higher +// level of abstraction than raw bytes. In particular: +// - First byte of each reference become a projection of its type and label. +// - Subsequent bytes of each reference becomes |kReferencePaddingProjection|. +// - Non-reference raw bytes remain as raw bytes. +class EncodedView { + public: + // RandomAccessIterator whose values are the results of Projection(). + class Iterator { + public: + using iterator_category = std::random_access_iterator_tag; + using value_type = size_t; + using difference_type = ptrdiff_t; + using reference = size_t; + using pointer = size_t*; + + Iterator(const EncodedView* encoded_view, difference_type pos) + : encoded_view_(encoded_view), pos_(pos) {} + + Iterator(const Iterator&) = default; + + Iterator& operator=(const Iterator&) = default; + + value_type operator*() const { + return encoded_view_->Projection(static_cast<offset_t>(pos_)); + } + + value_type operator[](difference_type n) const { + return encoded_view_->Projection(static_cast<offset_t>(pos_ + n)); + } + + Iterator& operator++() { + ++pos_; + return *this; + } + + Iterator operator++(int) { + Iterator tmp = *this; + ++pos_; + return tmp; + } + + Iterator& operator--() { + --pos_; + return *this; + } + + Iterator operator--(int) { + Iterator tmp = *this; + --pos_; + return tmp; + } + + Iterator& operator+=(difference_type n) { + pos_ += n; + return *this; + } + + Iterator& operator-=(difference_type n) { + pos_ -= n; + 
return *this; + } + + friend bool operator==(Iterator a, Iterator b) { return a.pos_ == b.pos_; } + + friend bool operator!=(Iterator a, Iterator b) { return !(a == b); } + + friend bool operator<(Iterator a, Iterator b) { return a.pos_ < b.pos_; } + + friend bool operator>(Iterator a, Iterator b) { return b < a; } + + friend bool operator<=(Iterator a, Iterator b) { return !(b < a); } + + friend bool operator>=(Iterator a, Iterator b) { return !(a < b); } + + friend difference_type operator-(Iterator a, Iterator b) { + return a.pos_ - b.pos_; + } + + friend Iterator operator+(Iterator it, difference_type n) { + it += n; + return it; + } + + friend Iterator operator-(Iterator it, difference_type n) { + it -= n; + return it; + } + + private: + const EncodedView* encoded_view_; + difference_type pos_; + }; + + using value_type = size_t; + using size_type = offset_t; + using difference_type = ptrdiff_t; + using const_iterator = Iterator; + + // |image_index| is the annotated image being adapted, and is required to + // remain valid for the lifetime of the object. + explicit EncodedView(const ImageIndex& image_index); + EncodedView(const EncodedView&) = delete; + const EncodedView& operator=(const EncodedView&) = delete; + ~EncodedView(); + + // Projects |location| to a scalar value that describes the content at a + // higher level of abstraction. + value_type Projection(offset_t location) const; + + bool IsToken(offset_t location) const { + return image_index_.IsToken(location); + } + + // Returns the cardinality of the projection, i.e., the upper bound on + // values returned by Projection(). + value_type Cardinality() const; + + // Associates |labels| to targets for a given |pool|, replacing previous + // association. Values in |labels| must be smaller than |bound|. + void SetLabels(PoolTag pool, std::vector<uint32_t>&& labels, size_t bound); + const ImageIndex& image_index() const { return image_index_; } + + // Range functions. 
+ size_type size() const { return size_type(image_index_.size()); } + const_iterator begin() const { + return const_iterator{this, difference_type(0)}; + } + const_iterator end() const { + return const_iterator{this, difference_type(size())}; + } + + private: + struct PoolInfo { + PoolInfo(); + PoolInfo(PoolInfo&&); + ~PoolInfo(); + + // |labels| translates IndirectReference target_key to label. + std::vector<uint32_t> labels; + size_t bound = 0; + }; + + const ImageIndex& image_index_; + std::vector<PoolInfo> pool_infos_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ENCODED_VIEW_H_ diff --git a/encoded_view_unittest.cc b/encoded_view_unittest.cc new file mode 100644 index 0000000..96d9dc4 --- /dev/null +++ b/encoded_view_unittest.cc @@ -0,0 +1,202 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/encoded_view.h" + +#include <iterator> +#include <numeric> +#include <vector> + +#include "components/zucchini/image_index.h" +#include "components/zucchini/test_disassembler.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +constexpr size_t PADDING = kReferencePaddingProjection; + +template <class It1, class It2> +void TestInputIterator(It1 first_expected, + It1 last_expected, + It2 first_input, + It2 last_input) { + while (first_expected != last_expected && first_input != last_input) { + EXPECT_EQ(*first_expected, *first_input); + ++first_expected; + ++first_input; + } + EXPECT_EQ(last_input, first_input); + EXPECT_EQ(last_expected, first_expected); +} + +template <class It1, class It2> +void TestForwardIterator(It1 first_expected, + It1 last_expected, + It2 first_input, + It2 last_input) { + TestInputIterator(first_expected, last_expected, first_input, last_input); + + while (first_expected != last_expected && first_input != last_input) { + 
EXPECT_EQ(*(first_expected++), *(first_input++)); + } + EXPECT_EQ(last_input, first_input); + EXPECT_EQ(last_expected, first_expected); +} + +template <class It1, class It2> +void TestBidirectionalIterator(It1 first_expected, + It1 last_expected, + It2 first_input, + It2 last_input) { + TestForwardIterator(first_expected, last_expected, first_input, last_input); + + while (first_expected != last_expected && first_input != last_input) { + EXPECT_EQ(*(--last_expected), *(--last_input)); + } + EXPECT_EQ(last_input, first_input); + EXPECT_EQ(last_expected, first_expected); +} + +template <class It1, class It2> +void TestRandomAccessIterator(It1 first_expected, + It1 last_expected, + It2 first_input, + It2 last_input) { + TestBidirectionalIterator(first_expected, last_expected, first_input, + last_input); + + using difference_type = typename std::iterator_traits<It1>::difference_type; + + difference_type expected_size = last_expected - first_expected; + difference_type input_size = last_input - first_input; + EXPECT_EQ(expected_size, input_size); + + for (difference_type i = 0; i < expected_size; ++i) { + EXPECT_EQ(*(first_expected + i), *(first_input + i)); + EXPECT_EQ(first_expected[i], first_input[i]); + + EXPECT_EQ(0 < i, first_input < first_input + i); + EXPECT_EQ(0 > i, first_input > first_input + i); + EXPECT_EQ(0 <= i, first_input <= first_input + i); + EXPECT_EQ(0 >= i, first_input >= first_input + i); + + EXPECT_EQ(expected_size < i, last_input < first_input + i); + EXPECT_EQ(expected_size > i, last_input > first_input + i); + EXPECT_EQ(expected_size <= i, last_input <= first_input + i); + EXPECT_EQ(expected_size >= i, last_input >= first_input + i); + + It2 input = first_input; + input += i; + EXPECT_EQ(*input, first_expected[i]); + input -= i; + EXPECT_EQ(first_input, input); + input += i; + + EXPECT_EQ(0 < i, first_input < input); + EXPECT_EQ(0 > i, first_input > input); + EXPECT_EQ(0 <= i, first_input <= input); + EXPECT_EQ(0 >= i, first_input >= input); + 
+ EXPECT_EQ(expected_size < i, last_input < input); + EXPECT_EQ(expected_size > i, last_input > input); + EXPECT_EQ(expected_size <= i, last_input <= input); + EXPECT_EQ(expected_size >= i, last_input >= input); + } +} + +} // namespace + +class EncodedViewTest : public testing::Test { + protected: + EncodedViewTest() + : buffer_(20), + image_index_(ConstBufferView(buffer_.data(), buffer_.size())) { + std::iota(buffer_.begin(), buffer_.end(), 0); + TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, + {{1, 0}, {8, 1}, {10, 2}}, + {4, TypeTag(1), PoolTag(0)}, {{3, 3}}, + {3, TypeTag(2), PoolTag(1)}, {{12, 4}, {17, 5}}); + image_index_.Initialize(&disasm); + } + + void CheckView(std::vector<size_t> expected, + const EncodedView& encoded_view) const { + for (offset_t i = 0; i < encoded_view.size(); ++i) { + EXPECT_EQ(expected[i], encoded_view.Projection(i)) << i; + } + TestRandomAccessIterator(expected.begin(), expected.end(), + encoded_view.begin(), encoded_view.end()); + } + + std::vector<uint8_t> buffer_; + ImageIndex image_index_; +}; + +TEST_F(EncodedViewTest, Unlabeled) { + EncodedView encoded_view(image_index_); + + encoded_view.SetLabels(PoolTag(0), {0, 0, 0, 0}, 1); + encoded_view.SetLabels(PoolTag(1), {0, 0}, 1); + + std::vector<size_t> expected = { + 0, // raw + kBaseReferenceProjection + 0 + 0 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 1 + 0 * 3, // ref 1 + PADDING, + PADDING, + PADDING, + 7, // raw + kBaseReferenceProjection + 0 + 0 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 0 + 0 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 2 + 0 * 3, // ref 2 + PADDING, + PADDING, + 15, // raw + 16, + kBaseReferenceProjection + 2 + 0 * 3, // ref 2 + PADDING, + PADDING, + }; + EXPECT_EQ(kBaseReferenceProjection + 3 * 1, encoded_view.Cardinality()); + CheckView(expected, encoded_view); +} + +TEST_F(EncodedViewTest, Labeled) { + EncodedView encoded_view(image_index_); + + encoded_view.SetLabels(PoolTag(0), {0, 2, 1, 2}, 3); + 
encoded_view.SetLabels(PoolTag(1), {0, 0}, 1); + + std::vector<size_t> expected = { + 0, // raw + kBaseReferenceProjection + 0 + 0 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 1 + 2 * 3, // ref 1 + PADDING, + PADDING, + PADDING, + 7, // raw + kBaseReferenceProjection + 0 + 2 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 0 + 1 * 3, // ref 0 + PADDING, + kBaseReferenceProjection + 2 + 0 * 3, // ref 2 + PADDING, + PADDING, + 15, // raw + 16, + kBaseReferenceProjection + 2 + 0 * 3, // ref 2 + PADDING, + PADDING, + }; + EXPECT_EQ(kBaseReferenceProjection + 3 * 3, encoded_view.Cardinality()); + CheckView(expected, encoded_view); +} + +} // namespace zucchini diff --git a/ensemble_matcher.cc b/ensemble_matcher.cc new file mode 100644 index 0000000..d6e8148 --- /dev/null +++ b/ensemble_matcher.cc @@ -0,0 +1,37 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/ensemble_matcher.h" + +#include <algorithm> +#include <limits> + +#include "base/containers/cxx20_erase.h" +#include "base/logging.h" + +namespace zucchini { + +/******** EnsembleMatcher ********/ + +EnsembleMatcher::EnsembleMatcher() = default; + +EnsembleMatcher::~EnsembleMatcher() = default; + +void EnsembleMatcher::Trim() { + // Trim rule: If > 1 DEX files are found then ignore all DEX. This is done + // because we do not yet support MultiDex, under which contents can move + // across file boundary between "old" and "new" archives. When this occurs, + // forcing matches of DEX files and patching them separately can result in + // larger patches than naive patching. 
+  auto is_match_dex = [](const ElementMatch& match) {
+    return match.exe_type() == kExeTypeDex;
+  };
+  auto num_dex = std::count_if(matches_.begin(), matches_.end(), is_match_dex);
+  if (num_dex > 1) {
+    LOG(WARNING) << "Found " << num_dex << " DEX: Ignoring all.";
+    base::EraseIf(matches_, is_match_dex);
+  }
+}
+
+}  // namespace zucchini
diff --git a/ensemble_matcher.h b/ensemble_matcher.h
new file mode 100644
index 0000000..b188657
--- /dev/null
+++ b/ensemble_matcher.h
@@ -0,0 +1,60 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_
+#define COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/element_detection.h"
+#include "components/zucchini/image_utils.h"
+
+namespace zucchini {
+
+// A base class for ensemble matching strategies, which identify Elements in a
+// "new" and "old" archives, and match each "new" Element to an "old" Element.
+// Matched pairs can then be passed to Disassembler for architecture-specific
+// patching. Notes:
+// - A matched Element pair must have the same ExecutableType.
+// - Special case: Exact matches are ignored, since they can be patched directly
+//   without architecture-specific patching.
+// - Multiple "new" Elements may match a common "old" Element.
+// - A "new" Element may have no match. This can happen when no viable match
+//   exists, or when an exact match is skipped.
+class EnsembleMatcher {
+ public:
+  EnsembleMatcher();
+  EnsembleMatcher(const EnsembleMatcher&) = delete;
+  const EnsembleMatcher& operator=(const EnsembleMatcher&) = delete;
+  virtual ~EnsembleMatcher();
+
+  // Interface to main matching feature. Returns whether match was successful.
+  // This should be called at most once per instance.
+ virtual bool RunMatch(ConstBufferView old_image, + ConstBufferView new_image) = 0; + + // Accessors to RunMatch() results. + const std::vector<ElementMatch>& matches() const { return matches_; } + + size_t num_identical() const { return num_identical_; } + + protected: + // Post-processes |matches_| to remove potentially unfavorable entries. + void Trim(); + + // Storage of matched elements: A list of matched pairs, where the list of + // "new" elements have increasing offsets and don't overlap. May be empty. + std::vector<ElementMatch> matches_; + + // Number of identical matches found in match candidates. These should be + // excluded from |matches_|. + size_t num_identical_ = 0; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ENSEMBLE_MATCHER_H_ diff --git a/equivalence_map.cc b/equivalence_map.cc new file mode 100644 index 0000000..26c0764 --- /dev/null +++ b/equivalence_map.cc @@ -0,0 +1,548 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/equivalence_map.h" + +#include <algorithm> +#include <utility> + +#include "base/containers/cxx20_erase.h" +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/encoded_view.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/suffix_array.h" + +namespace zucchini { + +namespace { + +// TODO(haungs): Tune these numbers to improve pathological case results. + +// In pathological cases Zucchini can exhibit O(n^2) behavior if the seed +// selection process runs to completion. To prevent this we impose a quota for +// the total length of equivalences the seed selection process can perform +// trials on. For regular use cases it is unlikely this quota will be exceeded, +// and if it is the effects on patch size are expected to be small. 
+constexpr uint64_t kSeedSelectionTotalVisitLengthQuota = 1 << 18; // 256 KiB + +// The aforementioned quota alone is insufficient, as exploring backwards will +// still be very successful resulting in O(n) behavior in the case of a limited +// seed selection trials. This results in O(n^2) behavior returning. To mitigate +// this we also impose a cap on the ExtendEquivalenceBackward() exploration. +constexpr offset_t kBackwardsExtendLimit = 1 << 16; // 64 KiB + +} // namespace + +/******** Utility Functions ********/ + +double GetTokenSimilarity( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + offset_t src, + offset_t dst) { + DCHECK(old_image_index.IsToken(src)); + DCHECK(new_image_index.IsToken(dst)); + + TypeTag old_type = old_image_index.LookupType(src); + TypeTag new_type = new_image_index.LookupType(dst); + if (old_type != new_type) + return kMismatchFatal; + + // Raw comparison. + if (!old_image_index.IsReference(src) && !new_image_index.IsReference(dst)) { + return old_image_index.GetRawValue(src) == new_image_index.GetRawValue(dst) + ? 1.0 + : -1.5; + } + + const ReferenceSet& old_ref_set = old_image_index.refs(old_type); + const ReferenceSet& new_ref_set = new_image_index.refs(new_type); + Reference old_reference = old_ref_set.at(src); + Reference new_reference = new_ref_set.at(dst); + PoolTag pool_tag = old_ref_set.pool_tag(); + + double affinity = targets_affinities[pool_tag.value()].AffinityBetween( + old_ref_set.target_pool().KeyForOffset(old_reference.target), + new_ref_set.target_pool().KeyForOffset(new_reference.target)); + + // Both targets are not associated, which implies a weak match. + if (affinity == 0.0) + return 0.5 * old_ref_set.width(); + + // At least one target is associated, so values are compared. + return affinity > 0.0 ? 
old_ref_set.width() : -2.0; +} + +double GetEquivalenceSimilarity( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + const Equivalence& equivalence) { + double similarity = 0.0; + for (offset_t k = 0; k < equivalence.length; ++k) { + // Non-tokens are joined with the nearest previous token: skip until we + // cover the unit. + if (!new_image_index.IsToken(equivalence.dst_offset + k)) + continue; + + similarity += GetTokenSimilarity( + old_image_index, new_image_index, targets_affinities, + equivalence.src_offset + k, equivalence.dst_offset + k); + if (similarity == kMismatchFatal) + return kMismatchFatal; + } + return similarity; +} + +EquivalenceCandidate ExtendEquivalenceForward( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + const EquivalenceCandidate& candidate, + double min_similarity) { + Equivalence equivalence = candidate.eq; + offset_t best_k = equivalence.length; + double current_similarity = candidate.similarity; + double best_similarity = current_similarity; + double current_penalty = min_similarity; + for (offset_t k = best_k; + equivalence.src_offset + k < old_image_index.size() && + equivalence.dst_offset + k < new_image_index.size(); + ++k) { + // Mismatch in type, |candidate| cannot be extended further. + if (old_image_index.LookupType(equivalence.src_offset + k) != + new_image_index.LookupType(equivalence.dst_offset + k)) { + break; + } + + if (!new_image_index.IsToken(equivalence.dst_offset + k)) { + // Non-tokens are joined with the nearest previous token: skip until we + // cover the unit, and extend |best_k| if applicable. 
+ if (best_k == k) + best_k = k + 1; + continue; + } + + double similarity = GetTokenSimilarity( + old_image_index, new_image_index, targets_affinities, + equivalence.src_offset + k, equivalence.dst_offset + k); + current_similarity += similarity; + current_penalty = std::max(0.0, current_penalty) - similarity; + + if (current_similarity < 0.0 || current_penalty >= min_similarity) + break; + if (current_similarity >= best_similarity) { + best_similarity = current_similarity; + best_k = k + 1; + } + } + equivalence.length = best_k; + return {equivalence, best_similarity}; +} + +EquivalenceCandidate ExtendEquivalenceBackward( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + const EquivalenceCandidate& candidate, + double min_similarity) { + Equivalence equivalence = candidate.eq; + offset_t best_k = 0; + double current_similarity = candidate.similarity; + double best_similarity = current_similarity; + double current_penalty = 0.0; + offset_t k_min = std::min( + {equivalence.dst_offset, equivalence.src_offset, kBackwardsExtendLimit}); + for (offset_t k = 1; k <= k_min; ++k) { + // Mismatch in type, |candidate| cannot be extended further. + if (old_image_index.LookupType(equivalence.src_offset - k) != + new_image_index.LookupType(equivalence.dst_offset - k)) { + break; + } + + // Non-tokens are joined with the nearest previous token: skip until we + // reach the next token. + if (!new_image_index.IsToken(equivalence.dst_offset - k)) + continue; + + DCHECK_EQ(old_image_index.LookupType(equivalence.src_offset - k), + new_image_index.LookupType(equivalence.dst_offset - + k)); // Sanity check. 
+ double similarity = GetTokenSimilarity( + old_image_index, new_image_index, targets_affinities, + equivalence.src_offset - k, equivalence.dst_offset - k); + + current_similarity += similarity; + current_penalty = std::max(0.0, current_penalty) - similarity; + + if (current_similarity < 0.0 || current_penalty >= min_similarity) + break; + if (current_similarity >= best_similarity) { + best_similarity = current_similarity; + best_k = k; + } + } + + equivalence.dst_offset -= best_k; + equivalence.src_offset -= best_k; + equivalence.length += best_k; + return {equivalence, best_similarity}; +} + +EquivalenceCandidate VisitEquivalenceSeed( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + offset_t src, + offset_t dst, + double min_similarity) { + EquivalenceCandidate candidate{{src, dst, 0}, 0.0}; // Empty. + if (!old_image_index.IsToken(src)) + return candidate; + candidate = + ExtendEquivalenceForward(old_image_index, new_image_index, + targets_affinities, candidate, min_similarity); + if (candidate.similarity < min_similarity) + return candidate; // Not worth exploring any more. + return ExtendEquivalenceBackward(old_image_index, new_image_index, + targets_affinities, candidate, + min_similarity); +} + +/******** OffsetMapper ********/ + +OffsetMapper::OffsetMapper(std::vector<Equivalence>&& equivalences, + offset_t old_image_size, + offset_t new_image_size) + : equivalences_(std::move(equivalences)), + old_image_size_(old_image_size), + new_image_size_(new_image_size) { + DCHECK_GT(new_image_size_, 0U); + DCHECK(std::is_sorted(equivalences_.begin(), equivalences_.end(), + [](const Equivalence& a, const Equivalence& b) { + return a.src_offset < b.src_offset; + })); + // This is for testing. Assume pruned. 
+} + +OffsetMapper::OffsetMapper(EquivalenceSource&& equivalence_source, + offset_t old_image_size, + offset_t new_image_size) + : old_image_size_(old_image_size), new_image_size_(new_image_size) { + DCHECK_GT(new_image_size_, 0U); + for (auto e = equivalence_source.GetNext(); e.has_value(); + e = equivalence_source.GetNext()) { + equivalences_.push_back(*e); + } + PruneEquivalencesAndSortBySource(&equivalences_); +} + +OffsetMapper::OffsetMapper(const EquivalenceMap& equivalence_map, + offset_t old_image_size, + offset_t new_image_size) + : equivalences_(equivalence_map.size()), + old_image_size_(old_image_size), + new_image_size_(new_image_size) { + DCHECK_GT(new_image_size_, 0U); + std::transform(equivalence_map.begin(), equivalence_map.end(), + equivalences_.begin(), + [](const EquivalenceCandidate& c) { return c.eq; }); + PruneEquivalencesAndSortBySource(&equivalences_); +} + +OffsetMapper::~OffsetMapper() = default; + +// Safely evaluates |offset - unit.src_offset + unit.dst_offset| with signed +// arithmetic, then clips the result to |[0, new_image_size_)|. +offset_t OffsetMapper::NaiveExtendedForwardProject(const Equivalence& unit, + offset_t offset) const { + int64_t old_offset64 = offset; + int64_t src_offset64 = unit.src_offset; + int64_t dst_offset64 = unit.dst_offset; + uint64_t new_offset64 = std::min<uint64_t>( + std::max<int64_t>(0LL, old_offset64 - src_offset64 + dst_offset64), + new_image_size_ - 1); + return base::checked_cast<offset_t>(new_offset64); +} + +offset_t OffsetMapper::ExtendedForwardProject(offset_t offset) const { + DCHECK(!equivalences_.empty()); + if (offset < old_image_size_) { + // Finds the equivalence unit whose "old" block is nearest to |offset|, + // favoring the block with lower offset in case of a tie. 
+ auto pos = std::upper_bound( + equivalences_.begin(), equivalences_.end(), offset, + [](offset_t a, const Equivalence& b) { return a < b.src_offset; }); + // For tiebreaking: |offset - pos[-1].src_end()| is actually 1 less than + // |offset|'s distance to "old" block of |pos[-1]|. Therefore "<" is used. + if (pos != equivalences_.begin() && + (pos == equivalences_.end() || offset < pos[-1].src_end() || + offset - pos[-1].src_end() < pos->src_offset - offset)) { + --pos; + } + return NaiveExtendedForwardProject(*pos, offset); + } + // Fake offsets. + offset_t delta = offset - old_image_size_; + return delta < kOffsetBound - new_image_size_ ? new_image_size_ + delta + : kOffsetBound - 1; +} + +void OffsetMapper::ForwardProjectAll(std::vector<offset_t>* offsets) const { + DCHECK(std::is_sorted(offsets->begin(), offsets->end())); + auto current = equivalences_.begin(); + for (auto& src : *offsets) { + while (current != end() && current->src_end() <= src) { + ++current; + } + + if (current != end() && current->src_offset <= src) { + src = src - current->src_offset + current->dst_offset; + } else { + src = kInvalidOffset; + } + } + base::Erase(*offsets, kInvalidOffset); + offsets->shrink_to_fit(); +} + +void OffsetMapper::PruneEquivalencesAndSortBySource( + std::vector<Equivalence>* equivalences) { + std::sort(equivalences->begin(), equivalences->end(), + [](const Equivalence& a, const Equivalence& b) { + return a.src_offset < b.src_offset; + }); + + for (auto current = equivalences->begin(); current != equivalences->end(); + ++current) { + // A "reaper" is an equivalence after |current| that overlaps with it, but + // is longer, and so truncates |current|. For example: + // ****** <= |current| + // ** + // **** + // **** + // ********** <= |next| as reaper. + // If a reaper is found (as |next|), every equivalence strictly between + // |current| and |next| would be truncated to 0 and discarded. Handling this + // case is important to avoid O(n^2) behavior. 
+ bool next_is_reaper = false; + + // Look ahead to resolve overlaps, until a better candidate is found. + auto next = current + 1; + for (; next != equivalences->end(); ++next) { + DCHECK_GE(next->src_offset, current->src_offset); + if (next->src_offset >= current->src_end()) + break; // No more overlap. + + if (current->length < next->length) { + // |next| is better: So it is a reaper that shrinks |current|. + offset_t delta = current->src_end() - next->src_offset; + current->length -= delta; + next_is_reaper = true; + break; + } + } + + if (next_is_reaper) { + // Discard all equivalences strictly between |cur| and |next|. + for (auto reduced = current + 1; reduced != next; ++reduced) + reduced->length = 0; + current = next - 1; + } else { + // Shrink all equivalences that overlap with |current|. These are all + // worse than |current| since no reaper is found. + for (auto reduced = current + 1; reduced != next; ++reduced) { + offset_t delta = current->src_end() - reduced->src_offset; + reduced->length -= std::min(reduced->length, delta); + reduced->src_offset += delta; + reduced->dst_offset += delta; + DCHECK_EQ(reduced->src_offset, current->src_end()); + } + } + } + + // Discard all equivalences with length == 0. 
+  base::EraseIf(*equivalences, [](const Equivalence& equivalence) {
+    return equivalence.length == 0;
+  });
+}
+
+/******** EquivalenceMap ********/
+
+EquivalenceMap::EquivalenceMap() = default;
+
+EquivalenceMap::EquivalenceMap(std::vector<EquivalenceCandidate>&& equivalences)
+    : candidates_(std::move(equivalences)) {
+  SortByDestination();
+}
+
+EquivalenceMap::EquivalenceMap(EquivalenceMap&&) = default;
+
+EquivalenceMap::~EquivalenceMap() = default;
+
+void EquivalenceMap::Build(
+    const std::vector<offset_t>& old_sa,
+    const EncodedView& old_view,
+    const EncodedView& new_view,
+    const std::vector<TargetsAffinity>& targets_affinities,
+    double min_similarity) {
+  DCHECK_EQ(old_sa.size(), old_view.size());
+
+  CreateCandidates(old_sa, old_view, new_view, targets_affinities,
+                   min_similarity);
+  SortByDestination();
+  Prune(old_view, new_view, targets_affinities, min_similarity);
+
+  offset_t coverage = 0;
+  offset_t current_offset = 0;
+  for (auto candidate : candidates_) {
+    DCHECK_GE(candidate.eq.dst_offset, current_offset);
+    coverage += candidate.eq.length;
+    current_offset = candidate.eq.dst_end();
+  }
+  LOG(INFO) << "Equivalence Count: " << size();
+  LOG(INFO) << "Coverage / Extra / Total: " << coverage << " / "
+            << new_view.size() - coverage << " / " << new_view.size();
+}
+
+void EquivalenceMap::CreateCandidates(
+    const std::vector<offset_t>& old_sa,
+    const EncodedView& old_view,
+    const EncodedView& new_view,
+    const std::vector<TargetsAffinity>& targets_affinities,
+    double min_similarity) {
+  candidates_.clear();
+
+  // This is a heuristic to find 'good' equivalences on encoded views.
+  // Equivalences are found in ascending order of |new_image|.
+ offset_t dst_offset = 0; + + while (dst_offset < new_view.size()) { + if (!new_view.IsToken(dst_offset)) { + ++dst_offset; + continue; + } + auto match = + SuffixLowerBound(old_sa, old_view.begin(), + new_view.begin() + dst_offset, new_view.end()); + + offset_t next_dst_offset = dst_offset + 1; + // TODO(huangs): Clean up. + double best_similarity = min_similarity; + uint64_t total_visit_length = 0; + EquivalenceCandidate best_candidate = {{0, 0, 0}, 0.0}; + for (auto it = match; it != old_sa.end(); ++it) { + EquivalenceCandidate candidate = VisitEquivalenceSeed( + old_view.image_index(), new_view.image_index(), targets_affinities, + static_cast<offset_t>(*it), dst_offset, min_similarity); + if (candidate.similarity > best_similarity) { + best_candidate = candidate; + best_similarity = candidate.similarity; + next_dst_offset = candidate.eq.dst_end(); + total_visit_length += candidate.eq.length; + if (total_visit_length > kSeedSelectionTotalVisitLengthQuota) { + break; + } + } else { + break; + } + } + total_visit_length = 0; + for (auto it = match; it != old_sa.begin(); --it) { + EquivalenceCandidate candidate = VisitEquivalenceSeed( + old_view.image_index(), new_view.image_index(), targets_affinities, + static_cast<offset_t>(it[-1]), dst_offset, min_similarity); + if (candidate.similarity > best_similarity) { + best_candidate = candidate; + best_similarity = candidate.similarity; + next_dst_offset = candidate.eq.dst_end(); + total_visit_length += candidate.eq.length; + if (total_visit_length > kSeedSelectionTotalVisitLengthQuota) { + break; + } + } else { + break; + } + } + if (best_candidate.similarity >= min_similarity) { + candidates_.push_back(best_candidate); + } + + dst_offset = next_dst_offset; + } +} + +void EquivalenceMap::SortByDestination() { + std::sort(candidates_.begin(), candidates_.end(), + [](const EquivalenceCandidate& a, const EquivalenceCandidate& b) { + return a.eq.dst_offset < b.eq.dst_offset; + }); +} + +void EquivalenceMap::Prune( + const 
EncodedView& old_view, + const EncodedView& new_view, + const std::vector<TargetsAffinity>& target_affinities, + double min_similarity) { + // TODO(etiennep): unify with + // OffsetMapper::PruneEquivalencesAndSortBySource(). + for (auto current = candidates_.begin(); current != candidates_.end(); + ++current) { + if (current->similarity < min_similarity) + continue; // This candidate will be discarded anyway. + + bool next_is_reaper = false; + + // Look ahead to resolve overlaps, until a better candidate is found. + auto next = current + 1; + for (; next != candidates_.end(); ++next) { + DCHECK_GE(next->eq.dst_offset, current->eq.dst_offset); + if (next->eq.dst_offset >= current->eq.dst_offset + current->eq.length) + break; // No more overlap. + + if (current->similarity < next->similarity) { + // |next| is better: So it is a reaper that shrinks |current|. + offset_t delta = current->eq.dst_end() - next->eq.dst_offset; + current->eq.length -= delta; + current->similarity = GetEquivalenceSimilarity( + old_view.image_index(), new_view.image_index(), target_affinities, + current->eq); + + next_is_reaper = true; + break; + } + } + + if (next_is_reaper) { + // Discard all equivalences strictly between |current| and |next|. + for (auto reduced = current + 1; reduced != next; ++reduced) { + reduced->eq.length = 0; + reduced->similarity = 0; + } + current = next - 1; + } else { + // Shrink all overlapping candidates following and worse than |current|. 
+ for (auto reduced = current + 1; reduced != next; ++reduced) { + offset_t delta = current->eq.dst_end() - reduced->eq.dst_offset; + reduced->eq.length -= std::min(reduced->eq.length, delta); + reduced->eq.src_offset += delta; + reduced->eq.dst_offset += delta; + reduced->similarity = GetEquivalenceSimilarity( + old_view.image_index(), new_view.image_index(), target_affinities, + reduced->eq); + DCHECK_EQ(reduced->eq.dst_offset, current->eq.dst_end()); + } + } + } + + // Discard all candidates with similarity smaller than |min_similarity|. + base::EraseIf(candidates_, + [min_similarity](const EquivalenceCandidate& candidate) { + return candidate.similarity < min_similarity; + }); +} + +} // namespace zucchini diff --git a/equivalence_map.h b/equivalence_map.h new file mode 100644 index 0000000..8b716a1 --- /dev/null +++ b/equivalence_map.h @@ -0,0 +1,207 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ +#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ + +#include <stddef.h> + +#include <limits> +#include <vector> + +#include "components/zucchini/image_index.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/targets_affinity.h" + +namespace zucchini { + +constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity(); + +class EncodedView; +class EquivalenceSource; + +// Returns similarity score between a token (raw byte or first byte of a +// reference) in |old_image_index| at |src| and a token in |new_image_index| +// at |dst|. |targets_affinities| describes affinities for each target pool and +// is used to evaluate similarity between references, hence its size must be +// equal to the number of pools in both |old_image_index| and |new_image_index|. +// Both |src| and |dst| must refer to tokens in |old_image_index| and +// |new_image_index|. 
+double GetTokenSimilarity( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + offset_t src, + offset_t dst); + +// Returns a similarity score between content in |old_image_index| and +// |new_image_index| at regions described by |equivalence|, using +// |targets_affinities| to evaluate similarity between references. +double GetEquivalenceSimilarity( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + const Equivalence& equivalence); + +// Extends |equivalence| forward and returns the result. This is related to +// VisitEquivalenceSeed(). +EquivalenceCandidate ExtendEquivalenceForward( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + const EquivalenceCandidate& equivalence, + double min_similarity); + +// Extends |equivalence| backward and returns the result. This is related to +// VisitEquivalenceSeed(). +EquivalenceCandidate ExtendEquivalenceBackward( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + const EquivalenceCandidate& equivalence, + double min_similarity); + +// Creates an equivalence, starting with |src| and |dst| as offset hint, and +// extends it both forward and backward, trying to maximise similarity between +// |old_image_index| and |new_image_index|, and returns the result. +// |targets_affinities| is used to evaluate similarity between references. +// |min_similarity| describes the minimum acceptable similarity score and is +// used as threshold to discard bad equivalences. 
+EquivalenceCandidate VisitEquivalenceSeed( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const std::vector<TargetsAffinity>& targets_affinities, + offset_t src, + offset_t dst, + double min_similarity); + +// Container of pruned equivalences used to map offsets from |old_image| to +// offsets in |new_image|. Equivalences are pruned by cropping smaller +// equivalences to avoid overlaps, to make the equivalence map (for covered +// bytes in |old_image| and |new_image|) one-to-one. +class OffsetMapper { + public: + using const_iterator = std::vector<Equivalence>::const_iterator; + + // Constructors for various data sources. "Old" and "new" image sizes are + // needed for bounds checks and to handle dangling targets. + // - From a list of |equivalences|, already sorted (by |src_offset|) and + // pruned, useful for tests. + OffsetMapper(std::vector<Equivalence>&& equivalences, + offset_t old_image_size, + offset_t new_image_size); + // - From a generator, useful for Zucchini-apply. + OffsetMapper(EquivalenceSource&& equivalence_source, + offset_t old_image_size, + offset_t new_image_size); + // - From an EquivalenceMap that needs to be processed, useful for + // Zucchini-gen. + OffsetMapper(const EquivalenceMap& equivalence_map, + offset_t old_image_size, + offset_t new_image_size); + ~OffsetMapper(); + + size_t size() const { return equivalences_.size(); } + const_iterator begin() const { return equivalences_.begin(); } + const_iterator end() const { return equivalences_.end(); } + + // Returns naive extended forward-projection of "old" |offset| that follows + // |eq|'s delta. |eq| need not cover |offset|. + // - Averts underflow / overflow by clamping to |[0, new_image_size_)|. + // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the + // caller must make the check (hence "naive"). 
+ offset_t NaiveExtendedForwardProject(const Equivalence& unit, + offset_t offset) const; + + // Returns an offset in |new_image| corresponding to |offset| in |old_image|. + // Assumes |equivalences_| to be non-empty. Cases: + // - If |offset| is covered (i.e., in an "old" block), then use the delta of + // the (unique) equivalence unit that covers |offset|. + // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the + // nearest "old" block, use its delta, and avert underflow / overflow by + // clamping the result to |[0, new_image_size_)|. + // - If |offset| is >= |new_image_size_| (a "fake offset"), then use + // |new_image_size_ - old_image_size_| as the delta. + offset_t ExtendedForwardProject(offset_t offset) const; + + // Given sorted |offsets|, applies a projection in-place of all offsets that + // are part of a pruned equivalence from |old_image| to |new_image|. Other + // offsets are removed from |offsets|. + void ForwardProjectAll(std::vector<offset_t>* offsets) const; + + // Accessor for testing. + const std::vector<Equivalence> equivalences() const { return equivalences_; } + + // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a + // source location that was covered by some Equivalence would become covered + // by exactly one Equivalence. Moreover, for the offset, the equivalence + // corresponds to the largest (pre-pruning) covering Equivalence, and in case + // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may + // change in size since empty Equivalences are removed. + static void PruneEquivalencesAndSortBySource( + std::vector<Equivalence>* equivalences); + + private: + // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new" + // block overlaps). Also, it is sorted by "old" offsets. 
+ std::vector<Equivalence> equivalences_; + const offset_t old_image_size_; + const offset_t new_image_size_; +}; + +// Container of equivalences between |old_image_index| and |new_image_index|, +// sorted by |Equivalence::dst_offset|, only used during patch generation. +class EquivalenceMap { + public: + using const_iterator = std::vector<EquivalenceCandidate>::const_iterator; + + EquivalenceMap(); + // Initializes the object with |equivalences|. + explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates); + EquivalenceMap(EquivalenceMap&&); + EquivalenceMap(const EquivalenceMap&) = delete; + ~EquivalenceMap(); + + // Finds relevant equivalences between |old_view| and |new_view|, using + // suffix array |old_sa| computed from |old_view| and using + // |targets_affinities| to evaluate similarity between references. This + // function is not symmetric. Equivalences might overlap in |old_view|, but + // not in |new_view|. It tries to maximize accumulated similarity within each + // equivalence, while maximizing |new_view| coverage. The minimum similarity + // of an equivalence is given by |min_similarity|. + void Build(const std::vector<offset_t>& old_sa, + const EncodedView& old_view, + const EncodedView& new_view, + const std::vector<TargetsAffinity>& targets_affinities, + double min_similarity); + + size_t size() const { return candidates_.size(); } + const_iterator begin() const { return candidates_.begin(); } + const_iterator end() const { return candidates_.end(); } + + private: + // Discovers equivalence candidates between |old_view| and |new_view| and + // stores them in the object. Note that resulting candidates are not sorted + // and might be overlapping in new image. + void CreateCandidates(const std::vector<offset_t>& old_sa, + const EncodedView& old_view, + const EncodedView& new_view, + const std::vector<TargetsAffinity>& targets_affinities, + double min_similarity); + // Sorts candidates by their offset in new image. 
+ void SortByDestination(); + // Visits |candidates_| (sorted by |dst_offset|) and remove all destination + // overlaps. Candidates with low similarity scores are more likely to be + // shrunken. Unfit candidates may be removed. + void Prune(const EncodedView& old_view, + const EncodedView& new_view, + const std::vector<TargetsAffinity>& targets_affinities, + double min_similarity); + + std::vector<EquivalenceCandidate> candidates_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ diff --git a/equivalence_map_unittest.cc b/equivalence_map_unittest.cc new file mode 100644 index 0000000..b3a4ea4 --- /dev/null +++ b/equivalence_map_unittest.cc @@ -0,0 +1,635 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/equivalence_map.h" + +#include <cstring> +#include <deque> +#include <map> +#include <string> +#include <utility> +#include <vector> + +#include "components/zucchini/encoded_view.h" +#include "components/zucchini/image_index.h" +#include "components/zucchini/suffix_array.h" +#include "components/zucchini/targets_affinity.h" +#include "components/zucchini/test_disassembler.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using OffsetVector = std::vector<offset_t>; + +// Make all references 2 bytes long. +constexpr offset_t kReferenceSize = 2; + +// Creates and initialize an ImageIndex from |a| and with 2 types of references. +// The result is populated with |refs0| and |refs1|. |a| is expected to be a +// string literal valid for the lifetime of the object. 
+ImageIndex MakeImageIndexForTesting(const char* a, + std::vector<Reference>&& refs0, + std::vector<Reference>&& refs1) { + TestDisassembler disasm( + {kReferenceSize, TypeTag(0), PoolTag(0)}, std::move(refs0), + {kReferenceSize, TypeTag(1), PoolTag(0)}, std::move(refs1), + {kReferenceSize, TypeTag(2), PoolTag(1)}, {}); + + ImageIndex image_index( + ConstBufferView(reinterpret_cast<const uint8_t*>(a), std::strlen(a))); + + EXPECT_TRUE(image_index.Initialize(&disasm)); + return image_index; +} + +std::vector<TargetsAffinity> MakeTargetsAffinitiesForTesting( + const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + const EquivalenceMap& equivalence_map) { + std::vector<TargetsAffinity> target_affinities(old_image_index.PoolCount()); + for (const auto& old_pool_tag_and_targets : old_image_index.target_pools()) { + PoolTag pool_tag = old_pool_tag_and_targets.first; + target_affinities[pool_tag.value()].InferFromSimilarities( + equivalence_map, old_pool_tag_and_targets.second.targets(), + new_image_index.pool(pool_tag).targets()); + } + return target_affinities; +} + +} // namespace + +TEST(EquivalenceMapTest, GetTokenSimilarity) { + ImageIndex old_index = MakeImageIndexForTesting( + "ab1122334455", {{2, 0}, {4, 1}, {6, 2}, {8, 2}}, {{10, 3}}); + // Note: {4, 1} -> {6, 3} and {6, 2} -> {4, 1}, then result is sorted. + ImageIndex new_index = MakeImageIndexForTesting( + "a11b33224455", {{1, 0}, {4, 1}, {6, 3}, {8, 1}}, {{10, 2}}); + std::vector<TargetsAffinity> affinities = MakeTargetsAffinitiesForTesting( + old_index, new_index, + EquivalenceMap({{{0, 0, 1}, 1.0}, {{1, 3, 1}, 1.0}})); + + // Raw match. + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 0)); + // Raw mismatch. + EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 1)); + EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 1, 0)); + + // Type mismatch. 
+ EXPECT_EQ(kMismatchFatal, + GetTokenSimilarity(old_index, new_index, affinities, 0, 1)); + EXPECT_EQ(kMismatchFatal, + GetTokenSimilarity(old_index, new_index, affinities, 2, 0)); + EXPECT_EQ(kMismatchFatal, + GetTokenSimilarity(old_index, new_index, affinities, 2, 10)); + EXPECT_EQ(kMismatchFatal, + GetTokenSimilarity(old_index, new_index, affinities, 10, 1)); + + // Reference strong match. + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 1)); + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 4, 6)); + + // Reference weak match. + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 4)); + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 8)); + EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 8, 4)); + + // Weak match is not greater than strong match. + EXPECT_LE(GetTokenSimilarity(old_index, new_index, affinities, 6, 4), + GetTokenSimilarity(old_index, new_index, affinities, 2, 1)); + + // Reference mismatch. + EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 4)); + EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 6)); +} + +TEST(EquivalenceMapTest, GetEquivalenceSimilarity) { + ImageIndex image_index = + MakeImageIndexForTesting("abcdef1122", {{6, 0}}, {{8, 1}}); + std::vector<TargetsAffinity> affinities = + MakeTargetsAffinitiesForTesting(image_index, image_index, {}); + + // Sanity check. These are no-op with length-0 equivalences. + EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {0, 0, 0})); + EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {0, 3, 0})); + EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {3, 0, 0})); + + // Now examine larger equivalences. 
+ EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {0, 0, 3})); + EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {0, 3, 3})); + EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {3, 0, 3})); + + EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, + {6, 6, 4})); +} + +TEST(EquivalenceMapTest, ExtendEquivalenceForward) { + auto test_extend_forward = + [](const ImageIndex old_index, const ImageIndex new_index, + const EquivalenceCandidate& equivalence, double base_similarity) { + return ExtendEquivalenceForward( + old_index, new_index, + MakeTargetsAffinitiesForTesting(old_index, new_index, {}), + equivalence, base_similarity) + .eq; + }; + + EXPECT_EQ(Equivalence({0, 0, 0}), + test_extend_forward(MakeImageIndexForTesting("", {}, {}), + MakeImageIndexForTesting("", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 0}), + test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("zzzz", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 6}), + test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({2, 2, 4}), + test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), + {{2, 2, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 6}), + test_extend_forward(MakeImageIndexForTesting("bananaxx", {}, {}), + MakeImageIndexForTesting("bananayy", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({0, 0, 8}), + test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({0, 0, 6}), + test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + MakeImageIndexForTesting("banana22", {}, {{6, 
0}}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({0, 0, 17}), + test_extend_forward(MakeImageIndexForTesting("bananaxxpineapple", {}, {}), + MakeImageIndexForTesting("bananayypineapple", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({3, 0, 19}), + test_extend_forward( + MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), + MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), + {{3, 0, 0}, 0.0}, 8.0)); +} + +TEST(EquivalenceMapTest, ExtendEquivalenceBackward) { + auto test_extend_backward = + [](const ImageIndex old_index, const ImageIndex new_index, + const EquivalenceCandidate& equivalence, double base_similarity) { + return ExtendEquivalenceBackward( + old_index, new_index, + MakeTargetsAffinitiesForTesting(old_index, new_index, {}), + equivalence, base_similarity) + .eq; + }; + + EXPECT_EQ(Equivalence({0, 0, 0}), + test_extend_backward(MakeImageIndexForTesting("", {}, {}), + MakeImageIndexForTesting("", {}, {}), + {{0, 0, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({6, 4, 0}), + test_extend_backward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("zzzz", {}, {}), + {{6, 4, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 6}), + test_extend_backward(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), + {{6, 6, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({2, 2, 6}), + test_extend_backward(MakeImageIndexForTesting("xxbanana", {}, {}), + MakeImageIndexForTesting("yybanana", {}, {}), + {{8, 8, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({0, 0, 8}), + test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}), + MakeImageIndexForTesting("11banana", {{0, 0}}, {}), + {{8, 8, 0}, 0.0}, 8.0)); + + EXPECT_EQ( + Equivalence({2, 2, 6}), + test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}), + MakeImageIndexForTesting("22banana", {}, {{0, 0}}), + {{8, 8, 0}, 0.0}, 8.0)); + + EXPECT_EQ(Equivalence({0, 0, 17}), + 
test_extend_backward( + MakeImageIndexForTesting("bananaxxpineapple", {}, {}), + MakeImageIndexForTesting("bananayypineapple", {}, {}), + {{8, 8, 9}, 9.0}, 8.0)); + + EXPECT_EQ( + Equivalence({3, 0, 19}), + test_extend_backward( + MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), + MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), + {{22, 19, 0}, 0.0}, 8.0)); +} + +TEST(EquivalenceMapTest, PruneEquivalencesAndSortBySource) { + auto PruneEquivalencesAndSortBySourceTest = + [](std::vector<Equivalence>&& equivalences) { + OffsetMapper::PruneEquivalencesAndSortBySource(&equivalences); + return std::move(equivalences); + }; + + EXPECT_EQ(std::vector<Equivalence>(), + PruneEquivalencesAndSortBySourceTest({})); + EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 1}})); + EXPECT_EQ(std::vector<Equivalence>(), + PruneEquivalencesAndSortBySourceTest({{0, 10, 0}})); + EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}, {1, 11, 1}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 1}, {1, 11, 1}})); + EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}, {2, 13, 1}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 2}})); + EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 1}})); + EXPECT_EQ(std::vector<Equivalence>({{0, 10, 2}, {2, 14, 1}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 13, 2}})); + EXPECT_EQ(std::vector<Equivalence>({{0, 10, 1}, {1, 12, 3}}), + PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 3}})); + EXPECT_EQ(std::vector<Equivalence>({{0, 10, 3}, {3, 16, 2}}), + PruneEquivalencesAndSortBySourceTest( + {{0, 10, 3}, {1, 13, 3}, {3, 16, 2}})); // Pruning is greedy + + // Consider following pattern that may cause O(n^2) behavior if not handled + // properly. 
+ // *************** + // ********** + // ******** + // ****** + // **** + // ** + // *************** + // This test case makes sure the function does not stall on a large instance + // of this pattern. + EXPECT_EQ(std::vector<Equivalence>({{0, 10, +300000}, {300000, 30, +300000}}), + PruneEquivalencesAndSortBySourceTest([] { + std::vector<Equivalence> equivalenses; + equivalenses.push_back({0, 10, +300000}); + for (offset_t i = 0; i < 100000; ++i) + equivalenses.push_back({200000 + i, 20, +200000 - 2 * i}); + equivalenses.push_back({300000, 30, +300000}); + return equivalenses; + }())); +} + +TEST(EquivalenceMapTest, NaiveExtendedForwardProject) { + constexpr size_t kOldImageSize = 1000U; + constexpr size_t kNewImageSize = 1000U; + OffsetMapper offset_mapper(std::vector<Equivalence>(), kOldImageSize, + kNewImageSize); + + // Convenience function to declutter. + auto project = [&offset_mapper](const Equivalence& eq, offset_t offset) { + return offset_mapper.NaiveExtendedForwardProject(eq, offset); + }; + + // Equivalence with delta = 0. + Equivalence eq_stay = {10, 10, +5}; // [10,15) -> [10,15). + for (offset_t offset = 0U; offset < 1000U; ++offset) { + EXPECT_EQ(offset, project(eq_stay, offset)); + } + // Saturate since result would overflow "new". + EXPECT_EQ(999U, project(eq_stay, 1000U)); + EXPECT_EQ(999U, project(eq_stay, 2000U)); + EXPECT_EQ(999U, project(eq_stay, kOffsetBound - 1)); + + // Equivalence with delta = -10. + Equivalence eq_dec = {20, 10, +12}; // [20,32) --> [10,22). + // Offsets in "old" block. + EXPECT_EQ(10U, project(eq_dec, 20U)); + EXPECT_EQ(11U, project(eq_dec, 21U)); + EXPECT_EQ(21U, project(eq_dec, 31U)); + // Offsets before "old" block, no underflow + EXPECT_EQ(9U, project(eq_dec, 19U)); + EXPECT_EQ(1U, project(eq_dec, 11U)); + EXPECT_EQ(0U, project(eq_dec, 10U)); + // Offsets before "old" block, underflow (possible since delta < 0). 
+ EXPECT_EQ(0U, project(eq_dec, 9U)); + EXPECT_EQ(0U, project(eq_dec, 5U)); + EXPECT_EQ(0U, project(eq_dec, 0U)); + // Offsets after "old" block, no overflow. + EXPECT_EQ(20U, project(eq_dec, 30U)); + EXPECT_EQ(64U, project(eq_dec, 74U)); + EXPECT_EQ(90U, project(eq_dec, 100U)); + EXPECT_EQ(490U, project(eq_dec, 500U)); + EXPECT_EQ(999U, project(eq_dec, 1009U)); + // Offsets after "old" block, overflow. + EXPECT_EQ(999U, project(eq_dec, 1010U)); + EXPECT_EQ(999U, project(eq_dec, 2000U)); + EXPECT_EQ(999U, project(eq_dec, kOffsetBound - 1)); + + // Equivalence with delta = +10. + Equivalence eq_inc = {7, 17, +80}; // [7,87) --> [17,97). + // Offsets in "old" block. + EXPECT_EQ(17U, project(eq_inc, 7U)); + EXPECT_EQ(60U, project(eq_inc, 50U)); + EXPECT_EQ(96U, project(eq_inc, 86U)); + // Offsets before "old" block, underflow impossible since delta >= 0. + EXPECT_EQ(16U, project(eq_inc, 6U)); + EXPECT_EQ(10U, project(eq_inc, 0U)); + // Offsets after "old" block, no overflow. + EXPECT_EQ(97U, project(eq_inc, 87U)); + EXPECT_EQ(510U, project(eq_inc, 500U)); + EXPECT_EQ(999U, project(eq_inc, 989U)); + // Offsets after "old" block, overflow. + EXPECT_EQ(999U, project(eq_inc, 990U)); + EXPECT_EQ(999U, project(eq_inc, 2000U)); + EXPECT_EQ(999U, project(eq_inc, kOffsetBound - 1)); +} + +TEST(EquivalenceMapTest, ExtendedForwardProject) { + // EquivalenceMaps provided must be sorted by "old" offset, and pruned. + // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18). + OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 20U, + 25U); + EXPECT_EQ(10U, offset_mapper1.ExtendedForwardProject(0U)); + EXPECT_EQ(11U, offset_mapper1.ExtendedForwardProject(1U)); + EXPECT_EQ(13U, offset_mapper1.ExtendedForwardProject(2U)); + EXPECT_EQ(14U, offset_mapper1.ExtendedForwardProject(3U)); // Previous equiv. 
+ EXPECT_EQ(16U, offset_mapper1.ExtendedForwardProject(4U)); + EXPECT_EQ(17U, offset_mapper1.ExtendedForwardProject(5U)); + EXPECT_EQ(18U, offset_mapper1.ExtendedForwardProject(6U)); // Previous equiv. + // Fake offsets. + EXPECT_EQ(25U, offset_mapper1.ExtendedForwardProject(20U)); + EXPECT_EQ(26U, offset_mapper1.ExtendedForwardProject(21U)); + EXPECT_EQ(1005U, offset_mapper1.ExtendedForwardProject(1000U)); + EXPECT_EQ(kOffsetBound - 1, + offset_mapper1.ExtendedForwardProject(kOffsetBound - 1)); + + // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6). + OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 25U, + 20U); + EXPECT_EQ(10U, offset_mapper2.ExtendedForwardProject(0U)); + EXPECT_EQ(11U, offset_mapper2.ExtendedForwardProject(1U)); + EXPECT_EQ(2U, offset_mapper2.ExtendedForwardProject(13U)); + EXPECT_EQ(3U, offset_mapper2.ExtendedForwardProject(14U)); // Previous equiv. + EXPECT_EQ(4U, offset_mapper2.ExtendedForwardProject(16U)); + EXPECT_EQ(5U, offset_mapper2.ExtendedForwardProject(17U)); + EXPECT_EQ(6U, offset_mapper2.ExtendedForwardProject(18U)); // Previous equiv. + // Fake offsets. + EXPECT_EQ(20U, offset_mapper2.ExtendedForwardProject(25U)); + EXPECT_EQ(21U, offset_mapper2.ExtendedForwardProject(26U)); + EXPECT_EQ(995U, offset_mapper2.ExtendedForwardProject(1000U)); + EXPECT_EQ(kOffsetBound - 1 - 5, + offset_mapper2.ExtendedForwardProject(kOffsetBound - 1)); +} + +TEST(EquivalenceMapTest, ExtendedForwardProjectEncoding) { + // Tests OffsetMapper::ExtendedForwardProject(), which maps every "old" offset + // to a "new" offset, with possible overlap (even though blocks don't + // overlap). Not testing real offsets only (no fake offsets). + // |old_spec| is a string like "<<aaAAaabbBBbcCCc>>": + // - Upper case letters are covered "old" offsets. + // - Lower case letters are non-covered offsets that are properly mapped using + // nearest "old" block. + // - '<' denotes underflow (clamped to 0). 
+ // - '>' denotes overflow (clampled to "new" size - 1). + // |new_spec| is a string like "aaAA(ab)(ab)BBb..cCCc": + // - Upper and lower case letters are mapped "new" targets, occurring in the + // order that they appear in |old_spec|. + // - '.' are "new" offsets that appear as output. + // - '(' and ')' surround a single "new" location that are repeated as output. + int case_no = 0; + auto run_test = [&case_no](std::vector<Equivalence>&& equivalences, + const std::string& old_spec, + const std::string& new_spec) { + const size_t old_size = old_spec.length(); + // Build expected "new" offsets, queue up for each letter. + std::map<char, std::deque<offset_t>> expected; + offset_t cur_new_offset = 0; + char state = ')'; // ')' = increase offset, '(' = stay. + for (char ch : new_spec) { + if (ch == '(' || ch == ')') + state = ch; + else + expected[ch].push_back(cur_new_offset); + cur_new_offset += (state == ')') ? 1 : 0; + } + const size_t new_size = cur_new_offset; + // Forward-project for each "old" index, pull from queue from matching + // letter, and compare. + OffsetMapper offset_mapper(std::move(equivalences), old_size, new_size); + for (offset_t old_offset = 0; old_offset < old_size; ++old_offset) { + offset_t new_offset = offset_mapper.ExtendedForwardProject(old_offset); + char ch = old_spec[old_offset]; + if (ch == '<') { // Special case: Underflow. + EXPECT_EQ(0U, new_offset) << "in case " << case_no; + } else if (ch == '>') { // Special case: Overflow. + EXPECT_EQ(static_cast<offset_t>(new_size - 1), new_offset) + << "in case " << case_no; + } else { + std::deque<offset_t>& q = expected[ch]; + ASSERT_FALSE(q.empty()); + EXPECT_EQ(q.front(), new_offset) << "in case " << case_no; + q.pop_front(); + if (q.empty()) + expected.erase(ch); + } + } + // Clear useless '.', and ensure everything is consumed. + expected.erase('.'); + EXPECT_TRUE(expected.empty()) << "in case " << case_no; + ++case_no; + }; + + // Trivial: [5,9) --> [5,9). 
+ run_test({{5, 5, +4}}, "aaaaaAAAAaaaaa", "aaaaaAAAAaaaaa"); + // Swap: [0,4) --> [6,10), [4,10) --> [0,6). + run_test({{0, 6, +4}, {4, 0, +6}}, "AAAABBBBBB", "BBBBBBAAAA"); + // Overlap: [0,4) --> [2,6), [4,10) --> [3,9). + run_test({{0, 2, +4}, {4, 3, +6}}, "AAAABBBBBB", "..A(AB)(AB)(AB)BBB."); + // Converge: [1,3) --> [2,4), [7,8) --> [6,7). + run_test({{1, 2, +2}, {7, 6, +1}}, "aAAaabbBbb", ".aAA(ab)(ab)Bbb."); + // Converge with tie-breaker: [1,3) --> [2,4), [8,9) --> [7,8). + run_test({{1, 2, +2}, {8, 7, +1}}, "aAAaaabbBb", ".aAAa(ab)(ab)Bb."); + // Shift left: [6,8) --> [2,4): Underflow occurs. + run_test({{6, 2, +2}}, "<<<<aaAAaa", "aaAAaa...."); + // Shift right: [2,5) --> [6,9): Overflow occurs. + run_test({{2, 6, +3}}, "aaAAAa>>>>", "....aaAAAa"); + // Diverge: [3,5) --> [1,3], [7,9) --> [9,11). + run_test({{3, 1, +2}, {7, 9, +2}}, "<<aAAabBBb>>", "aAAa....bBBb"); + // Pile-up: [0,2) --> [7,9), [9,11) --> [9,11), [18,20) --> [11,13). + run_test({{0, 7, +2}, {9, 9, +2}, {18, 11, +2}}, "AAaaaabbbBBbbbbcccCC", + "......b(Ab)(Abc)(Bac)(Bac)(Cab)(Cab)bb....."); + // Inverse pile-up: [7,9) --> [0,2), [9,11) --> [9,11), [13,15) --> [18,20). + run_test({{7, 0, +2}, {9, 9, +2}, {11, 18, +2}}, "<<<<<<<AABBCC>>>>>>>", + "AA.......BB.......CC"); + // Sparse rotate: [3,4) -> [10,11), [10,11) --> [17,18), [17,18) --> [3,4). + run_test({{3, 10, +1}, {10, 17, +1}, {17, 3, +1}}, "aaaAaaabbbBbbbcccCccc", + "cccCcccaaaAaaabbbBbbb"); + // Messy swap: [2,4) --> [10,12), [12,16) --> [3,7). + run_test({{2, 10, +2}, {12, 3, +4}}, "aaAAaa>><bbbBBBBbb", + "bbbBBBBb(ab)aAAaa"); + // Messy expand: [6,8) --> [3,5), [10,11) -> [11,12), [14,17) --> [16,19). + run_test({{6, 3, +2}, {10, 11, +1}, {14, 16, +3}}, "<<<aaaAAabBbbcCCCc>>>>>", + "aaaAAa....bBbb.cCCCc"); + // Interleave: [1,2) --> [0,1), [5,6) --> [10,11), [6,8) --> [3,5), + // [11,13) --> [12,14), [14,16) --> [6,8), [17,18) --> [17,18). 
+ run_test({{1, 0, +1}, + {5, 10, +1}, + {6, 3, +2}, + {11, 12, +2}, + {14, 6, +2}, + {17, 17, +1}}, + "<AaabBCCccdDDdEEeFf>", "AaaCCc(Ec)EebBdDDd..Ff"); +} + +TEST(EquivalenceMapTest, ForwardProjectAll) { + auto ForwardProjectAllTest = [](const OffsetMapper& offset_mapper, + std::initializer_list<offset_t> offsets) { + OffsetVector offsets_vec(offsets); + offset_mapper.ForwardProjectAll(&offsets_vec); + return offsets_vec; + }; + + // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18). + OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 100U, + 100U); + EXPECT_EQ(OffsetVector({10}), ForwardProjectAllTest(offset_mapper1, {0})); + EXPECT_EQ(OffsetVector({13}), ForwardProjectAllTest(offset_mapper1, {2})); + EXPECT_EQ(OffsetVector({}), ForwardProjectAllTest(offset_mapper1, {3})); + EXPECT_EQ(OffsetVector({10, 13}), + ForwardProjectAllTest(offset_mapper1, {0, 2})); + EXPECT_EQ(OffsetVector({11, 13, 17}), + ForwardProjectAllTest(offset_mapper1, {1, 2, 5})); + EXPECT_EQ(OffsetVector({11, 17}), + ForwardProjectAllTest(offset_mapper1, {1, 3, 5})); + EXPECT_EQ(OffsetVector({10, 11, 13, 16, 17}), + ForwardProjectAllTest(offset_mapper1, {0, 1, 2, 3, 4, 5, 6})); + + // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6). 
+ OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 100U, + 100U); + EXPECT_EQ(OffsetVector({2}), ForwardProjectAllTest(offset_mapper2, {13})); + EXPECT_EQ(OffsetVector({10, 2}), + ForwardProjectAllTest(offset_mapper2, {0, 13})); + EXPECT_EQ(OffsetVector({11, 2, 5}), + ForwardProjectAllTest(offset_mapper2, {1, 13, 17})); + EXPECT_EQ(OffsetVector({11, 5}), + ForwardProjectAllTest(offset_mapper2, {1, 14, 17})); + EXPECT_EQ(OffsetVector({10, 11, 2, 4, 5}), + ForwardProjectAllTest(offset_mapper2, {0, 1, 13, 14, 16, 17, 18})); +} + +TEST(EquivalenceMapTest, Build) { + auto test_build_equivalence = [](const ImageIndex old_index, + const ImageIndex new_index, + double minimum_similarity) { + auto affinities = MakeTargetsAffinitiesForTesting(old_index, new_index, {}); + + EncodedView old_view(old_index); + EncodedView new_view(new_index); + + for (const auto& old_pool_tag_and_targets : old_index.target_pools()) { + PoolTag pool_tag = old_pool_tag_and_targets.first; + std::vector<uint32_t> old_labels; + std::vector<uint32_t> new_labels; + size_t label_bound = affinities[pool_tag.value()].AssignLabels( + 1.0, &old_labels, &new_labels); + old_view.SetLabels(pool_tag, std::move(old_labels), label_bound); + new_view.SetLabels(pool_tag, std::move(new_labels), label_bound); + } + + std::vector<offset_t> old_sa = + MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()); + + EquivalenceMap equivalence_map; + equivalence_map.Build(old_sa, old_view, new_view, affinities, + minimum_similarity); + + offset_t current_dst_offset = 0; + offset_t coverage = 0; + for (const auto& candidate : equivalence_map) { + EXPECT_GE(candidate.eq.dst_offset, current_dst_offset); + EXPECT_GT(candidate.eq.length, offset_t(0)); + EXPECT_LE(candidate.eq.src_offset + candidate.eq.length, + old_index.size()); + EXPECT_LE(candidate.eq.dst_offset + candidate.eq.length, + new_index.size()); + EXPECT_GE(candidate.similarity, minimum_similarity); + current_dst_offset = 
candidate.eq.dst_offset; + coverage += candidate.eq.length; + } + return coverage; + }; + + EXPECT_EQ(0U, + test_build_equivalence(MakeImageIndexForTesting("", {}, {}), + MakeImageIndexForTesting("", {}, {}), 4.0)); + + EXPECT_EQ(0U, test_build_equivalence( + MakeImageIndexForTesting("", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), 4.0)); + + EXPECT_EQ(0U, + test_build_equivalence(MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("", {}, {}), 4.0)); + + EXPECT_EQ(0U, test_build_equivalence( + MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("zzzz", {}, {}), 4.0)); + + EXPECT_EQ(6U, test_build_equivalence( + MakeImageIndexForTesting("banana", {}, {}), + MakeImageIndexForTesting("banana", {}, {}), 4.0)); + + EXPECT_EQ(6U, test_build_equivalence( + MakeImageIndexForTesting("bananaxx", {}, {}), + MakeImageIndexForTesting("bananayy", {}, {}), 4.0)); + + EXPECT_EQ(8U, test_build_equivalence( + MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + MakeImageIndexForTesting("banana11", {{6, 0}}, {}), 4.0)); + + EXPECT_EQ(6U, test_build_equivalence( + MakeImageIndexForTesting("banana11", {{6, 0}}, {}), + MakeImageIndexForTesting("banana22", {}, {{6, 0}}), 4.0)); + + EXPECT_EQ( + 15U, + test_build_equivalence( + MakeImageIndexForTesting("banana11pineapple", {{6, 0}}, {}), + MakeImageIndexForTesting("banana22pineapple", {}, {{6, 0}}), 4.0)); + + EXPECT_EQ( + 15U, + test_build_equivalence( + MakeImageIndexForTesting("bananaxxxxxxxxpineapple", {}, {}), + MakeImageIndexForTesting("bananayyyyyyyypineapple", {}, {}), 4.0)); + + EXPECT_EQ( + 19U, + test_build_equivalence( + MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), + MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), + 4.0)); +} + +} // namespace zucchini diff --git a/fuzzers/BUILD.gn b/fuzzers/BUILD.gn new file mode 100644 index 0000000..90c436e --- /dev/null +++ b/fuzzers/BUILD.gn @@ -0,0 +1,210 @@ +# Copyright 2018 The Chromium Authors. 
All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//testing/libfuzzer/fuzzer_test.gni") +import("//third_party/protobuf/proto_library.gni") + +static_library("zucchini_fuzz_utils") { + sources = [ + "fuzz_utils.cc", + "fuzz_utils.h", + ] + deps = [ + "//base", + "//components/zucchini:zucchini_lib", + ] +} + +# To download the corpus for local fuzzing use: +# gsutil -m rsync \ +# gs://clusterfuzz-corpus/libfuzzer/zucchini_disassembler_dex_fuzzer \ +# components/zucchini/fuzzing/testdata/disassembler_dex_fuzzer/ +fuzzer_test("zucchini_disassembler_dex_fuzzer") { + sources = [ "disassembler_dex_fuzzer.cc" ] + deps = [ + "//base", + "//components/zucchini:zucchini_lib", + ] +} + +# To download the corpus for local fuzzing use: +# gsutil -m rsync \ +# gs://clusterfuzz-corpus/libfuzzer/zucchini_disassembler_win32_fuzzer \ +# components/zucchini/fuzzing/testdata/disassembler_win32_fuzzer/ +fuzzer_test("zucchini_disassembler_win32_fuzzer") { + sources = [ "disassembler_win32_fuzzer.cc" ] + deps = [ + ":zucchini_fuzz_utils", + "//base", + "//components/zucchini:zucchini_lib", + ] +} + +# To download the corpus for local fuzzing use: +# gsutil -m rsync \ +# gs://clusterfuzz-corpus/libfuzzer/zucchini_disassembler_elf_fuzzer \ +# components/zucchini/fuzzing/testdata/disassembler_elf_fuzzer/ +fuzzer_test("zucchini_disassembler_elf_fuzzer") { + sources = [ "disassembler_elf_fuzzer.cc" ] + deps = [ + ":zucchini_fuzz_utils", + "//base", + "//components/zucchini:zucchini_lib", + ] +} + +fuzzer_test("zucchini_patch_fuzzer") { + sources = [ "patch_fuzzer.cc" ] + deps = [ + "//base", + "//components/zucchini:zucchini_lib", + ] + seed_corpus = "testdata/patch_fuzzer" +} + +proto_library("zucchini_file_pair_proto") { + sources = [ "file_pair.proto" ] +} + +# Ensure protoc is available. +# Disabled on Windows due to crbug/844826. 
+if (current_toolchain == host_toolchain && !is_win) { + # Raw Apply Fuzzer Seed: + action("zucchini_raw_apply_seed") { + script = "generate_fuzzer_data.py" + + args = [ + "--raw", + "old_eventlog_provider.dll", # <old_file> + "new_eventlog_provider.dll", # <new_file> + + # <patch_file> (temporary) + rebase_path( + "$target_gen_dir/testdata/apply_fuzzer/eventlog_provider.patch", + root_build_dir), + + # <output_file> + rebase_path( + "$target_gen_dir/testdata/apply_fuzzer/raw_apply_seed_proto.bin", + root_build_dir), + ] + + # Files depended upon. + sources = [ + "create_seed_file_pair.py", + "testdata/new_eventlog_provider.dll", + "testdata/old_eventlog_provider.dll", + ] + + # Outputs: necessary for validation. + outputs = + [ "$target_gen_dir/testdata/apply_fuzzer/raw_apply_seed_proto.bin" ] + deps = [ + "//components/zucchini:zucchini", + "//third_party/protobuf:protoc", + ] + } + + # ZTF Apply Fuzzer Seed: + action("zucchini_ztf_apply_seed") { + script = "generate_fuzzer_data.py" + + # *.ztf files are expected to be valid ZTF format. + args = [ + "old.ztf", # <old_file> + "new.ztf", # <new_file> + + # <patch_file> (temporary) + rebase_path("$target_gen_dir/testdata/apply_fuzzer/ztf.patch", + root_build_dir), + + # <output_file> + rebase_path( + "$target_gen_dir/testdata/apply_fuzzer/ztf_apply_seed_proto.bin", + root_build_dir), + ] + + # Files depended upon. + sources = [ + "create_seed_file_pair.py", + "testdata/new.ztf", + "testdata/old.ztf", + ] + + # Outputs: necessary for validation. 
+ outputs = + [ "$target_gen_dir/testdata/apply_fuzzer/ztf_apply_seed_proto.bin" ] + deps = [ + "//components/zucchini:zucchini", + "//third_party/protobuf:protoc", + ] + } + + # Apply Fuzzer: + fuzzer_test("zucchini_apply_fuzzer") { + sources = [ "apply_fuzzer.cc" ] + deps = [ + ":zucchini_file_pair_proto", + "//base", + "//components/zucchini:zucchini_lib", + "//third_party/libprotobuf-mutator", + ] + seed_corpus = "$target_gen_dir/testdata/apply_fuzzer" + seed_corpus_deps = [ + ":zucchini_raw_apply_seed", + ":zucchini_ztf_apply_seed", + ] + } + + # For Gen fuzzers seeds can be created from this directory with: + # python create_seed_file_pair.py <protoc> <old file> <new file> <out file> + # [--imposed=<imposed>] + + # Raw Gen Fuzzer: + # <old file>: testdata/old.ztf + # <new file>: testdata/new.ztf + # <out file>: testdata/raw_or_ztf_gen_fuzzer/seed.asciipb + fuzzer_test("zucchini_raw_gen_fuzzer") { + sources = [ "raw_gen_fuzzer.cc" ] + deps = [ + ":zucchini_file_pair_proto", + "//base", + "//components/zucchini:zucchini_lib", + "//third_party/libprotobuf-mutator", + ] + seed_corpus = "testdata/raw_or_ztf_gen_fuzzer" + } + + # ZTF Gen Fuzzer: + # <old file>: testdata/old.ztf + # <new file>: testdata/new.ztf + # <out file>: testdata/raw_or_ztf_gen_fuzzer/seed.asciipb + fuzzer_test("zucchini_ztf_gen_fuzzer") { + sources = [ "ztf_gen_fuzzer.cc" ] + deps = [ + ":zucchini_file_pair_proto", + "//base", + "//components/zucchini:zucchini_lib", + "//third_party/libprotobuf-mutator", + ] + seed_corpus = "testdata/raw_or_ztf_gen_fuzzer" + } + + # Imposed Ensemble Match Fuzzer: + # <old file>: testdata/old_imposed_archive.txt + # <new file>: testdata/new_imposed_archive.txt + # <out file>: testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb + # <imposed>: 17+420=388+347,452+420=27+347 + # This is a mapping of regions old_offset+old_size=new_offset+new_size,... 
+ fuzzer_test("zucchini_imposed_ensemble_matcher_fuzzer") { + sources = [ "imposed_ensemble_matcher_fuzzer.cc" ] + deps = [ + ":zucchini_file_pair_proto", + "//base", + "//components/zucchini:zucchini_lib", + "//third_party/libprotobuf-mutator", + ] + seed_corpus = "testdata/imposed_ensemble_matcher_fuzzer" + } +} diff --git a/fuzzers/apply_fuzzer.cc b/fuzzers/apply_fuzzer.cc new file mode 100644 index 0000000..baad978 --- /dev/null +++ b/fuzzers/apply_fuzzer.cc @@ -0,0 +1,59 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stdint.h> +#include <stdlib.h> + +#include <iostream> +#include <vector> + +#include "base/environment.h" +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/fuzzers/file_pair.pb.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/zucchini.h" +#include "testing/libfuzzer/proto/lpm_interface.h" + +struct Environment { + Environment() { + logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming. + } +}; + +Environment* env = new Environment(); + +DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) { + // Dump code for debugging. + if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) { + std::cout << "Old File: " << file_pair.old_file() << std::endl + << "Patch File: " << file_pair.new_or_patch_file() << std::endl; + } + + // Prepare data. + zucchini::ConstBufferView old_image( + reinterpret_cast<const uint8_t*>(file_pair.old_file().data()), + file_pair.old_file().size()); + zucchini::ConstBufferView patch_file( + reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()), + file_pair.new_or_patch_file().size()); + + // Generate a patch reader. + auto patch_reader = zucchini::EnsemblePatchReader::Create(patch_file); + // Abort if the patch can't be read. 
+ if (!patch_reader.has_value()) + return; + + // Create the underlying new file. + size_t new_size = patch_reader->header().new_size; + // Reject unreasonably large "new" files that fuzzed patch may specify. + if (new_size > 64 * 1024) + return; + std::vector<uint8_t> new_data(new_size); + zucchini::MutableBufferView new_image(new_data.data(), new_size); + + // Fuzz target. + zucchini::ApplyBuffer(old_image, *patch_reader, new_image); + // No need to check whether output exist, or if so, whether it's valid. +} diff --git a/fuzzers/create_seed_file_pair.py b/fuzzers/create_seed_file_pair.py new file mode 100755 index 0000000..db3843f --- /dev/null +++ b/fuzzers/create_seed_file_pair.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# Copyright 2018 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Create binary protobuf encoding for fuzzer seeds. + +This script is used to generate binary encoded protobuf seeds for fuzzers +related to Zucchini-gen and -apply, which take pairs of files are arguments. The +binary protobuf format is faster to parse so it is the preferred method for +encoding the seeds. For gen related fuzzers this should only need to be run +once. For any apply related fuzzers this should be rerun whenever the patch +format is changed. 
+""" + +import argparse +import logging +import os +import subprocess +import sys + +ABS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__))) +PROTO_DEFINITION_FILE = 'file_pair.proto' + +def parse_args(): + """Parse commandline args.""" + parser = argparse.ArgumentParser() + parser.add_argument('protoc_path', help='Path to protoc.') + parser.add_argument('old_file', help='Old file to generate/apply patch.') + parser.add_argument('new_or_patch_file', + help='New file to generate or patch to apply.') + parser.add_argument('output_file', + help='File to write binary protobuf to.') + parser.add_argument('--imposed_matches', + help='Equivalence matches to impose when generating ' + 'the patch.') + return parser.parse_args() + + +def read_to_proto_escaped_string(filename): + """Reads a file and converts it to hex escape sequences.""" + with open(filename, 'rb') as f: + # Note that unicode-escape escapes all non-ASCII printable characters + # excluding ", which needs to be manually escaped. + return f.read().decode('latin1').encode('unicode-escape').replace( + b'"', b'\\"') + + +def main(): + args = parse_args() + # Create an ASCII string representing a protobuf. + content = [b'old_file: "%s"' % read_to_proto_escaped_string(args.old_file), + b'new_or_patch_file: "%s"' % read_to_proto_escaped_string( + args.new_or_patch_file)] + + if args.imposed_matches: + content.append(b'imposed_matches: "%s"' % + args.imposed_matches.encode('unicode-escape')) + + # Encode the ASCII protobuf as a binary protobuf. + ps = subprocess.Popen([args.protoc_path, '--proto_path=%s' % ABS_PATH, + '--encode=zucchini.fuzzers.FilePair', + os.path.join(ABS_PATH, PROTO_DEFINITION_FILE)], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + # Write the string to the subprocess. Single line IO is fine as protoc returns + # a string. 
+ output = ps.communicate(input=b'\n'.join(content)) + ps.wait() + if ps.returncode: + logging.error('Binary protobuf encoding failed.') + return ps.returncode + + # Write stdout of the subprocess for protoc to the |output_file|. + with open(args.output_file, 'wb') as f: + f.write(output[0]) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/fuzzers/disassembler_dex_fuzzer.cc b/fuzzers/disassembler_dex_fuzzer.cc new file mode 100644 index 0000000..ab08696 --- /dev/null +++ b/fuzzers/disassembler_dex_fuzzer.cc @@ -0,0 +1,54 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stddef.h> +#include <stdint.h> + +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/disassembler_dex.h" + +namespace { + +struct Environment { + Environment() { logging::SetMinLogLevel(logging::LOG_FATAL); } +}; + +} // namespace + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + static Environment env; + if (!size) + return 0; + // Prepare data. + std::vector<uint8_t> mutable_data(data, data + size); + zucchini::ConstBufferView image(mutable_data.data(), mutable_data.size()); + + // Create disassembler. Early exit on failure. + auto disassembler_dex = + zucchini::Disassembler::Make<zucchini::DisassemblerDex>(image); + if (!disassembler_dex) + return 0; + CHECK_LE(disassembler_dex->size(), image.size()); + zucchini::MutableBufferView mutable_image(mutable_data.data(), + disassembler_dex->size()); + + std::vector<zucchini::Reference> references; + // Read all references in the file. 
+ auto groups = disassembler_dex->MakeReferenceGroups(); + for (const auto& group : groups) { + auto reader = group.GetReader(disassembler_dex.get()); + for (auto ref = reader->GetNext(); ref.has_value(); + ref = reader->GetNext()) { + references.push_back(ref.value()); + } + reader.reset(); + auto writer = group.GetWriter(mutable_image, disassembler_dex.get()); + for (const auto& ref : references) + writer->PutNext(ref); + references.clear(); + } + return 0; +} diff --git a/fuzzers/disassembler_elf_fuzzer.cc b/fuzzers/disassembler_elf_fuzzer.cc new file mode 100644 index 0000000..16c885d --- /dev/null +++ b/fuzzers/disassembler_elf_fuzzer.cc @@ -0,0 +1,45 @@ +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stddef.h> +#include <stdint.h> + +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/disassembler_elf.h" +#include "components/zucchini/fuzzers/fuzz_utils.h" + +namespace { + +struct Environment { + Environment() { logging::SetMinLogLevel(logging::LOG_FATAL); } +}; + +} // namespace + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + static Environment env; + if (!size) + return 0; + // Prepare data. + std::vector<uint8_t> mutable_data(data, data + size); + zucchini::ConstBufferView image(mutable_data.data(), mutable_data.size()); + + // Create disassembler. Early exit on failure. 
+ auto disassembler_elf_x64 = + zucchini::Disassembler::Make<zucchini::DisassemblerElfX64>(image); + if (disassembler_elf_x64) { + zucchini::ReadAndWriteReferences(std::move(disassembler_elf_x64), + &mutable_data); + return 0; + } + + auto disassembler_elf_x86 = + zucchini::Disassembler::Make<zucchini::DisassemblerElfX86>(image); + if (disassembler_elf_x86) + zucchini::ReadAndWriteReferences(std::move(disassembler_elf_x86), + &mutable_data); + return 0; +} diff --git a/fuzzers/disassembler_win32_fuzzer.cc b/fuzzers/disassembler_win32_fuzzer.cc new file mode 100644 index 0000000..34a3565 --- /dev/null +++ b/fuzzers/disassembler_win32_fuzzer.cc @@ -0,0 +1,52 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <memory> +#include <vector> + +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/disassembler_win32.h" +#include "components/zucchini/fuzzers/fuzz_utils.h" + +namespace { + +struct Environment { + Environment() { + logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming. + } +}; + +} // namespace + +// Entry point for LibFuzzer. +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + static Environment env; + if (!size) + return 0; + // Prepare data. + std::vector<uint8_t> mutable_data(data, data + size); + zucchini::ConstBufferView image(mutable_data.data(), mutable_data.size()); + + // One of x86 or x64 should return a non-nullptr if the data is valid. 
+ auto disassembler_win32x86 = + zucchini::Disassembler::Make<zucchini::DisassemblerWin32X86>(image); + if (disassembler_win32x86) { + zucchini::ReadAndWriteReferences(std::move(disassembler_win32x86), + &mutable_data); + return 0; + } + + auto disassembler_win32x64 = + zucchini::Disassembler::Make<zucchini::DisassemblerWin32X64>(image); + if (disassembler_win32x64) + zucchini::ReadAndWriteReferences(std::move(disassembler_win32x64), + &mutable_data); + return 0; +} diff --git a/fuzzers/file_pair.proto b/fuzzers/file_pair.proto new file mode 100644 index 0000000..7fdc908 --- /dev/null +++ b/fuzzers/file_pair.proto @@ -0,0 +1,21 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +syntax = "proto2"; + +package zucchini.fuzzers; + +// NEXT_TAG = 4 +message FilePair { + // File to generate patch from or apply patch to. + required bytes old_file = 1; + // New file to generate patch or the patch to apply. + required bytes new_or_patch_file = 2; + // Imposed matches to apply to the equivalence matches. + // Should be of the format: + // "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"), + // where "#+#=#+#" encodes a match as 4 unsigned integers: + // [offset in "old", size in "old", offset in "new", size in "new"]. + optional string imposed_matches = 3; +} diff --git a/fuzzers/fuzz_utils.cc b/fuzzers/fuzz_utils.cc new file mode 100644 index 0000000..1fd89fa --- /dev/null +++ b/fuzzers/fuzz_utils.cc @@ -0,0 +1,40 @@ +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/fuzzers/fuzz_utils.h" + +#include <map> +#include <memory> +#include <vector> + +#include "components/zucchini/disassembler.h" + +namespace zucchini { + +void ReadAndWriteReferences( + std::unique_ptr<zucchini::Disassembler> disassembler, + std::vector<uint8_t>* mutable_data) { + zucchini::MutableBufferView mutable_image(mutable_data->data(), + disassembler->size()); + std::vector<zucchini::Reference> references; + auto groups = disassembler->MakeReferenceGroups(); + std::map<zucchini::PoolTag, std::vector<zucchini::Reference>> + references_of_pool; + for (const auto& group : groups) { + auto reader = group.GetReader(disassembler.get()); + std::vector<zucchini::Reference>* refs = + &references_of_pool[group.pool_tag()]; + for (auto ref = reader->GetNext(); ref.has_value(); + ref = reader->GetNext()) { + refs->push_back(ref.value()); + } + } + for (const auto& group : groups) { + auto writer = group.GetWriter(mutable_image, disassembler.get()); + for (const auto& ref : references_of_pool[group.pool_tag()]) + writer->PutNext(ref); + } +} + +} // namespace zucchini diff --git a/fuzzers/fuzz_utils.h b/fuzzers/fuzz_utils.h new file mode 100644 index 0000000..0caaab4 --- /dev/null +++ b/fuzzers/fuzz_utils.h @@ -0,0 +1,25 @@ +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_FUZZERS_FUZZ_UTILS_H_ +#define COMPONENTS_ZUCCHINI_FUZZERS_FUZZ_UTILS_H_ + +#include <stdint.h> + +#include <memory> +#include <vector> + +#include "components/zucchini/disassembler.h" + +namespace zucchini { + +// Helper function that uses |disassembler| to read all references from +// |mutable_data| and write them back. 
+void ReadAndWriteReferences( + std::unique_ptr<zucchini::Disassembler> disassembler, + std::vector<uint8_t>* mutable_data); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_FUZZERS_FUZZ_UTILS_H_ diff --git a/fuzzers/generate_fuzzer_data.py b/fuzzers/generate_fuzzer_data.py new file mode 100755 index 0000000..c76cfbc --- /dev/null +++ b/fuzzers/generate_fuzzer_data.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# Copyright 2018 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Script for generating new binary protobuf seeds for fuzzers. + +Currently supports creating a single seed binary protobuf of the form +zucchini.fuzzer.FilePair. +""" + +import argparse +import hashlib +import logging +import os +import platform +import subprocess +import sys + +ABS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__))) +ABS_TESTDATA_PATH = os.path.join(ABS_PATH, 'testdata') + +def parse_args(): + """Parses arguments from command-line.""" + parser = argparse.ArgumentParser() + parser.add_argument('--raw', help='Whether to use Raw Zucchini.', + action='store_true') + parser.add_argument('old_file', help='Old file to generate/apply patch.') + parser.add_argument('new_file', help='New file to generate patch from.') + parser.add_argument('patch_file', help='Patch filename to use.') + parser.add_argument('output_file', help='File to write binary protobuf to.') + return parser.parse_args() + + +def gen(old_file, new_file, patch_file, output_file, is_raw, is_win): + """Generates a new patch and binary encodes a protobuf pair.""" + # Create output directory if missing. + output_dir = os.path.dirname(output_file) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # Handle Windows executable names. 
+ zucchini = 'zucchini' + protoc = 'protoc' + if is_win: + zucchini += '.exe' + protoc += '.exe' + + zuc_cmd = [os.path.abspath(zucchini), '-gen'] + if is_raw: + zuc_cmd.append('-raw') + # Generate a new patch. + ret = subprocess.call(zuc_cmd + [old_file, new_file, patch_file], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if ret: + logging.error('Patch generation failed for ({}, {})'.format(old_file, + new_file)) + return ret + # Binary encode the protobuf pair. + ret = subprocess.call([sys.executable, + os.path.join(ABS_PATH, 'create_seed_file_pair.py'), + os.path.abspath(protoc), old_file, patch_file, + output_file]) + os.remove(patch_file) + return ret + + +def main(): + args = parse_args() + return gen(os.path.join(ABS_TESTDATA_PATH, args.old_file), + os.path.join(ABS_TESTDATA_PATH, args.new_file), + os.path.abspath(args.patch_file), + os.path.abspath(args.output_file), + args.raw, + platform.system() == 'Windows') + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/fuzzers/imposed_ensemble_matcher_fuzzer.cc b/fuzzers/imposed_ensemble_matcher_fuzzer.cc new file mode 100644 index 0000000..0dbcf86 --- /dev/null +++ b/fuzzers/imposed_ensemble_matcher_fuzzer.cc @@ -0,0 +1,67 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <stdint.h> + +#include <iostream> +#include <memory> + +#include "base/environment.h" +#include "base/logging.h" +#include "components/zucchini/buffer_sink.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/fuzzers/file_pair.pb.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/zucchini.h" +#include "testing/libfuzzer/proto/lpm_interface.h" + +namespace { + +constexpr size_t kMinImageSize = 16; +constexpr size_t kMaxImageSize = 1024; + +} // namespace + +struct Environment { + Environment() { + logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming. + } +}; + +DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) { + static Environment env; + // Dump code for debugging. + if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) { + std::cout << "Imposed Matches: " << file_pair.imposed_matches() << std::endl + << "Old File: " << file_pair.old_file() << std::endl + << "New File: " << file_pair.new_or_patch_file() << std::endl; + } + + // Prepare data. + zucchini::ConstBufferView old_image( + reinterpret_cast<const uint8_t*>(file_pair.old_file().data()), + file_pair.old_file().size()); + zucchini::ConstBufferView new_image( + reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()), + file_pair.new_or_patch_file().size()); + + // Restrict image sizes to speed up fuzzing. + if (old_image.size() < kMinImageSize || old_image.size() > kMaxImageSize || + new_image.size() < kMinImageSize || new_image.size() > kMaxImageSize) { + return; + } + + // Generate a patch writer. + zucchini::EnsemblePatchWriter patch_writer(old_image, new_image); + + // Fuzz Target. + zucchini::GenerateBufferImposed(old_image, new_image, + file_pair.imposed_matches(), &patch_writer); + + // Write to buffer to avoid IO. 
+ size_t patch_size = patch_writer.SerializedSize(); + std::unique_ptr<uint8_t[]> patch_data(new uint8_t[patch_size]); + zucchini::BufferSink patch(patch_data.get(), patch_size); + patch_writer.SerializeInto(patch); +} diff --git a/fuzzers/patch_fuzzer.cc b/fuzzers/patch_fuzzer.cc new file mode 100644 index 0000000..83bebcf --- /dev/null +++ b/fuzzers/patch_fuzzer.cc @@ -0,0 +1,19 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stddef.h> +#include <stdint.h> + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/patch_reader.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +// Entry point for LibFuzzer. +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + logging::SetMinLogLevel(3); // Disable console spamming. + zucchini::ConstBufferView patch(data, size); + absl::optional<zucchini::EnsemblePatchReader> patch_reader = + zucchini::EnsemblePatchReader::Create(patch); + return 0; +} diff --git a/fuzzers/raw_gen_fuzzer.cc b/fuzzers/raw_gen_fuzzer.cc new file mode 100644 index 0000000..de63d95 --- /dev/null +++ b/fuzzers/raw_gen_fuzzer.cc @@ -0,0 +1,71 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <stdint.h> + +#include <iostream> +#include <memory> + +#include "base/environment.h" +#include "base/logging.h" +#include "components/zucchini/buffer_sink.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/fuzzers/file_pair.pb.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/zucchini_gen.h" +#include "testing/libfuzzer/proto/lpm_interface.h" + +namespace { + +constexpr size_t kMinImageSize = 16; +constexpr size_t kMaxImageSize = 1024; + +} // namespace + +struct Environment { + Environment() { + logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming. + } +}; + +Environment* env = new Environment(); + +DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) { + // Dump code for debugging. + if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) { + std::cout << "Old File: " << file_pair.old_file() << std::endl + << "New File: " << file_pair.new_or_patch_file() << std::endl; + } + + // Prepare data. + zucchini::ConstBufferView old_image( + reinterpret_cast<const uint8_t*>(file_pair.old_file().data()), + file_pair.old_file().size()); + zucchini::ConstBufferView new_image( + reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()), + file_pair.new_or_patch_file().size()); + + // Restrict image sizes to speed up fuzzing. + if (old_image.size() < kMinImageSize || old_image.size() > kMaxImageSize || + new_image.size() < kMinImageSize || new_image.size() > kMaxImageSize) { + return; + } + + // Generate a patch writer. + zucchini::EnsemblePatchWriter patch_writer(old_image, new_image); + + // Fuzz Target. + zucchini::GenerateBufferRaw(old_image, new_image, &patch_writer); + + // Check that the patch size is sane. Crash the fuzzer if this isn't the case + // as it is a failure in Zucchini's patch performance that is worth + // investigating. 
+ size_t patch_size = patch_writer.SerializedSize(); + CHECK_LE(patch_size, kMaxImageSize * 2); + + // Write to buffer to avoid IO. + std::unique_ptr<uint8_t[]> patch_data(new uint8_t[patch_size]); + zucchini::BufferSink patch(patch_data.get(), patch_size); + patch_writer.SerializeInto(patch); +} diff --git a/fuzzers/testdata/.gitignore b/fuzzers/testdata/.gitignore new file mode 100644 index 0000000..d345889 --- /dev/null +++ b/fuzzers/testdata/.gitignore @@ -0,0 +1,4 @@ +# Exclude testdata binaries. +*.bin +*.dll +*.patch diff --git a/fuzzers/testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb b/fuzzers/testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb new file mode 100644 index 0000000..abbadd2 --- /dev/null +++ b/fuzzers/testdata/imposed_ensemble_matcher_fuzzer/seed.asciipb @@ -0,0 +1,90 @@ + +ˆABCDEFGHIJKLMNOP +ZTxt +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{3,4} [4,5] (90,08) +(1,4) +[+001, +001] +References {-004,-003}, <001,001>, [98,78] +(+01,+00) +AAAAAAAAA + +BLOCK2 +{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +Old bytes live here as this is reasonable. +txTZ +Hello, World! +ZTxt +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{3,4} [4,5] (90,08) +(1,4) +[+001, +001] +References {-004,-003}, <001,001>, [98,78] +(+01,+00) +AAAAAAAAA + +BLOCK2 +{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +Old bytes live here as this is reasonable. +txTZ +Yet another gap for Raw Zucchini +„ABCDEFGHIJKLMNOPQRSTUVWXYZ +ZTxt +BLOCK2 +{20,01} Another block. 
Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{4,4} [5,8] (90,08) +(1,4) +[+001, +001] +References {-005,-006}, <001,002>, [98,78] +(+01,+04) +AAAAAAAAA + +Other new bytes. + +Old bytes live here as this is reasonable. +New bytes live here. +txTZ +Hello, World! +ZTxt +BLOCK2 +{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{4,4} [5,8] (90,08) +(1,4) +[+001, +001] +References {-005,-006}, <001,002>, [98,78] +(+01,+04) +AAAAAAAAA + +Other new bytes. + +Old bytes live here as this is reasonable. +New bytes live here. +txTZ +Yet yet another gap for Raw Zucchini +17+420=388+347,452+420=27+347
\ No newline at end of file diff --git a/fuzzers/testdata/new.ztf b/fuzzers/testdata/new.ztf new file mode 100644 index 0000000..1b1876f --- /dev/null +++ b/fuzzers/testdata/new.ztf @@ -0,0 +1,20 @@ +ZTxt +BLOCK2 +{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{4,4} [5,8] (90,08) +(1,4) +[+001, +001] +References {-005,-006}, <001,002>, [98,78] +(+01,+04) +AAAAAAAAA + +Other new bytes. + +Old bytes live here as this is reasonable. +New bytes live here. +txTZ diff --git a/fuzzers/testdata/new_eventlog_provider.dll.sha1 b/fuzzers/testdata/new_eventlog_provider.dll.sha1 new file mode 100644 index 0000000..bbf56f9 --- /dev/null +++ b/fuzzers/testdata/new_eventlog_provider.dll.sha1 @@ -0,0 +1 @@ +89ce67035d2d2dae33cb2d98d4762e955b93df95
\ No newline at end of file diff --git a/fuzzers/testdata/new_imposed_archive.txt b/fuzzers/testdata/new_imposed_archive.txt new file mode 100644 index 0000000..5ce6f70 --- /dev/null +++ b/fuzzers/testdata/new_imposed_archive.txt @@ -0,0 +1,43 @@ +ABCDEFGHIJKLMNOPQRSTUVWXYZ +ZTxt +BLOCK2 +{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{4,4} [5,8] (90,08) +(1,4) +[+001, +001] +References {-005,-006}, <001,002>, [98,78] +(+01,+04) +AAAAAAAAA + +Other new bytes. + +Old bytes live here as this is reasonable. +New bytes live here. +txTZ +Hello, World! +ZTxt +BLOCK2 +{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{4,4} [5,8] (90,08) +(1,4) +[+001, +001] +References {-005,-006}, <001,002>, [98,78] +(+01,+04) +AAAAAAAAA + +Other new bytes. + +Old bytes live here as this is reasonable. +New bytes live here. +txTZ +Yet yet another gap for Raw Zucchini diff --git a/fuzzers/testdata/old.ztf b/fuzzers/testdata/old.ztf new file mode 100644 index 0000000..12dd536 --- /dev/null +++ b/fuzzers/testdata/old.ztf @@ -0,0 +1,21 @@ +ZTxt +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{3,4} [4,5] (90,08) +(1,4) +[+001, +001] +References {-004,-003}, <001,001>, [98,78] +(+01,+00) +AAAAAAAAA + +BLOCK2 +{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +Old bytes live here as this is reasonable. +txTZ diff --git a/fuzzers/testdata/old_eventlog_provider.dll.sha1 b/fuzzers/testdata/old_eventlog_provider.dll.sha1 new file mode 100644 index 0000000..5daf440 --- /dev/null +++ b/fuzzers/testdata/old_eventlog_provider.dll.sha1 @@ -0,0 +1 @@ +c80fdce994ba043956e192f650d894555460ff9b
\ No newline at end of file diff --git a/fuzzers/testdata/old_imposed_archive.txt b/fuzzers/testdata/old_imposed_archive.txt new file mode 100644 index 0000000..e4daa3f --- /dev/null +++ b/fuzzers/testdata/old_imposed_archive.txt @@ -0,0 +1,45 @@ +ABCDEFGHIJKLMNOP +ZTxt +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{3,4} [4,5] (90,08) +(1,4) +[+001, +001] +References {-004,-003}, <001,001>, [98,78] +(+01,+00) +AAAAAAAAA + +BLOCK2 +{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +Old bytes live here as this is reasonable. +txTZ +Hello, World! +ZTxt +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{3,4} [4,5] (90,08) +(1,4) +[+001, +001] +References {-004,-003}, <001,001>, [98,78] +(+01,+00) +AAAAAAAAA + +BLOCK2 +{06,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +Old bytes live here as this is reasonable. +txTZ +Yet another gap for Raw Zucchini diff --git a/fuzzers/testdata/patch_fuzzer/empty.zuc b/fuzzers/testdata/patch_fuzzer/empty.zuc Binary files differnew file mode 100644 index 0000000..64eacf5 --- /dev/null +++ b/fuzzers/testdata/patch_fuzzer/empty.zuc diff --git a/fuzzers/testdata/raw_or_ztf_gen_fuzzer/seed_proto.bin b/fuzzers/testdata/raw_or_ztf_gen_fuzzer/seed_proto.bin new file mode 100644 index 0000000..5939c72 --- /dev/null +++ b/fuzzers/testdata/raw_or_ztf_gen_fuzzer/seed_proto.bin @@ -0,0 +1,42 @@ + +¤ZTxt +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +ZucZucZucZucZucZucZucZucZuc +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{3,4} [4,5] (90,08) +(1,4) +[+001, +001] +References {-004,-003}, <001,001>, [98,78] +(+01,+00) +AAAAAAAAA + +BLOCK2 +{06,01} Another block. 
Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +Old bytes live here as this is reasonable. +txTZ +ÛZTxt +BLOCK2 +{20,01} Another block. Lorem Ipsum, Ipsum, Ipsum +<><><><><>{}{}{}{}[][][]()()()() +[4,1] + +BLOCK1 +Lorem Ipsum, Ipsum Lorem, Alpha Beta Gamma <1,1> +{4,4} [5,8] (90,08) +(1,4) +[+001, +001] +References {-005,-006}, <001,002>, [98,78] +(+01,+04) +AAAAAAAAA + +Other new bytes. + +Old bytes live here as this is reasonable. +New bytes live here. +txTZ diff --git a/fuzzers/ztf_gen_fuzzer.cc b/fuzzers/ztf_gen_fuzzer.cc new file mode 100644 index 0000000..ee2d47c --- /dev/null +++ b/fuzzers/ztf_gen_fuzzer.cc @@ -0,0 +1,67 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stdint.h> + +#include <iostream> +#include <memory> + +#include "base/environment.h" +#include "base/logging.h" +#include "components/zucchini/buffer_sink.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/fuzzers/file_pair.pb.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/zucchini_gen.h" +#include "testing/libfuzzer/proto/lpm_interface.h" + +namespace { + +constexpr size_t kMinImageSize = 16; +constexpr size_t kMaxImageSize = 1024; + +} // namespace + +struct Environment { + Environment() { + logging::SetMinLogLevel(logging::LOG_FATAL); // Disable console spamming. + } +}; + +Environment* env = new Environment(); + +DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) { + // Dump code for debugging. + if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) { + std::cout << "Old File: " << file_pair.old_file() << std::endl + << "New File: " << file_pair.new_or_patch_file() << std::endl; + } + + // Prepare data. These are originally Zucchini Text Format (ZTF) files but may + // in relatively unlikely circumstances mutate into other formats. 
+ zucchini::ConstBufferView old_image( + reinterpret_cast<const uint8_t*>(file_pair.old_file().data()), + file_pair.old_file().size()); + zucchini::ConstBufferView new_image( + reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()), + file_pair.new_or_patch_file().size()); + + // Restrict image sizes to speed up fuzzing. + if (old_image.size() < kMinImageSize || old_image.size() > kMaxImageSize || + new_image.size() < kMinImageSize || new_image.size() > kMaxImageSize) { + return; + } + + // Generate a patch writer. + zucchini::EnsemblePatchWriter patch_writer(old_image, new_image); + + // Fuzz Target. + zucchini::GenerateBuffer(old_image, new_image, &patch_writer); + + // Write to buffer to avoid IO. + size_t patch_size = patch_writer.SerializedSize(); + std::unique_ptr<uint8_t[]> patch_data(new uint8_t[patch_size]); + zucchini::BufferSink patch(patch_data.get(), patch_size); + patch_writer.SerializeInto(patch); +} diff --git a/heuristic_ensemble_matcher.cc b/heuristic_ensemble_matcher.cc new file mode 100644 index 0000000..2f01d34 --- /dev/null +++ b/heuristic_ensemble_matcher.cc @@ -0,0 +1,369 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/heuristic_ensemble_matcher.h" + +#include <algorithm> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "base/bind.h" +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "base/strings/stringprintf.h" +#include "components/zucchini/binary_data_histogram.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +namespace { + +/******** Helper Functions ********/ + +// Uses |detector| to find embedded executables inside |image|, and returns the +// result on success, or absl::nullopt on failure, which occurs if too many (> +// |kElementLimit|) elements are found. +absl::optional<std::vector<Element>> FindEmbeddedElements( + ConstBufferView image, + const std::string& name, + ElementDetector&& detector) { + // Maximum number of Elements in a file. This is enforced because our matching + // algorithm is O(n^2), which suffices for regular archive files that should + // have up to 10's of executable files. An archive containing 100's of + // executables is likely pathological, and is rejected to prevent exploits. + static constexpr size_t kElementLimit = 256; + std::vector<Element> elements; + ElementFinder element_finder(image, std::move(detector)); + for (auto element = element_finder.GetNext(); + element.has_value() && elements.size() <= kElementLimit; + element = element_finder.GetNext()) { + elements.push_back(*element); + } + if (elements.size() >= kElementLimit) { + LOG(WARNING) << name << ": Found too many elements."; + return absl::nullopt; + } + LOG(INFO) << name << ": Found " << elements.size() << " elements."; + return elements; +} + +// Determines whether a proposed comparison between Elements should be rejected +// early, to decrease the likelihood of creating false-positive matches, which +// may be costly for patching. 
Our heuristic simply prohibits big difference in +// size (relative and absolute) between matched elements. +bool UnsafeDifference(const Element& old_element, const Element& new_element) { + static constexpr double kMaxBloat = 2.0; + static constexpr size_t kMinWorrysomeDifference = 2 << 20; // 2MB + size_t lo_size = std::min(old_element.size, new_element.size); + size_t hi_size = std::max(old_element.size, new_element.size); + if (hi_size - lo_size < kMinWorrysomeDifference) + return false; + if (hi_size < lo_size * kMaxBloat) + return false; + return true; +} + +std::ostream& operator<<(std::ostream& stream, const Element& elt) { + stream << "(" << CastExecutableTypeToString(elt.exe_type) << ", " + << AsHex<8, size_t>(elt.offset) << " +" << AsHex<8, size_t>(elt.size) + << ")"; + return stream; +} + +/******** MatchingInfoOut ********/ + +// A class to output detailed information during ensemble matching. Extracting +// the functionality to a separate class decouples formatting and printing logic +// from matching logic. The base class consists of stubs. 
+class MatchingInfoOut { + protected: + MatchingInfoOut() = default; + MatchingInfoOut(const MatchingInfoOut&) = delete; + const MatchingInfoOut& operator=(const MatchingInfoOut&) = delete; + + public: + virtual ~MatchingInfoOut() = default; + virtual void InitSizes(size_t old_size, size_t new_size) {} + virtual void DeclareTypeMismatch(int iold, int inew) {} + virtual void DeclareUnsafeDistance(int iold, int inew) {} + virtual void DeclareCandidate(int iold, int inew) {} + virtual void DeclareMatch(int iold, + int inew, + double dist, + bool is_identical) {} + virtual void DeclareOutlier(int iold, int inew) {} + + virtual void OutputCompare(const Element& old_element, + const Element& new_element, + double dist) {} + + virtual void OutputMatch(const Element& best_old_element, + const Element& new_element, + bool is_identical, + double best_dist) {} + + virtual void OutputScores(const std::string& stats) {} + + virtual void OutputTextGrid() {} +}; + +/******** MatchingInfoTerse ********/ + +// A terse MatchingInfoOut that prints only basic information, using LOG(). +class MatchingInfoOutTerse : public MatchingInfoOut { + public: + MatchingInfoOutTerse() = default; + MatchingInfoOutTerse(const MatchingInfoOutTerse&) = delete; + const MatchingInfoOutTerse& operator=(const MatchingInfoOutTerse&) = delete; + ~MatchingInfoOutTerse() override = default; + + void OutputScores(const std::string& stats) override { + LOG(INFO) << "Best dists: " << stats; + } +}; + +/******** MatchingInfoOutVerbose ********/ + +// A verbose MatchingInfoOut that prints detailed information using |out_|, +// including comparison pairs, scores, and a text grid representation of +// pairwise matching results. 
+class MatchingInfoOutVerbose : public MatchingInfoOut { + public: + explicit MatchingInfoOutVerbose(std::ostream& out) : out_(out) {} + MatchingInfoOutVerbose(const MatchingInfoOutVerbose&) = delete; + const MatchingInfoOutVerbose& operator=(const MatchingInfoOutVerbose&) = + delete; + ~MatchingInfoOutVerbose() override = default; + + // Outputs sizes and initializes |text_grid_|. + void InitSizes(size_t old_size, size_t new_size) override { + out_ << "Comparing old (" << old_size << " elements) and new (" << new_size + << " elements)" << std::endl; + text_grid_.assign(new_size, std::string(old_size, '-')); + best_dist_.assign(new_size, -1.0); + } + + // Functions to update match status in text grid representation. + + void DeclareTypeMismatch(int iold, int inew) override { + text_grid_[inew][iold] = 'T'; + } + void DeclareUnsafeDistance(int iold, int inew) override { + text_grid_[inew][iold] = 'U'; + } + void DeclareCandidate(int iold, int inew) override { + text_grid_[inew][iold] = 'C'; // Provisional. + } + void DeclareMatch(int iold, + int inew, + double dist, + bool is_identical) override { + text_grid_[inew][iold] = is_identical ? 'I' : 'M'; + best_dist_[inew] = dist; + } + void DeclareOutlier(int iold, int inew) override { + text_grid_[inew][iold] = 'O'; + } + + // Functions to print detailed information. 
+ + void OutputCompare(const Element& old_element, + const Element& new_element, + double dist) override { + out_ << "Compare old" << old_element << " to new" << new_element << " --> " + << base::StringPrintf("%.5f", dist) << std::endl; + } + + void OutputMatch(const Element& best_old_element, + const Element& new_element, + bool is_identical, + double best_dist) override { + if (is_identical) { + out_ << "Skipped old" << best_old_element << " - identical to new" + << new_element; + } else { + out_ << "Matched old" << best_old_element << " to new" << new_element + << " --> " << base::StringPrintf("%.5f", best_dist); + } + out_ << std::endl; + } + + void OutputScores(const std::string& stats) override { + out_ << "Best dists: " << stats << std::endl; + } + + void OutputTextGrid() override { + int new_size = static_cast<int>(text_grid_.size()); + for (int inew = 0; inew < new_size; ++inew) { + const std::string& line = text_grid_[inew]; + out_ << " "; + for (char ch : line) { + char prefix = (ch == 'I' || ch == 'M') ? '(' : ' '; + char suffix = (ch == 'I' || ch == 'M') ? ')' : ' '; + out_ << prefix << ch << suffix; + } + if (best_dist_[inew] >= 0) + out_ << " " << base::StringPrintf("%.5f", best_dist_[inew]); + out_ << std::endl; + } + if (!text_grid_.empty()) { + out_ << " Legend: I = identical, M = matched, T = type mismatch, " + "U = unsafe distance, C = candidate, O = outlier, - = skipped." + << std::endl; + } + } + + private: + std::ostream& out_; + + // Text grid representation of matches. Rows correspond to "old" and columns + // correspond to "new". + std::vector<std::string> text_grid_; + + // For each "new" element, distance of best match. -1 denotes no match. 
+ std::vector<double> best_dist_; +}; + +} // namespace + +/******** HeuristicEnsembleMatcher ********/ + +HeuristicEnsembleMatcher::HeuristicEnsembleMatcher(std::ostream* out) + : out_(out) {} + +HeuristicEnsembleMatcher::~HeuristicEnsembleMatcher() = default; + +bool HeuristicEnsembleMatcher::RunMatch(ConstBufferView old_image, + ConstBufferView new_image) { + DCHECK(matches_.empty()); + LOG(INFO) << "Start matching."; + + // Find all elements in "old" and "new". + absl::optional<std::vector<Element>> old_elements = + FindEmbeddedElements(old_image, "Old file", + base::BindRepeating(DetectElementFromDisassembler)); + if (!old_elements.has_value()) + return false; + absl::optional<std::vector<Element>> new_elements = + FindEmbeddedElements(new_image, "New file", + base::BindRepeating(DetectElementFromDisassembler)); + if (!new_elements.has_value()) + return false; + + std::unique_ptr<MatchingInfoOut> info_out; + if (out_) + info_out = std::make_unique<MatchingInfoOutVerbose>(*out_); + else + info_out = std::make_unique<MatchingInfoOutTerse>(); + + const int num_new_elements = base::checked_cast<int>(new_elements->size()); + const int num_old_elements = base::checked_cast<int>(old_elements->size()); + info_out->InitSizes(num_old_elements, num_new_elements); + + // For each "new" element, match it with the "old" element that's nearest to + // it, with distance determined by BinaryDataHistogram. The resulting + // "old"-"new" pairs are stored into |results|. Possibilities: + // - Type mismatch: No match. + // - UnsafeDifference() heuristics fail: No match. + // - Identical match: Skip "new" since this is a trivial case. + // - Non-identical match: Match "new" with "old" with min distance. + // - No match: Skip "new". + struct Results { + int iold; + int inew; + double dist; + }; + std::vector<Results> results; + + // Precompute histograms for "old" since they get reused. 
+ std::vector<BinaryDataHistogram> old_his(num_old_elements); + for (int iold = 0; iold < num_old_elements; ++iold) { + ConstBufferView sub_image(old_image[(*old_elements)[iold]]); + old_his[iold].Compute(sub_image); + // ProgramDetector should have imposed minimal size limit to |sub_image|. + // Therefore resulting histogram are expected to be valid. + CHECK(old_his[iold].IsValid()); + } + + const int kUninitIold = num_old_elements; + for (int inew = 0; inew < num_new_elements; ++inew) { + const Element& cur_new_element = (*new_elements)[inew]; + ConstBufferView cur_new_sub_image(new_image[cur_new_element.region()]); + BinaryDataHistogram new_his; + new_his.Compute(cur_new_sub_image); + CHECK(new_his.IsValid()); + + double best_dist = HUGE_VAL; + int best_iold = kUninitIold; + bool is_identical = false; + + for (int iold = 0; iold < num_old_elements; ++iold) { + const Element& cur_old_element = (*old_elements)[iold]; + if (cur_old_element.exe_type != cur_new_element.exe_type) { + info_out->DeclareTypeMismatch(iold, inew); + continue; + } + if (UnsafeDifference(cur_old_element, cur_new_element)) { + info_out->DeclareUnsafeDistance(iold, inew); + continue; + } + double dist = old_his[iold].Distance(new_his); + info_out->DeclareCandidate(iold, inew); + info_out->OutputCompare(cur_old_element, cur_new_element, dist); + if (best_dist > dist) { // Tie resolution: First-one, first-serve. + best_iold = iold; + best_dist = dist; + if (best_dist == 0) { + ConstBufferView sub_image(old_image[cur_old_element.region()]); + if (sub_image.equals(cur_new_sub_image)) { + is_identical = true; + break; + } + } + } + } + + if (best_iold != kUninitIold) { + const Element& best_old_element = (*old_elements)[best_iold]; + info_out->DeclareMatch(best_iold, inew, best_dist, is_identical); + if (is_identical) // Skip "new" if identical match is found. 
+ ++num_identical_; + else + results.push_back({best_iold, inew, best_dist}); + info_out->OutputMatch(best_old_element, cur_new_element, is_identical, + best_dist); + } + } + + // Populate |matches_| from |result|. To reduce that chance of false-positive + // matches, statistics on dists are computed. If a match's |dist| is an + // outlier then it is rejected. + if (results.size() > 0) { + OutlierDetector detector; + for (const auto& result : results) { + if (result.dist > 0) + detector.Add(result.dist); + } + detector.Prepare(); + info_out->OutputScores(detector.RenderStats()); + for (const Results& result : results) { + if (detector.DecideOutlier(result.dist) > 0) { + info_out->DeclareOutlier(result.iold, result.inew); + } else { + matches_.push_back( + {(*old_elements)[result.iold], (*new_elements)[result.inew]}); + } + } + info_out->OutputTextGrid(); + } + + Trim(); + return true; +} + +} // namespace zucchini diff --git a/heuristic_ensemble_matcher.h b/heuristic_ensemble_matcher.h new file mode 100644 index 0000000..ec40787 --- /dev/null +++ b/heuristic_ensemble_matcher.h @@ -0,0 +1,39 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_ +#define COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_ + +#include <ostream> + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/ensemble_matcher.h" + +namespace zucchini { + +// An ensemble matcher that: +// - Detects embedded elements in "old" and "new" archive files. +// - Applies heuristics to create matched pairs. +// It is desired to have matched pairs that: +// - Have "reasonable" size difference (see UnsafeDifference() in the .cc file). +// - Have "minimal distance" among other potential matched pairs. 
+class HeuristicEnsembleMatcher : public EnsembleMatcher { + public: + explicit HeuristicEnsembleMatcher(std::ostream* out); + HeuristicEnsembleMatcher(const HeuristicEnsembleMatcher&) = delete; + const HeuristicEnsembleMatcher& operator=(const HeuristicEnsembleMatcher&) = + delete; + ~HeuristicEnsembleMatcher() override; + + // EnsembleMatcher: + bool RunMatch(ConstBufferView old_image, ConstBufferView new_image) override; + + private: + // Optional stream to print detailed information during matching. + std::ostream* out_ = nullptr; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_HEURISTIC_ENSEMBLE_MATCHER_H_ diff --git a/image_index.cc b/image_index.cc new file mode 100644 index 0000000..1efe5d8 --- /dev/null +++ b/image_index.cc @@ -0,0 +1,78 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/image_index.h" + +#include <algorithm> +#include <utility> + +#include "components/zucchini/algorithm.h" +#include "components/zucchini/disassembler.h" + +namespace zucchini { + +ImageIndex::ImageIndex(ConstBufferView image) + : image_(image), type_tags_(image.size(), kNoTypeTag) {} + +ImageIndex::ImageIndex(ImageIndex&&) = default; + +ImageIndex::~ImageIndex() = default; + +bool ImageIndex::Initialize(Disassembler* disasm) { + std::vector<ReferenceGroup> ref_groups = disasm->MakeReferenceGroups(); + for (const auto& group : ref_groups) { + // Build pool-to-type mapping. + DCHECK_NE(kNoPoolTag, group.pool_tag()); + TargetPool& target_pool = target_pools_[group.pool_tag()]; + target_pool.AddType(group.type_tag()); + target_pool.InsertTargets(std::move(*group.GetReader(disasm))); + } + for (const auto& group : ref_groups) { + // Find and store all references for each type, returns false on finding + // any overlap, to signal error. 
+ if (!InsertReferences(group.traits(), + std::move(*group.GetReader(disasm)))) { + return false; + } + } + return true; +} + +bool ImageIndex::IsToken(offset_t location) const { + TypeTag type = LookupType(location); + + // |location| points into raw data. + if (type == kNoTypeTag) + return true; + + // |location| points into a Reference. + Reference reference = refs(type).at(location); + // Only the first byte of a reference is a token. + return location == reference.location; +} + +bool ImageIndex::InsertReferences(const ReferenceTypeTraits& traits, + ReferenceReader&& ref_reader) { + // Store ReferenceSet for current type (of |group|). + DCHECK_NE(kNoTypeTag, traits.type_tag); + auto result = reference_sets_.emplace( + traits.type_tag, ReferenceSet(traits, pool(traits.pool_tag))); + DCHECK(result.second); + + result.first->second.InitReferences(std::move(ref_reader)); + for (auto ref : reference_sets_.at(traits.type_tag)) { + DCHECK(RangeIsBounded(ref.location, traits.width, size())); + auto cur_type_tag = type_tags_.begin() + ref.location; + + // Check for overlap with existing reference. If found, then invalidate. + if (std::any_of(cur_type_tag, cur_type_tag + traits.width, + [](TypeTag type) { return type != kNoTypeTag; })) { + return false; + } + std::fill(cur_type_tag, cur_type_tag + traits.width, traits.type_tag); + } + return true; +} + +} // namespace zucchini diff --git a/image_index.h b/image_index.h new file mode 100644 index 0000000..b5acee1 --- /dev/null +++ b/image_index.h @@ -0,0 +1,116 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_ +#define COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <vector> + +#include "base/check_op.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/reference_set.h" +#include "components/zucchini/target_pool.h" + +namespace zucchini { + +class Disassembler; + +// A class that holds annotations of an image, allowing quick access to its raw +// and reference content. The memory overhead of storing all references is +// relatively high, so this is only used during patch generation. +class ImageIndex { + public: + explicit ImageIndex(ConstBufferView image); + ImageIndex(const ImageIndex&) = delete; + ImageIndex(ImageIndex&&); + ~ImageIndex(); + + // Inserts all references read from |disasm|. This should be called exactly + // once. If overlap between any two references of any type is encountered, + // returns false and leaves the object in an invalid state. Otherwise, + // returns true. + // TODO(huangs): Refactor ReaderFactory and WriterFactory so + // |const Disassembler&| can be used here. + bool Initialize(Disassembler* disasm); + + // Returns the array size needed to accommodate all reference type values. + size_t TypeCount() const { + if (reference_sets_.empty()) + return 0U; + return reference_sets_.rbegin()->first.value() + 1; // Max key + 1. + } + + // Returns the array size needed to accommodate all pool values. + size_t PoolCount() const { + if (target_pools_.empty()) + return 0U; + return target_pools_.rbegin()->first.value() + 1; // Max key + 1. + } + + // Returns true if |image_[location]| is either: + // - A raw value. + // - The first byte of a reference. + bool IsToken(offset_t location) const; + + // Returns true if |image_[location]| is part of a reference. 
+ bool IsReference(offset_t location) const { + return LookupType(location) != kNoTypeTag; + } + + // Returns the type tag of the reference covering |location|, or kNoTypeTag if + // |location| is not part of a reference. + TypeTag LookupType(offset_t location) const { + DCHECK_LT(location, size()); + return type_tags_[location]; + } + + // Returns the raw value at |location|. + uint8_t GetRawValue(offset_t location) const { + DCHECK_LT(location, size()); + return image_[location]; + } + + const std::map<PoolTag, TargetPool>& target_pools() const { + return target_pools_; + } + const std::map<TypeTag, ReferenceSet>& reference_sets() const { + return reference_sets_; + } + + const TargetPool& pool(PoolTag pool_tag) const { + return target_pools_.at(pool_tag); + } + const ReferenceSet& refs(TypeTag type_tag) const { + return reference_sets_.at(type_tag); + } + + // Returns the size of the image. + size_t size() const { return image_.size(); } + + private: + // Inserts to |*this| index, all references described by |traits| read from + // |ref_reader|, which gets consumed. This should be called exactly once for + // each reference type. If overlap between any two references of any type is + // encountered, returns false and leaves the object in an invalid state. + // Otherwise, returns true. + bool InsertReferences(const ReferenceTypeTraits& traits, + ReferenceReader&& ref_reader); + + const ConstBufferView image_; + + // Used for random access lookup of reference type, for each byte in |image_|. + std::vector<TypeTag> type_tags_; + + std::map<PoolTag, TargetPool> target_pools_; + std::map<TypeTag, ReferenceSet> reference_sets_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_IMAGE_INDEX_H_ diff --git a/image_index_unittest.cc b/image_index_unittest.cc new file mode 100644 index 0000000..cf6f8a7 --- /dev/null +++ b/image_index_unittest.cc @@ -0,0 +1,131 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/image_index.h" + +#include <stddef.h> + +#include <numeric> +#include <vector> + +#include "base/test/gtest_util.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/test_disassembler.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +class ImageIndexTest : public testing::Test { + protected: + ImageIndexTest() + : buffer_(20), + image_index_(ConstBufferView(buffer_.data(), buffer_.size())) { + std::iota(buffer_.begin(), buffer_.end(), 0); + } + + void InitializeWithDefaultTestData() { + TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, + {{1, 0}, {8, 1}, {10, 2}}, + {4, TypeTag(1), PoolTag(0)}, {{3, 3}}, + {3, TypeTag(2), PoolTag(1)}, {{12, 4}, {17, 5}}); + EXPECT_TRUE(image_index_.Initialize(&disasm)); + } + + std::vector<uint8_t> buffer_; + ImageIndex image_index_; +}; + +TEST_F(ImageIndexTest, TypeAndPool) { + TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, {}, + {4, TypeTag(1), PoolTag(0)}, {}, + {3, TypeTag(2), PoolTag(1)}, {}); + EXPECT_TRUE(image_index_.Initialize(&disasm)); + + EXPECT_EQ(3U, image_index_.TypeCount()); + EXPECT_EQ(2U, image_index_.PoolCount()); + + EXPECT_EQ(TypeTag(0), image_index_.refs(TypeTag(0)).type_tag()); + EXPECT_EQ(TypeTag(1), image_index_.refs(TypeTag(1)).type_tag()); + EXPECT_EQ(TypeTag(2), image_index_.refs(TypeTag(2)).type_tag()); + + EXPECT_EQ(PoolTag(0), image_index_.refs(TypeTag(0)).pool_tag()); + EXPECT_EQ(PoolTag(0), image_index_.refs(TypeTag(1)).pool_tag()); + EXPECT_EQ(PoolTag(1), image_index_.refs(TypeTag(2)).pool_tag()); +} + +TEST_F(ImageIndexTest, InvalidInitialize1) { + // Overlap within the same group. 
+ TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, {{1, 0}, {2, 0}}, + {4, TypeTag(1), PoolTag(0)}, {}, + {3, TypeTag(2), PoolTag(1)}, {}); + EXPECT_FALSE(image_index_.Initialize(&disasm)); +} + +TEST_F(ImageIndexTest, InvalidInitialize2) { + // Overlap across different readers. + TestDisassembler disasm({2, TypeTag(0), PoolTag(0)}, + {{1, 0}, {8, 1}, {10, 2}}, + {4, TypeTag(1), PoolTag(0)}, {{3, 3}}, + {3, TypeTag(2), PoolTag(1)}, {{11, 0}}); + EXPECT_FALSE(image_index_.Initialize(&disasm)); +} + +TEST_F(ImageIndexTest, LookupType) { + InitializeWithDefaultTestData(); + + std::vector<int> expected = { + -1, // raw + 0, 0, // ref 0 + 1, 1, 1, 1, // ref 1 + -1, // raw + 0, 0, // ref 0 + 0, 0, // ref 0 + 2, 2, 2, // ref 2 + -1, -1, // raw + 2, 2, 2, // ref 2 + }; + + for (offset_t i = 0; i < image_index_.size(); ++i) + EXPECT_EQ(TypeTag(expected[i]), image_index_.LookupType(i)); +} + +TEST_F(ImageIndexTest, IsToken) { + InitializeWithDefaultTestData(); + + std::vector<bool> expected = { + 1, // raw + 1, 0, // ref 0 + 1, 0, 0, 0, // ref 1 + 1, // raw + 1, 0, // ref 0 + 1, 0, // ref 0 + 1, 0, 0, // ref 2 + 1, 1, // raw + 1, 0, 0, // ref 2 + }; + + for (offset_t i = 0; i < image_index_.size(); ++i) + EXPECT_EQ(expected[i], image_index_.IsToken(i)); +} + +TEST_F(ImageIndexTest, IsReference) { + InitializeWithDefaultTestData(); + + std::vector<bool> expected = { + 0, // raw + 1, 1, // ref 0 + 1, 1, 1, 1, // ref 1 + 0, // raw + 1, 1, // ref 0 + 1, 1, // ref 0 + 1, 1, 1, // ref 2 + 0, 0, // raw + 1, 1, 1, // ref 2 + }; + + for (offset_t i = 0; i < image_index_.size(); ++i) + EXPECT_EQ(expected[i], image_index_.IsReference(i)); +} + +} // namespace zucchini diff --git a/image_utils.h b/image_utils.h new file mode 100644 index 0000000..748e20b --- /dev/null +++ b/image_utils.h @@ -0,0 +1,225 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_ +#define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <string> + +#include "base/format_macros.h" +#include "base/numerics/safe_conversions.h" +#include "base/strings/stringprintf.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/typed_value.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +// offset_t is used to describe an offset in an image. +// Files bigger than 4GB are not supported. +using offset_t = uint32_t; +// Divide by 2 since label marking uses the most significant bit. +constexpr offset_t kOffsetBound = static_cast<offset_t>(-1) / 2; +// Use 0xFFFFFFF*E*, since 0xFFFFFFF*F* is a sentinel value for Dex references. +constexpr offset_t kInvalidOffset = static_cast<offset_t>(-2); + +// key_t is used to identify an offset in a table. +using key_t = uint32_t; + +enum Bitness : uint8_t { + // The numerical values are intended to simplify WidthOf() below. + kBit32 = 4, + kBit64 = 8 +}; + +inline uint32_t WidthOf(Bitness bitness) { + return static_cast<uint32_t>(bitness); +} + +// Used to uniquely identify a reference type. +// Strongly typed objects are used to avoid ambiguitees with PoolTag. +struct TypeTag : public TypedValue<TypeTag, uint8_t> { + // inheriting constructor: + using TypedValue<TypeTag, uint8_t>::TypedValue; +}; + +// Used to uniquely identify a pool. +struct PoolTag : public TypedValue<PoolTag, uint8_t> { + // inheriting constructor: + using TypedValue<PoolTag, uint8_t>::TypedValue; +}; + +constexpr TypeTag kNoTypeTag(0xFF); // Typically used to identify raw data. +constexpr PoolTag kNoPoolTag(0xFF); + +// Specification of references in an image file. 
+struct ReferenceTypeTraits { + constexpr ReferenceTypeTraits(offset_t width_in, + TypeTag type_tag_in, + PoolTag pool_tag_in) + : width(width_in), type_tag(type_tag_in), pool_tag(pool_tag_in) {} + + // |width| specifies number of bytes covered by the reference's binary + // encoding. + const offset_t width; + // |type_tag| identifies the reference type being described. + const TypeTag type_tag; + // |pool_tag| identifies the pool this type belongs to. + const PoolTag pool_tag; +}; + +// There is no need to store |type| because references of the same type are +// always aggregated into the same container, and so during iteration we'd have +// |type| already. +struct Reference { + offset_t location; + offset_t target; +}; + +inline bool operator==(const Reference& a, const Reference& b) { + return a.location == b.location && a.target == b.target; +} + +// Interface for extracting References through member function GetNext(). +// This is used by Disassemblers to extract references from an image file. +// Typically, a Reader lazily extracts values and does not hold any storage. +class ReferenceReader { + public: + virtual ~ReferenceReader() = default; + + // Returns the next available Reference, or nullopt_t if exhausted. + // Extracted References must be ordered by their location in the image. + virtual absl::optional<Reference> GetNext() = 0; +}; + +// Interface for writing References through member function +// PutNext(reference). This is used by Disassemblers to write new References +// in the image file. +class ReferenceWriter { + public: + virtual ~ReferenceWriter() = default; + + // Writes |reference| in the underlying image file. This operation always + // succeeds. + virtual void PutNext(Reference reference) = 0; +}; + +// An Equivalence is a block of length |length| that approximately match in +// |old_image| at an offset of |src_offset| and in |new_image| at an offset of +// |dst_offset|. 
+struct Equivalence { + offset_t src_offset; + offset_t dst_offset; + offset_t length; + + offset_t src_end() const { return src_offset + length; } + offset_t dst_end() const { return dst_offset + length; } +}; + +inline bool operator==(const Equivalence& a, const Equivalence& b) { + return a.src_offset == b.src_offset && a.dst_offset == b.dst_offset && + a.length == b.length; +} + +// Same as Equivalence, but with a similarity score. This is only used when +// generating the patch. +struct EquivalenceCandidate { + Equivalence eq; + double similarity; +}; + +template <size_t N> +inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) { + static_assert(N == 5, "Expected ExeType of length 4 + 1 null byte."); + return (exe_type[3] << 24) | (exe_type[2] << 16) | (exe_type[1] << 8) | + exe_type[0]; +} + +// Enumerations for supported executables. Values in this enum must be distinct. +// Once present, values should never be altered or removed to ensure backwards +// compatibility and patch type collision avoidance. +enum ExecutableType : uint32_t { + kExeTypeUnknown = UINT32_MAX, + kExeTypeNoOp = ExeTypeToUint32("NoOp"), + kExeTypeWin32X86 = ExeTypeToUint32("Px86"), + kExeTypeWin32X64 = ExeTypeToUint32("Px64"), + kExeTypeElfX86 = ExeTypeToUint32("Ex86"), + kExeTypeElfX64 = ExeTypeToUint32("Ex64"), + kExeTypeElfAArch32 = ExeTypeToUint32("EA32"), + kExeTypeElfAArch64 = ExeTypeToUint32("EA64"), + kExeTypeDex = ExeTypeToUint32("DEX "), + kExeTypeZtf = ExeTypeToUint32("ZTF "), +}; + +constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) { + switch (static_cast<ExecutableType>(possible_exe_type)) { + case kExeTypeNoOp: // Falls through. + case kExeTypeWin32X86: // Falls through. + case kExeTypeWin32X64: // Falls through. + case kExeTypeElfX86: // Falls through. + case kExeTypeElfX64: // Falls through. + case kExeTypeElfAArch32: // Falls through. + case kExeTypeElfAArch64: // Falls through. + case kExeTypeDex: // Falls through. 
+ case kExeTypeZtf: // Falls through. + case kExeTypeUnknown: + return static_cast<ExecutableType>(possible_exe_type); + default: + return kExeTypeUnknown; + } +} + +inline std::string CastExecutableTypeToString(ExecutableType exe_type) { + uint32_t v = static_cast<uint32_t>(exe_type); + char result[] = {static_cast<char>(v), static_cast<char>(v >> 8), + static_cast<char>(v >> 16), static_cast<char>(v >> 24), 0}; + return result; +} + +// A region in an image with associated executable type |exe_type|. If +// |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data. +struct Element : public BufferRegion { + Element() = default; + constexpr Element(const BufferRegion& region_in, ExecutableType exe_type_in) + : BufferRegion(region_in), exe_type(exe_type_in) {} + constexpr explicit Element(const BufferRegion& region_in) + : BufferRegion(region_in), exe_type(kExeTypeNoOp) {} + + // Similar to lo() and hi(), but returns values in offset_t. + offset_t BeginOffset() const { return base::checked_cast<offset_t>(lo()); } + offset_t EndOffset() const { return base::checked_cast<offset_t>(hi()); } + + BufferRegion region() const { return {offset, size}; } + + friend bool operator==(const Element& a, const Element& b) { + return a.exe_type == b.exe_type && a.offset == b.offset && a.size == b.size; + } + + ExecutableType exe_type; +}; + +// A matched pair of Elements. +struct ElementMatch { + bool IsValid() const { return old_element.exe_type == new_element.exe_type; } + ExecutableType exe_type() const { return old_element.exe_type; } + + // Represents match as "#+#=#+#", where "#" denotes the integers: + // [offset in "old", size in "old", offset in "new", size in "new"]. + // Note that element type is omitted. 
+ std::string ToString() const { + return base::StringPrintf("%" PRIuS "+%" PRIuS "=%" PRIuS "+%" PRIuS "", + old_element.offset, old_element.size, + new_element.offset, new_element.size); + } + + Element old_element; + Element new_element; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_ diff --git a/image_utils_unittest.cc b/image_utils_unittest.cc new file mode 100644 index 0000000..2cf6455 --- /dev/null +++ b/image_utils_unittest.cc @@ -0,0 +1,33 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/image_utils.h" + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(ImageUtilsTest, Bitness) { + EXPECT_EQ(4U, WidthOf(kBit32)); + EXPECT_EQ(8U, WidthOf(kBit64)); +} + +TEST(ImageUtilsTest, CastExecutableTypeToString) { + EXPECT_EQ("NoOp", CastExecutableTypeToString(kExeTypeNoOp)); + EXPECT_EQ("Px86", CastExecutableTypeToString(kExeTypeWin32X86)); + EXPECT_EQ("EA64", CastExecutableTypeToString(kExeTypeElfAArch64)); + EXPECT_EQ("DEX ", CastExecutableTypeToString(kExeTypeDex)); +} + +TEST(ImageUtilsTest, ElementMatchToString) { + constexpr ExecutableType kAnyType = kExeTypeWin32X86; + EXPECT_EQ("1+2=3+4", + (ElementMatch{{{1, 2}, kAnyType}, {{3, 4}, kAnyType}}).ToString()); + EXPECT_EQ( + "1000000000+1=0+1000000000", + (ElementMatch{{{1000000000, 1}, kAnyType}, {{0, 1000000000}, kAnyType}}) + .ToString()); +} + +} // namespace zucchini diff --git a/imposed_ensemble_matcher.cc b/imposed_ensemble_matcher.cc new file mode 100644 index 0000000..1c1301b --- /dev/null +++ b/imposed_ensemble_matcher.cc @@ -0,0 +1,143 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/imposed_ensemble_matcher.h" + +#include <algorithm> +#include <sstream> +#include <utility> + +#include "base/bind.h" +#include "base/logging.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +/******** ImposedMatchParser ********/ + +ImposedMatchParser::ImposedMatchParser() = default; + +ImposedMatchParser::~ImposedMatchParser() = default; + +ImposedMatchParser::Status ImposedMatchParser::Parse( + std::string imposed_matches, + ConstBufferView old_image, + ConstBufferView new_image, + ElementDetector&& detector) { + CHECK(matches_.empty()); + CHECK(bad_matches_.empty()); + + // Parse |imposed_matches| and check bounds. + std::istringstream iss(std::move(imposed_matches)); + bool first = true; + iss.peek(); // Makes empty |iss| realize EOF is reached. + while (iss && !iss.eof()) { + // Eat delimiter. + if (first) { + first = false; + } else if (!(iss >> EatChar(','))) { + return kInvalidDelimiter; + } + // Extract parameters for one imposed match. + offset_t old_offset = 0U; + size_t old_size = 0U; + offset_t new_offset = 0U; + size_t new_size = 0U; + if (!(iss >> StrictUInt<offset_t>(old_offset) >> EatChar('+') >> + StrictUInt<size_t>(old_size) >> EatChar('=') >> + StrictUInt<offset_t>(new_offset) >> EatChar('+') >> + StrictUInt<size_t>(new_size))) { + return kParseError; + } + // Check bounds. + if (old_size == 0 || new_size == 0 || + !old_image.covers({old_offset, old_size}) || + !new_image.covers({new_offset, new_size})) { + return kOutOfBound; + } + matches_.push_back( + {{{old_offset, old_size}, kExeTypeUnknown}, // Assign type later. + {{new_offset, new_size}, kExeTypeUnknown}}); // Assign type later. + } + // Sort matches by "new" file offsets. This helps with overlap checks. + std::sort(matches_.begin(), matches_.end(), + [](const ElementMatch& match_a, const ElementMatch& match_b) { + return match_a.new_element.offset < match_b.new_element.offset; + }); + + // Check for overlaps in "new" file. 
+ if (std::adjacent_find( + matches_.begin(), matches_.end(), + [](const ElementMatch& match1, const ElementMatch& match2) { + return match1.new_element.hi() > match2.new_element.lo(); + }) != matches_.end()) { + return kOverlapInNew; + } + + // Compute types and verify consistency. Remove identical matches and matches + // where any sub-image has an unknown type. + size_t write_idx = 0; + for (size_t read_idx = 0; read_idx < matches_.size(); ++read_idx) { + ConstBufferView old_sub_image( + old_image[matches_[read_idx].old_element.region()]); + ConstBufferView new_sub_image( + new_image[matches_[read_idx].new_element.region()]); + // Remove identical match. + if (old_sub_image.equals(new_sub_image)) { + ++num_identical_; + continue; + } + // Check executable types of sub-images. + absl::optional<Element> old_element = detector.Run(old_sub_image); + absl::optional<Element> new_element = detector.Run(new_sub_image); + if (!old_element || !new_element) { + // Skip unknown types, including those mixed with known types. + bad_matches_.push_back(matches_[read_idx]); + continue; + } else if (old_element->exe_type != new_element->exe_type) { + // Error if types are known, but inconsistent. + return kTypeMismatch; + } + + // Keep match and remove gaps. 
+ matches_[read_idx].old_element.exe_type = old_element->exe_type; + matches_[read_idx].new_element.exe_type = new_element->exe_type; + if (write_idx < read_idx) + matches_[write_idx] = matches_[read_idx]; + ++write_idx; + } + matches_.resize(write_idx); + return kSuccess; +} + +/******** ImposedEnsembleMatcher ********/ + +ImposedEnsembleMatcher::ImposedEnsembleMatcher( + const std::string& imposed_matches) + : imposed_matches_(imposed_matches) {} + +ImposedEnsembleMatcher::~ImposedEnsembleMatcher() = default; + +bool ImposedEnsembleMatcher::RunMatch(ConstBufferView old_image, + ConstBufferView new_image) { + DCHECK(matches_.empty()); + LOG(INFO) << "Start matching."; + ImposedMatchParser parser; + ImposedMatchParser::Status status = + parser.Parse(std::move(imposed_matches_), old_image, new_image, + base::BindRepeating(DetectElementFromDisassembler)); + // Print all warnings first. + for (const ElementMatch& bad_match : *parser.mutable_bad_matches()) + LOG(WARNING) << "Skipped match with unknown type: " << bad_match.ToString(); + if (status != ImposedMatchParser::kSuccess) { + LOG(ERROR) << "Imposed match failed with error code " << status << "."; + return false; + } + num_identical_ = parser.num_identical(); + matches_ = std::move(*parser.mutable_matches()); + Trim(); + return true; +} + +} // namespace zucchini diff --git a/imposed_ensemble_matcher.h b/imposed_ensemble_matcher.h new file mode 100644 index 0000000..39b0df5 --- /dev/null +++ b/imposed_ensemble_matcher.h @@ -0,0 +1,83 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_ +#define COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_ + +#include <stddef.h> + +#include <string> +#include <vector> + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/ensemble_matcher.h" + +namespace zucchini { + +// A class to parse imposed match format, which is either an empty string (no +// imposed patch), or a string formatted as: +// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"), +// where "#+#=#+#" encodes a match as 4 unsigned integers: +// [offset in "old", size in "old", offset in "new", size in "new"]. +class ImposedMatchParser { + public: + enum Status { + kSuccess, + kInvalidDelimiter, + kParseError, + kOutOfBound, + kOverlapInNew, + kTypeMismatch, + }; + + ImposedMatchParser(); + ImposedMatchParser(const ImposedMatchParser&) = delete; + const ImposedMatchParser& operator=(const ImposedMatchParser&) = delete; + ~ImposedMatchParser(); + + // Parses |imposed_matches| and writes the results to member variables. + // |old_image| and |new_image| are used for validation. Returns a Status value + // to signal success or various error modes. |detector| is used to validate + // Element types for matched pairs. This should only be called once for each + // instance. + Status Parse(std::string imposed_matches, + ConstBufferView old_image, + ConstBufferView new_image, + ElementDetector&& detector); + + size_t num_identical() const { return num_identical_; } + std::vector<ElementMatch>* mutable_matches() { return &matches_; } + std::vector<ElementMatch>* mutable_bad_matches() { return &bad_matches_; } + + private: + size_t num_identical_ = 0; + std::vector<ElementMatch> matches_; + // Stores "forgiven" bad matches, so the caller can impose matches for + // unsupported image types (which will simply be ignored). Note that imposing + // matches for known but incompatible image types would result in error. 
+ std::vector<ElementMatch> bad_matches_; +}; + +// An ensemble matcher that parses a format string that describes matches. +class ImposedEnsembleMatcher : public EnsembleMatcher { + public: + // |imposed_matches| specifies imposed maches, using a format described below. + // Validation is performed in RunMatch(). + explicit ImposedEnsembleMatcher(const std::string& imposed_matches); + ImposedEnsembleMatcher(const ImposedEnsembleMatcher&) = delete; + const ImposedEnsembleMatcher& operator=(const ImposedEnsembleMatcher&) = + delete; + ~ImposedEnsembleMatcher() override; + + // EnsembleMatcher: + bool RunMatch(ConstBufferView old_image, ConstBufferView new_image) override; + + private: + const std::string imposed_matches_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_ diff --git a/imposed_ensemble_matcher_unittest.cc b/imposed_ensemble_matcher_unittest.cc new file mode 100644 index 0000000..9a6dc7d --- /dev/null +++ b/imposed_ensemble_matcher_unittest.cc @@ -0,0 +1,214 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stddef.h> +#include <stdint.h> + +#include <string> +#include <utility> +#include <vector> + +#include "components/zucchini/imposed_ensemble_matcher.h" + +#include "base/bind.h" +#include "base/callback_helpers.h" +#include "base/check_op.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +namespace { + +// This test uses a mock archive format where regions are determined by their +// consecutive byte values rather than parsing real executables. 
In fact, since +// elements are imposed, only the first byte of the element is used to specify +// executable type of the mock data: +// - 'W' and 'w' specify kExeTypeWin32X86. +// - 'E' and 'e' specify kExeTypeElfX86. +// - Everything else specify kExeTypeUnknown. +class TestElementDetector { + public: + TestElementDetector() {} + + absl::optional<Element> Run(ConstBufferView image) const { + DCHECK_GT(image.size(), 0U); + char first_char = *image.begin(); + if (first_char == 'W' || first_char == 'w') + return Element(image.local_region(), kExeTypeWin32X86); + if (first_char == 'E' || first_char == 'e') + return Element(image.local_region(), kExeTypeElfX86); + return absl::nullopt; + } +}; + +} // namespace + +TEST(ImposedMatchParserTest, ImposedMatchParser) { + std::vector<uint8_t> old_data; + std::vector<uint8_t> new_data; + auto populate = [](const std::string& s, std::vector<uint8_t>* data) { + for (char ch : s) + data->push_back(static_cast<uint8_t>(ch)); + }; + // Pos: 11111111 + // 012345678901234567 + populate("1WW222EEEE", &old_data); + populate("33eee2222222wwww44", &new_data); + + ConstBufferView old_image(&old_data[0], old_data.size()); + ConstBufferView new_image(&new_data[0], new_data.size()); + + TestElementDetector detector; + + // Reusable output values. 
+ std::string prev_imposed_matches; + ImposedMatchParser::Status status; + size_t num_identical; + std::vector<ElementMatch> matches; + std::vector<ElementMatch> bad_matches; + + auto run_test = [&](const std::string& imposed_matches) -> bool { + prev_imposed_matches = imposed_matches; + status = ImposedMatchParser::kSuccess; + num_identical = 0; + matches.clear(); + bad_matches.clear(); + ImposedMatchParser parser; + status = parser.Parse(imposed_matches, old_image, new_image, + base::BindRepeating(&TestElementDetector::Run, + base::Unretained(&detector))); + num_identical = parser.num_identical(); + matches = std::move(*parser.mutable_matches()); + bad_matches = std::move(*parser.mutable_bad_matches()); + return status == ImposedMatchParser::kSuccess; + }; + + auto run_check = [&](const ElementMatch& match, ExecutableType exe_type, + offset_t old_offset, size_t old_size, + offset_t new_offset, size_t new_size) { + EXPECT_EQ(exe_type, match.exe_type()) << prev_imposed_matches; + EXPECT_EQ(exe_type, match.old_element.exe_type) << prev_imposed_matches; + EXPECT_EQ(old_offset, match.old_element.offset) << prev_imposed_matches; + EXPECT_EQ(old_size, match.old_element.size) << prev_imposed_matches; + EXPECT_EQ(exe_type, match.new_element.exe_type) << prev_imposed_matches; + EXPECT_EQ(new_offset, match.new_element.offset) << prev_imposed_matches; + EXPECT_EQ(new_size, match.new_element.size) << prev_imposed_matches; + }; + + // Empty string: Vacuous but valid. + EXPECT_TRUE(run_test("")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(0U, matches.size()); + EXPECT_EQ(0U, bad_matches.size()); + + // Full matches. Different permutations give same result. 
+ for (const std::string& imposed_matches : + {"1+2=12+4,4+2=5+2,6+4=2+3", "1+2=12+4,6+4=2+3,4+2=5+2", + "4+2=5+2,1+2=12+4,6+4=2+3", "4+2=5+2,6+4=2+3,1+2=12+4", + "6+4=2+3,1+2=12+4,4+2=5+2", "6+4=2+3,1+2=12+4,4+2=5+2"}) { + EXPECT_TRUE(run_test(imposed_matches)); + EXPECT_EQ(1U, num_identical); // "4+2=5+2" + EXPECT_EQ(2U, matches.size()); + // Results are sorted by "new" offsets. + run_check(matches[0], kExeTypeElfX86, 6, 4, 2, 3); + run_check(matches[1], kExeTypeWin32X86, 1, 2, 12, 4); + EXPECT_EQ(0U, bad_matches.size()); + } + + // Single subregion match. + EXPECT_TRUE(run_test("1+2=12+4")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(1U, matches.size()); + run_check(matches[0], kExeTypeWin32X86, 1, 2, 12, 4); + EXPECT_EQ(0U, bad_matches.size()); + + // Single subregion match. We're lax with redundant 0. + EXPECT_TRUE(run_test("6+04=02+10")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(1U, matches.size()); + run_check(matches[0], kExeTypeElfX86, 6, 4, 2, 10); + EXPECT_EQ(0U, bad_matches.size()); + + // Successive elements, no overlap. + EXPECT_TRUE(run_test("1+1=12+1,2+1=13+1")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(2U, matches.size()); + run_check(matches[0], kExeTypeWin32X86, 1, 1, 12, 1); + run_check(matches[1], kExeTypeWin32X86, 2, 1, 13, 1); + EXPECT_EQ(0U, bad_matches.size()); + + // Overlap in "old" file is okay. + EXPECT_TRUE(run_test("1+2=12+2,1+2=14+2")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(2U, matches.size()); + run_check(matches[0], kExeTypeWin32X86, 1, 2, 12, 2); + run_check(matches[1], kExeTypeWin32X86, 1, 2, 14, 2); + EXPECT_EQ(0U, bad_matches.size()); + + // Entire files: Have unknown type, so are recognized as such, and ignored. + EXPECT_TRUE(run_test("0+10=0+18")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(0U, matches.size()); + EXPECT_EQ(1U, bad_matches.size()); + run_check(bad_matches[0], kExeTypeUnknown, 0, 10, 0, 18); + + // Forgive matches that mix known type with unknown type. 
+ EXPECT_TRUE(run_test("1+2=0+18")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(0U, matches.size()); + EXPECT_EQ(1U, bad_matches.size()); + run_check(bad_matches[0], kExeTypeUnknown, 1, 2, 0, 18); + + EXPECT_TRUE(run_test("0+10=12+4")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(0U, matches.size()); + EXPECT_EQ(1U, bad_matches.size()); + run_check(bad_matches[0], kExeTypeUnknown, 0, 10, 12, 4); + + // Test invalid delimiter. + for (const std::string& imposed_matches : + {"1+2=12+4,4+2=5+2x", "1+2=12+4 4+2=5+2", "1+2=12+4,4+2=5+2 ", + "1+2=12+4 "}) { + EXPECT_FALSE(run_test(imposed_matches)); + EXPECT_EQ(ImposedMatchParser::kInvalidDelimiter, status); + } + + // Test parse errors, including uint32_t overflow. + for (const std::string& imposed_matches : + {"x1+2=12+4,4+2=5+2,6+4=2+3", "x1+2=12+4,4+2=5+2,6+4=2+3x", ",", " ", + "+2=12+4", "1+2+12+4", "1=2+12+4", " 1+2=12+4", "1+2= 12+4", "1", "1+2", + "1+2=", "1+2=12", "1+2=12+", "4294967296+2=12+4"}) { + EXPECT_FALSE(run_test(imposed_matches)); + EXPECT_EQ(ImposedMatchParser::kParseError, status); + } + + // Test bound errors, include 0-size. + for (const std::string& imposed_matches : + {"1+10=12+4", "1+2=12+7", "0+11=0+18", "0+12=0+17", "10+1=0+18", + "0+10=18+1", "0+0=0+18", "0+10=0+0", "1000000000+1=0+1000000000"}) { + EXPECT_FALSE(run_test(imposed_matches)); + EXPECT_EQ(ImposedMatchParser::kOutOfBound, status); + } + + // Test overlap errors. Matches that get ignored are still tested. + for (const std::string& imposed_matches : + {"1+2=12+4,4+2=5+2,6+4=2+4", "0+10=0+18,1+2=12+4", "6+4=2+10,3+2=5+2"}) { + EXPECT_FALSE(run_test(imposed_matches)); + EXPECT_EQ(ImposedMatchParser::kOverlapInNew, status); + } + + // Test type mismatch errors. 
+ EXPECT_FALSE(run_test("1+2=2+3")); + EXPECT_EQ(ImposedMatchParser::kTypeMismatch, status); + + EXPECT_FALSE(run_test("6+4=12+4")); + EXPECT_EQ(ImposedMatchParser::kTypeMismatch, status); +} + +} // namespace zucchini diff --git a/integration_test.cc b/integration_test.cc new file mode 100644 index 0000000..1baccc3 --- /dev/null +++ b/integration_test.cc @@ -0,0 +1,103 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stdint.h> + +#include <algorithm> +#include <string> +#include <vector> + +#include "base/files/file_path.h" +#include "base/files/memory_mapped_file.h" +#include "base/path_service.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/zucchini.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +base::FilePath MakeTestPath(const std::string& filename) { + base::FilePath path; + DCHECK(base::PathService::Get(base::DIR_SOURCE_ROOT, &path)); + return path.AppendASCII("components") + .AppendASCII("zucchini") + .AppendASCII("testdata") + .AppendASCII(filename); +} + +void TestGenApply(const std::string& old_filename, + const std::string& new_filename, + bool raw) { + base::FilePath old_path = MakeTestPath(old_filename); + base::FilePath new_path = MakeTestPath(new_filename); + + base::MemoryMappedFile old_file; + ASSERT_TRUE(old_file.Initialize(old_path)); + + base::MemoryMappedFile new_file; + ASSERT_TRUE(new_file.Initialize(new_path)); + + ConstBufferView old_region(old_file.data(), old_file.length()); + ConstBufferView new_region(new_file.data(), new_file.length()); + + EnsemblePatchWriter patch_writer(old_region, new_region); + + // Generate patch from "old" to "new". + ASSERT_EQ(status::kStatusSuccess, + raw ? 
GenerateBufferRaw(old_region, new_region, &patch_writer) + : GenerateBuffer(old_region, new_region, &patch_writer)); + + size_t patch_size = patch_writer.SerializedSize(); + EXPECT_GE(patch_size, 80U); // Minimum size is empty patch. + // TODO(etiennep): Add check on maximum expected size. + + std::vector<uint8_t> patch_buffer(patch_writer.SerializedSize()); + patch_writer.SerializeInto({patch_buffer.data(), patch_buffer.size()}); + + // Read back generated patch. + absl::optional<EnsemblePatchReader> patch_reader = + EnsemblePatchReader::Create({patch_buffer.data(), patch_buffer.size()}); + ASSERT_TRUE(patch_reader.has_value()); + + // Check basic properties. + EXPECT_TRUE(patch_reader->CheckOldFile(old_region)); + EXPECT_TRUE(patch_reader->CheckNewFile(new_region)); + EXPECT_EQ(old_file.length(), patch_reader->header().old_size); + // If new_size doesn't match expectation, the function is aborted. + ASSERT_EQ(new_file.length(), patch_reader->header().new_size); + + // Apply patch to "old" to get "patched new", ensure it's identical to "new". + std::vector<uint8_t> patched_new_buffer(new_region.size()); + ASSERT_EQ(status::kStatusSuccess, ApplyBuffer(old_region, *patch_reader, + {patched_new_buffer.data(), + patched_new_buffer.size()})); + + // Note that |new_region| and |patched_new_buffer| are the same size. 
+ EXPECT_TRUE(std::equal(new_region.begin(), new_region.end(), + patched_new_buffer.begin())); +} + +TEST(EndToEndTest, GenApplyRaw) { + TestGenApply("setup1.exe", "setup2.exe", true); + TestGenApply("chrome64_1.exe", "chrome64_2.exe", true); +} + +TEST(EndToEndTest, GenApplyIdentity) { + TestGenApply("setup1.exe", "setup1.exe", false); +} + +TEST(EndToEndTest, GenApplySimple) { + TestGenApply("setup1.exe", "setup2.exe", false); + TestGenApply("setup2.exe", "setup1.exe", false); + TestGenApply("chrome64_1.exe", "chrome64_2.exe", false); +} + +TEST(EndToEndTest, GenApplyCross) { + TestGenApply("setup1.exe", "chrome64_1.exe", false); +} + +} // namespace zucchini diff --git a/io_utils.cc b/io_utils.cc new file mode 100644 index 0000000..aa493d0 --- /dev/null +++ b/io_utils.cc @@ -0,0 +1,52 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/io_utils.h" + +#include <iostream> + +namespace zucchini { + +/******** LimitedOutputStream::StreamBuf ********/ + +LimitedOutputStream::StreamBuf::StreamBuf(std::ostream& os, int limit) + : os_(os), limit_(limit) {} + +LimitedOutputStream::StreamBuf::~StreamBuf() { + // Display warning in case we forget to flush data with std::endl. + if (!str().empty()) { + std::cerr << "Warning: LimitedOutputStream has " << str().length() + << " bytes of unflushed output." 
<< std::endl; + } +} + +int LimitedOutputStream::StreamBuf::sync() { + if (full()) { + str(""); + return 0; + } + os_ << str(); + str(""); + if (++counter_ >= limit_) + os_ << "(Additional output suppressed)\n"; + os_.flush(); + return 0; +} + +/******** LimitedOutputStream ********/ + +LimitedOutputStream::LimitedOutputStream(std::ostream& os, int limit) + : std::ostream(&buf_), buf_(os, limit) {} + +/******** PrefixSep ********/ + +std::ostream& operator<<(std::ostream& ostr, PrefixSep& obj) { + if (obj.first_) + obj.first_ = false; + else + ostr << obj.sep_str_; + return ostr; +} + +} // namespace zucchini diff --git a/io_utils.h b/io_utils.h new file mode 100644 index 0000000..63eeec8 --- /dev/null +++ b/io_utils.h @@ -0,0 +1,144 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_IO_UTILS_H_ +#define COMPONENTS_ZUCCHINI_IO_UTILS_H_ + +#include <stdint.h> + +#include <cctype> +#include <istream> +#include <ostream> +#include <sstream> +#include <string> + +namespace zucchini { + +// An std::ostream wrapper that that limits number of std::endl lines to output, +// useful for preventing excessive debug message output. Usage requires some +// work by the caller. Sample: +// static LimitedOutputStream los(std::cerr, 10); +// if (!los.full()) { +// ... // Prepare message. Block may be skipped so don't do other work! +// los << message; +// los << std::endl; // Important! 
+//   }
+class LimitedOutputStream : public std::ostream {
+ private:
+  class StreamBuf : public std::stringbuf {
+   public:
+    StreamBuf(std::ostream& os, int limit);
+    ~StreamBuf() override;
+
+    int sync() override;
+    bool full() const { return counter_ >= limit_; }
+
+   private:
+    std::ostream& os_;
+    const int limit_;
+    int counter_ = 0;
+  };
+
+ public:
+  LimitedOutputStream(std::ostream& os, int limit);
+  LimitedOutputStream(const LimitedOutputStream&) = delete;
+  const LimitedOutputStream& operator=(const LimitedOutputStream&) = delete;
+  bool full() const { return buf_.full(); }
+
+ private:
+  StreamBuf buf_;
+};
+
+// A class to render hexadecimal numbers for std::ostream with 0-padding. This
+// is more concise and flexible than stateful STL manipulator alternatives; so:
+//   std::ios old_fmt(nullptr);
+//   old_fmt.copyfmt(std::cout);
+//   std::cout << std::uppercase << std::hex;
+//   std::cout << std::setfill('0') << std::setw(8) << int_data << std::endl;
+//   std::cout.copyfmt(old_fmt);
+// can be expressed as:
+//   std::cout << AsHex<8>(int_data) << std::endl;
+template <int N, typename T = uint32_t>
+struct AsHex {
+  explicit AsHex(T value_in) : value(value_in) {}
+  T value;
+};
+
+template <int N, typename T>
+std::ostream& operator<<(std::ostream& os, const AsHex<N, T>& as_hex) {
+  char buf[N + 1];
+  buf[N] = '\0';
+  T value = as_hex.value;
+  for (int i = N - 1; i >= 0; --i, value >>= 4)
+    buf[i] = "0123456789ABCDEF"[static_cast<int>(value & 0x0F)];
+  if (value)
+    os << "...";  // To indicate data truncation, or negative values.
+  os << buf;
+  return os;
+}
+
+// An output manipulator to simplify printing list separators. Sample usage:
+//   PrefixSep sep(",");
+//   for (int i : {3, 1, 4, 1, 5, 9})
+//     std::cout << sep << i;
+//   std::cout << std::endl;  // Outputs "3,1,4,1,5,9\n".
+class PrefixSep { + public: + explicit PrefixSep(const std::string& sep_str) : sep_str_(sep_str) {} + PrefixSep(const PrefixSep&) = delete; + const PrefixSep& operator=(const PrefixSep&) = delete; + + friend std::ostream& operator<<(std::ostream& ostr, PrefixSep& obj); + + private: + std::string sep_str_; + bool first_ = true; +}; + +// An input manipulator that dictates the expected next character in +// |std::istream|, and invalidates the stream if expectation is not met. +class EatChar { + public: + explicit EatChar(char ch) : ch_(ch) {} + EatChar(const EatChar&) = delete; + const EatChar& operator=(const EatChar&) = delete; + + friend inline std::istream& operator>>(std::istream& istr, + const EatChar& obj) { + if (!istr.fail() && istr.get() != obj.ch_) + istr.setstate(std::ios_base::failbit); + return istr; + } + + private: + char ch_; +}; + +// An input manipulator that reads an unsigned integer from |std::istream|, +// and invalidates the stream on failure. Intolerant of leading white spaces, +template <typename T> +class StrictUInt { + public: + explicit StrictUInt(T& var) : var_(var) {} + StrictUInt(const StrictUInt&) = default; + + friend std::istream& operator>>(std::istream& istr, StrictUInt<T> obj) { + if (!istr.fail() && !::isdigit(istr.peek())) { + istr.setstate(std::ios_base::failbit); + return istr; + } + return istr >> obj.var_; + } + + private: + T& var_; +}; + +// Stub out uint8_t: istream treats it as char, and value won't be read as int! +template <> +struct StrictUInt<uint8_t> {}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_IO_UTILS_H_ diff --git a/io_utils_unittest.cc b/io_utils_unittest.cc new file mode 100644 index 0000000..521e7ce --- /dev/null +++ b/io_utils_unittest.cc @@ -0,0 +1,160 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/io_utils.h" + +#include <stdint.h> + +#include <sstream> +#include <string> + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(IOUtilsTest, LimitedOutputStream) { + std::ostringstream oss; + LimitedOutputStream los(oss, 3); + EXPECT_FALSE(los.full()); + EXPECT_EQ("", oss.str()); + // Line 1. + los << "a" << 1 << "b" << 2 << "c" << 3 << std::endl; + EXPECT_FALSE(los.full()); + EXPECT_EQ("a1b2c3\n", oss.str()); + // Line 2. + oss.str(""); + los << "\r\r\n\n" << std::endl; // Manual new lines don't count. + EXPECT_FALSE(los.full()); + EXPECT_EQ("\r\r\n\n\n", oss.str()); + // Line 3. + oss.str(""); + los << "blah" << 137; + EXPECT_FALSE(los.full()); + los << std::endl; + EXPECT_TRUE(los.full()); + EXPECT_EQ("blah137\n(Additional output suppressed)\n", oss.str()); + // Not testing adding more lines: the behavior is undefined since we rely on + // caller suppressing output if |los.full()| is true. +} + +TEST(IOUtilsTest, AsHex) { + std::ostringstream oss; + // Helper for single-line tests. Eats dummy std::ostream& from operator<<(). 
+ auto extract = [&oss](std::ostream&) -> std::string { + std::string ret = oss.str(); + oss.str(""); + return ret; + }; + + EXPECT_EQ("00000000", extract(oss << AsHex<8>(0))); + EXPECT_EQ("12345678", extract(oss << AsHex<8>(0x12345678U))); + EXPECT_EQ("9ABCDEF0", extract(oss << AsHex<8>(0x9ABCDEF0U))); + EXPECT_EQ("(00000064)", extract(oss << "(" << AsHex<8>(100) << ")")); + EXPECT_EQ("00FFFF", extract(oss << AsHex<6>(0xFFFFU))); + EXPECT_EQ("FFFF", extract(oss << AsHex<4>(0xFFFFU))); + EXPECT_EQ("...FF", extract(oss << AsHex<2>(0xFFFFU))); + EXPECT_EQ("...00", extract(oss << AsHex<2>(0x100U))); + EXPECT_EQ("FF\n", extract(oss << AsHex<2>(0xFFU) << std::endl)); + EXPECT_EQ("132457689BACDEF0", + extract(oss << AsHex<16, uint64_t>(0x132457689BACDEF0LLU))); + EXPECT_EQ("000000000001", extract(oss << AsHex<12, uint8_t>(1))); + EXPECT_EQ("00000089", extract(oss << AsHex<8, int32_t>(137))); + EXPECT_EQ("...FFFFFFFF", extract(oss << AsHex<8, int32_t>(-1))); + EXPECT_EQ("7FFF", extract(oss << AsHex<4, int16_t>(0x7FFFU))); + EXPECT_EQ("...8000", extract(oss << AsHex<4, int16_t>(0x8000U))); + EXPECT_EQ("8000", extract(oss << AsHex<4, uint16_t>(0x8000U))); +} + +TEST(IOUtilsTest, PrefixSep) { + std::ostringstream oss; + PrefixSep sep(","); + oss << sep << 3; + EXPECT_EQ("3", oss.str()); + oss << sep << 1; + EXPECT_EQ("3,1", oss.str()); + oss << sep << 4 << sep << 1 << sep << "59"; + EXPECT_EQ("3,1,4,1,59", oss.str()); +} + +TEST(IOUtilsTest, PrefixSepAlt) { + std::ostringstream oss; + PrefixSep sep(" "); + oss << sep << 3; + EXPECT_EQ("3", oss.str()); + oss << sep << 1; + EXPECT_EQ("3 1", oss.str()); + oss << sep << 4 << sep << 1 << sep << "59"; + EXPECT_EQ("3 1 4 1 59", oss.str()); +} + +TEST(IOUtilsTest, EatChar) { + std::istringstream main_iss; + // Helper for single-line tests. 
+ auto iss = [&main_iss](const std::string s) -> std::istringstream& { + main_iss.clear(); + main_iss.str(s); + return main_iss; + }; + + EXPECT_TRUE(iss("a,1") >> EatChar('a') >> EatChar(',') >> EatChar('1')); + EXPECT_FALSE(iss("a,a") >> EatChar('a') >> EatChar(',') >> EatChar('1')); + EXPECT_FALSE(iss("a") >> EatChar('a') >> EatChar(',') >> EatChar('1')); + EXPECT_FALSE(iss("x") >> EatChar('X')); + EXPECT_TRUE(iss("_\n") >> EatChar('_') >> EatChar('\n')); +} + +TEST(IOUtilsTest, StrictUInt) { + std::istringstream main_iss; + // Helper for single-line tests. + auto iss = [&main_iss](const std::string& s) -> std::istringstream& { + main_iss.clear(); + main_iss.str(s); + return main_iss; + }; + + uint32_t u32 = 0; + EXPECT_TRUE(iss("1234") >> StrictUInt<uint32_t>(u32)); + EXPECT_EQ(uint32_t(1234), u32); + EXPECT_TRUE(iss("001234") >> StrictUInt<uint32_t>(u32)); + EXPECT_EQ(uint32_t(1234), u32); + EXPECT_FALSE(iss("blahblah") >> StrictUInt<uint32_t>(u32)); + EXPECT_EQ(uint32_t(1234), u32); // No overwrite on failure. + EXPECT_TRUE(iss("137suffix") >> StrictUInt<uint32_t>(u32)); + EXPECT_EQ(uint32_t(137), u32); + EXPECT_FALSE(iss(" 1234") >> StrictUInt<uint32_t>(u32)); + EXPECT_FALSE(iss("-1234") >> StrictUInt<uint32_t>(u32)); + + uint16_t u16 = 0; + EXPECT_TRUE(iss("65535") >> StrictUInt<uint16_t>(u16)); + EXPECT_EQ(uint16_t(65535), u16); + EXPECT_FALSE(iss("65536") >> StrictUInt<uint16_t>(u16)); // Overflow. + + uint64_t u64 = 0; + EXPECT_TRUE(iss("1000000000001") >> StrictUInt<uint64_t>(u64)); + EXPECT_EQ(uint64_t(1000000000001LL), u64); + + // uint8_t is stubbed out, so no tests for it. 
+} + +TEST(IOUtilsTest, ParseSimpleEquations) { + std::istringstream iss("123+456=579,4-3=1"); + uint32_t a = 0; + uint32_t b = 0; + uint32_t c = 0; + EXPECT_TRUE(iss >> StrictUInt<uint32_t>(a) >> EatChar('+') >> + StrictUInt<uint32_t>(b) >> EatChar('=') >> + StrictUInt<uint32_t>(c)); + EXPECT_EQ(uint32_t(123), a); + EXPECT_EQ(uint32_t(456), b); + EXPECT_EQ(uint32_t(579), c); + EXPECT_TRUE(iss >> EatChar(',')); + EXPECT_TRUE(iss >> StrictUInt<uint32_t>(a) >> EatChar('-') >> + StrictUInt<uint32_t>(b) >> EatChar('=') >> + StrictUInt<uint32_t>(c)); + EXPECT_EQ(uint32_t(4), a); + EXPECT_EQ(uint32_t(3), b); + EXPECT_EQ(uint32_t(1), c); +} + +} // namespace zucchini diff --git a/main_utils.cc b/main_utils.cc new file mode 100644 index 0000000..8c47c91 --- /dev/null +++ b/main_utils.cc @@ -0,0 +1,255 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/main_utils.h" + +#include <stddef.h> + +#include <memory> +#include <ostream> +#include <string> +#include <vector> + +#include "base/command_line.h" +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/logging.h" +#include "base/process/process_handle.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_split.h" +#include "base/strings/string_util.h" +#include "base/time/time.h" +#include "build/build_config.h" +#include "components/zucchini/io_utils.h" +#include "components/zucchini/zucchini_commands.h" + +#if defined(OS_WIN) +#include <windows.h> // This include must come first. + +#include <psapi.h> +#endif + +namespace { + +/******** Command ********/ + +// Specifications for a Zucchini command. 
+struct Command { + constexpr Command(const char* name_in, + const char* usage_in, + int num_args_in, + CommandFunction command_function_in) + : name(name_in), + usage(usage_in), + num_args(num_args_in), + command_function(command_function_in) {} + Command(const Command&) = default; + ~Command() = default; + + // Unique name of command. |-name| is used to select from command-line. + const char* const name; + + // Usage help text of command. + const char* const usage; + + // Number of arguments (assumed to be filenames) used by the command. + const int num_args; + + // Main function to run for the command. + const CommandFunction command_function; +}; + +/******** List of Zucchini commands ********/ + +constexpr Command kCommands[] = { + {"gen", + "-gen <old_file> <new_file> <patch_file> [-raw] [-keep]" + " [-impose=#+#=#+#,#+#=#+#,...]", + 3, &MainGen}, + {"apply", "-apply <old_file> <patch_file> <new_file> [-keep]", 3, + &MainApply}, + {"read", "-read <exe> [-dump]", 1, &MainRead}, + {"detect", "-detect <archive_file>", 1, &MainDetect}, + {"match", "-match <old_file> <new_file> [-impose=#+#=#+#,#+#=#+#,...]", 2, + &MainMatch}, + {"crc32", "-crc32 <file>", 1, &MainCrc32}, +}; + +/******** GetPeakMemoryMetrics ********/ + +#if defined(OS_LINUX) || defined(OS_CHROMEOS) +// Linux does not have an exact mapping to the values used on Windows so use a +// close approximation: +// peak_virtual_memory ~= peak_page_file_usage +// resident_set_size_hwm (high water mark) ~= peak_working_set_size +// +// On failure the input values will be set to 0. 
+void GetPeakMemoryMetrics(size_t* peak_virtual_memory, + size_t* resident_set_size_hwm) { + *peak_virtual_memory = 0; + *resident_set_size_hwm = 0; + auto status_path = + base::FilePath("/proc") + .Append(base::NumberToString(base::GetCurrentProcessHandle())) + .Append("status"); + std::string contents_string; + base::ReadFileToString(status_path, &contents_string); + std::vector<base::StringPiece> lines = base::SplitStringPiece( + contents_string, "\n", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); + + for (const auto& line : lines) { + // Tokens should generally be of the form "Metric: <val> kB" + std::vector<base::StringPiece> tokens = base::SplitStringPiece( + line, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); + if (tokens.size() < 2) + continue; + + if (tokens[0] == "VmPeak:") { + if (base::StringToSizeT(tokens[1], peak_virtual_memory)) { + *peak_virtual_memory *= 1024; // in kiB + if (*resident_set_size_hwm) + return; + } + } else if (tokens[0] == "VmHWM:") { + if (base::StringToSizeT(tokens[1], resident_set_size_hwm)) { + *resident_set_size_hwm *= 1024; // in kiB + if (*peak_virtual_memory) + return; + } + } + } +} +#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) + +#if defined(OS_WIN) +// On failure the input values will be set to 0. +void GetPeakMemoryMetrics(size_t* peak_page_file_usage, + size_t* peak_working_set_size) { + *peak_page_file_usage = 0; + *peak_working_set_size = 0; + PROCESS_MEMORY_COUNTERS pmc; + if (::GetProcessMemoryInfo(::GetCurrentProcess(), &pmc, sizeof(pmc))) { + *peak_page_file_usage = pmc.PeakPagefileUsage; + *peak_working_set_size = pmc.PeakWorkingSetSize; + } +} +#endif // defined(OS_WIN) + +/******** ScopedResourceUsageTracker ********/ + +// A class to track and log system resource usage. +class ScopedResourceUsageTracker { + public: + // Initializes states for tracking. 
+ ScopedResourceUsageTracker() { + start_time_ = base::TimeTicks::Now(); + +#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN) + GetPeakMemoryMetrics(&start_peak_page_file_usage_, + &start_peak_working_set_size_); +#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN) + } + + // Computes and prints usage. + ~ScopedResourceUsageTracker() { + base::TimeTicks end_time = base::TimeTicks::Now(); + +#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN) + size_t cur_peak_page_file_usage = 0; + size_t cur_peak_working_set_size = 0; + GetPeakMemoryMetrics(&cur_peak_page_file_usage, &cur_peak_working_set_size); + + LOG(INFO) << "Zucchini.PeakPagefileUsage " + << cur_peak_page_file_usage / 1024 << " KiB"; + LOG(INFO) << "Zucchini.PeakPagefileUsageChange " + << (cur_peak_page_file_usage - start_peak_page_file_usage_) / 1024 + << " KiB"; + LOG(INFO) << "Zucchini.PeakWorkingSetSize " + << cur_peak_working_set_size / 1024 << " KiB"; + LOG(INFO) << "Zucchini.PeakWorkingSetSizeChange " + << (cur_peak_working_set_size - start_peak_working_set_size_) / + 1024 + << " KiB"; +#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN) + + LOG(INFO) << "Zucchini.TotalTime " << (end_time - start_time_).InSecondsF() + << " s"; + } + + private: + base::TimeTicks start_time_; +#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN) + size_t start_peak_page_file_usage_ = 0; + size_t start_peak_working_set_size_ = 0; +#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_WIN) +}; + +/******** Helper functions ********/ + +// Translates |command_line| arguments to a vector of base::FilePath (expecting +// exactly |expected_count|). On success, writes the results to |paths| and +// returns true. Otherwise returns false. 
+bool CheckAndGetFilePathParams(const base::CommandLine& command_line, + size_t expected_count, + std::vector<base::FilePath>* paths) { + const base::CommandLine::StringVector& args = command_line.GetArgs(); + if (args.size() != expected_count) + return false; + + paths->clear(); + paths->reserve(args.size()); + for (const auto& arg : args) + paths->emplace_back(arg); + return true; +} + +// Prints main Zucchini usage text. +void PrintUsage(std::ostream& err) { + err << "Usage:" << std::endl; + for (const Command& command : kCommands) + err << " zucchini " << command.usage << std::endl; +} + +} // namespace + +/******** Exported Functions ********/ + +zucchini::status::Code RunZucchiniCommand(const base::CommandLine& command_line, + std::ostream& out, + std::ostream& err) { + // Look for a command with name that matches input. + const Command* command_use = nullptr; + for (const Command& command : kCommands) { + if (command_line.HasSwitch(command.name)) { + if (command_use) { // Too many commands found. + command_use = nullptr; // Set to null to flag error. + break; + } + command_use = &command; + } + } + + // Expect exactly 1 matching command. If 0 or >= 2, print usage and quit. + if (!command_use) { + err << "Must have exactly one of:" << std::endl; + err << " ["; + zucchini::PrefixSep sep(", "); + for (const Command& command : kCommands) + err << sep << "-" << command.name; + err << "]" << std::endl; + PrintUsage(err); + return zucchini::status::kStatusInvalidParam; + } + + // Try to parse filename arguments. On failure, print usage and quit. 
+ std::vector<base::FilePath> paths; + if (!CheckAndGetFilePathParams(command_line, command_use->num_args, &paths)) { + err << command_use->usage << std::endl; + PrintUsage(err); + return zucchini::status::kStatusInvalidParam; + } + + ScopedResourceUsageTracker resource_usage_tracker; + return command_use->command_function({command_line, paths, out, err}); +} diff --git a/main_utils.h b/main_utils.h new file mode 100644 index 0000000..6c97aad --- /dev/null +++ b/main_utils.h @@ -0,0 +1,34 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_MAIN_UTILS_H_ +#define COMPONENTS_ZUCCHINI_MAIN_UTILS_H_ + +#include <iosfwd> + +#include "components/zucchini/zucchini.h" + +// Utilities to run Zucchini command based on command-line input, and to print +// help messages. + +namespace base { + +class CommandLine; + +} // namespace base + +// To add a new Zucchini command: +// 1. Declare the command's main function in zucchini_command.h. Its signature +// must match CommandFunction. +// 2. Define the command's main function in zucchini_command.cc. +// 3. Add a new entry into |kCommands| in main_utils.cc. + +// Searches |command_line| for Zucchini commands. If a unique command is found, +// runs it (passes |out| and |err|), and logs resource usage. Otherwise prints +// help message to |err|. Returns Zucchini status code for error handling. +zucchini::status::Code RunZucchiniCommand(const base::CommandLine& command_line, + std::ostream& out, + std::ostream& err); + +#endif // COMPONENTS_ZUCCHINI_MAIN_UTILS_H_ diff --git a/mapped_file.cc b/mapped_file.cc new file mode 100644 index 0000000..a742414 --- /dev/null +++ b/mapped_file.cc @@ -0,0 +1,69 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/mapped_file.h" + +#include <utility> + +#include "base/files/file_util.h" +#include "build/build_config.h" + +namespace zucchini { + +MappedFileReader::MappedFileReader(base::File file) { + if (!file.IsValid()) { + error_ = "Invalid file."; + return; // |buffer_| will be uninitialized, and therefore invalid. + } + if (!buffer_.Initialize(std::move(file))) { + error_ = "Can't map file to memory."; + } +} + +MappedFileWriter::MappedFileWriter(const base::FilePath& file_path, + base::File file, + size_t length) + : file_path_(file_path), delete_behavior_(kManualDeleteOnClose) { + if (!file.IsValid()) { + error_ = "Invalid file."; + return; // |buffer_| will be uninitialized, and therefore invalid. + } + +#if defined(OS_WIN) + file_handle_ = file.Duplicate(); + // Tell the OS to delete the file when all handles are closed. + if (file_handle_.DeleteOnClose(true)) { + delete_behavior_ = kAutoDeleteOnClose; + } else { + error_ = "Failed to mark file for delete-on-close."; + } +#endif // defined(OS_WIN) + + bool is_ok = buffer_.Initialize(std::move(file), {0, length}, + base::MemoryMappedFile::READ_WRITE_EXTEND); + if (!is_ok) { + error_ = "Can't map file to memory."; + } +} + +MappedFileWriter::~MappedFileWriter() { + if (!HasError() && delete_behavior_ == kManualDeleteOnClose && + !file_path_.empty() && !base::DeleteFile(file_path_)) { + error_ = "Failed to delete file."; + } +} + +bool MappedFileWriter::Keep() { +#if defined(OS_WIN) + if (delete_behavior_ == kAutoDeleteOnClose && + !file_handle_.DeleteOnClose(false)) { + error_ = "Failed to prevent deletion of file."; + return false; + } +#endif // defined(OS_WIN) + delete_behavior_ = kKeep; + return true; +} + +} // namespace zucchini diff --git a/mapped_file.h b/mapped_file.h new file mode 100644 index 0000000..f15e09a --- /dev/null +++ b/mapped_file.h @@ -0,0 +1,82 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_MAPPED_FILE_H_ +#define COMPONENTS_ZUCCHINI_MAPPED_FILE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <string> + +#include "base/files/file.h" +#include "base/files/file_path.h" +#include "base/files/memory_mapped_file.h" +#include "components/zucchini/buffer_view.h" + +namespace zucchini { + +// A file reader wrapper. +class MappedFileReader { + public: + // Maps |file| to memory for reading. Also validates |file|. Errors are + // available via HasError() and error(). + explicit MappedFileReader(base::File file); + MappedFileReader(const MappedFileReader&) = delete; + const MappedFileReader& operator=(const MappedFileReader&) = delete; + + const uint8_t* data() const { return buffer_.data(); } + size_t length() const { return buffer_.length(); } + zucchini::ConstBufferView region() const { return {data(), length()}; } + + bool HasError() { return !error_.empty() || !buffer_.IsValid(); } + const std::string& error() { return error_; } + + private: + std::string error_; + base::MemoryMappedFile buffer_; +}; + +// A file writer wrapper. The target file is deleted on destruction unless +// Keep() is called. +class MappedFileWriter { + public: + // Maps |file| to memory for writing. |file_path| is needed for auto delete on + // UNIX systems, but can be empty if auto delete is not needed. Errors are + // available via HasError() and error(). 
+ MappedFileWriter(const base::FilePath& file_path, + base::File file, + size_t length); + MappedFileWriter(const MappedFileWriter&) = delete; + const MappedFileWriter& operator=(const MappedFileWriter&) = delete; + ~MappedFileWriter(); + + uint8_t* data() { return buffer_.data(); } + size_t length() const { return buffer_.length(); } + zucchini::MutableBufferView region() { return {data(), length()}; } + + bool HasError() { return !error_.empty() || !buffer_.IsValid(); } + const std::string& error() { return error_; } + + // Indicates that the file should not be deleted on destruction. Returns true + // iff the operation succeeds. + bool Keep(); + + private: + enum OnCloseDeleteBehavior { + kKeep, + kAutoDeleteOnClose, + kManualDeleteOnClose + }; + + std::string error_; + base::FilePath file_path_; + base::File file_handle_; + base::MemoryMappedFile buffer_; + OnCloseDeleteBehavior delete_behavior_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_MAPPED_FILE_H_ diff --git a/mapped_file_unittest.cc b/mapped_file_unittest.cc new file mode 100644 index 0000000..e3ee6dc --- /dev/null +++ b/mapped_file_unittest.cc @@ -0,0 +1,61 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/mapped_file.h" + +#include <utility> + +#include "base/files/file.h" +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/files/scoped_temp_dir.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +class MappedFileWriterTest : public testing::Test { + protected: + MappedFileWriterTest() = default; + void SetUp() override { + ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); + file_path_ = temp_dir_.GetPath().AppendASCII("test-file"); + } + + base::FilePath file_path_; + + private: + base::ScopedTempDir temp_dir_; +}; + +TEST_F(MappedFileWriterTest, Keep) { + EXPECT_FALSE(base::PathExists(file_path_)); + { + using base::File; + File file(file_path_, File::FLAG_CREATE_ALWAYS | File::FLAG_READ | + File::FLAG_WRITE | File::FLAG_SHARE_DELETE | + File::FLAG_CAN_DELETE_ON_CLOSE); + MappedFileWriter file_writer(file_path_, std::move(file), 10); + EXPECT_FALSE(file_writer.HasError()); + EXPECT_TRUE(file_writer.Keep()); + EXPECT_FALSE(file_writer.HasError()); + EXPECT_TRUE(file_writer.error().empty()); + } + EXPECT_TRUE(base::PathExists(file_path_)); +} + +TEST_F(MappedFileWriterTest, DeleteOnClose) { + EXPECT_FALSE(base::PathExists(file_path_)); + { + using base::File; + File file(file_path_, File::FLAG_CREATE_ALWAYS | File::FLAG_READ | + File::FLAG_WRITE | File::FLAG_SHARE_DELETE | + File::FLAG_CAN_DELETE_ON_CLOSE); + MappedFileWriter file_writer(file_path_, std::move(file), 10); + EXPECT_FALSE(file_writer.HasError()); + EXPECT_TRUE(file_writer.error().empty()); + } + EXPECT_FALSE(base::PathExists(file_path_)); +} + +} // namespace zucchini diff --git a/patch_read_write_unittest.cc b/patch_read_write_unittest.cc new file mode 100644 index 0000000..627513c --- /dev/null +++ b/patch_read_write_unittest.cc @@ -0,0 +1,730 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#include "components/zucchini/patch_reader.h"
+#include "components/zucchini/patch_writer.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+namespace {
+
+// Used for initialization of raw test data.
+using ByteVector = std::vector<uint8_t>;
+
+// Helper function that creates an object of type |T| and initializes it from
+// data in |buffer|. Ensures initialization is successful. |buffer| is passed as
+// pointer to avoid passing a temporary, which can cause dangling references.
+template <class T>
+T TestInitialize(const ByteVector* buffer) {
+  T value;
+  BufferSource buffer_source(buffer->data(), buffer->size());
+  EXPECT_TRUE(value.Initialize(&buffer_source));
+  EXPECT_TRUE(buffer_source.empty());  // Make sure all data has been consumed.
+  return value;
+}
+
+// Helper function that creates an object of type |T| and tries to initialize
+// it from invalid data in |buffer|, expecting the operation to fail. |buffer|
+// is passed as pointer to avoid passing a temporary, which can cause dangling
+// references.
+template <class T>
+void TestInvalidInitialize(const ByteVector* buffer) {
+  T value;
+  BufferSource buffer_source(buffer->data(), buffer->size());
+  EXPECT_FALSE(value.Initialize(&buffer_source));
+}
+
+// Helper function that serializes |value| into a buffer. Ensures that
+// serialization is successful and that the result matches |expected|.
+template <class T> +void TestSerialize(const ByteVector& expected, const T& value) { + size_t size = value.SerializedSize(); + EXPECT_EQ(expected.size(), size); + ByteVector buffer(size); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_TRUE(value.SerializeInto(&buffer_sink)); + EXPECT_EQ(expected, buffer); +} + +ByteVector CreatePatchElement() { + return { + // PatchElementHeader + 0x01, 0, 0, 0, // old_offset + 0x51, 0, 0, 0, // old_length + 0x03, 0, 0, 0, // new_offset + 0x13, 0, 0, 0, // new_length + 'P', 'x', '8', '6', // exe_type = EXE_TYPE_WIN32_X86 + // EquivalenceSource + 1, 0, 0, 0, // src_skip size + 0x10, // src_skip content + 1, 0, 0, 0, // dst_skip size + 0x00, // dst_skip content + 1, 0, 0, 0, // copy_count size + 0x12, // copy_count content + // ExtraDataSource + 1, 0, 0, 0, // extra_data size + 0x13, // extra_data content + // RawDeltaSource + 1, 0, 0, 0, // raw_delta_skip size + 0x14, // raw_delta_skip content + 1, 0, 0, 0, // raw_delta_diff size + 0x15, // raw_delta_diff content + // ReferenceDeltaSource + 1, 0, 0, 0, // reference_delta size + 0x16, // reference_delta content + // PatchElementReader + 2, 0, 0, 0, // pool count + 0, // pool_tag + 1, 0, 0, 0, // extra_targets size + 0x17, // extra_targets content + 2, // pool_tag + 1, 0, 0, 0, // extra_targets size + 0x18, // extra_targets content + }; +} + +ByteVector CreateElementMatch() { + return { + // PatchElementHeader + 0x01, 0, 0, 0, // old_offset + 0x02, 0, 0, 0, // old_length + 0x03, 0, 0, 0, // new_offset + 0x04, 0, 0, 0, // new_length + 'D', 'E', 'X', ' ', // exe_type = kExeTypeDex + }; +} + +// Helper to mutate test |data| (e.g., from CreatePatchElement()) at |idx| from +// |from_val| (as sanity check) to |to_val|. 
+void ModifyByte(size_t idx, + uint8_t from_val, + uint8_t to_val, + std::vector<uint8_t>* data) { + ASSERT_EQ(from_val, (*data)[idx]); + (*data)[idx] = to_val; +} + +} // namespace + +bool operator==(const ByteVector& a, ConstBufferView b) { + return a == ByteVector(b.begin(), b.end()); +} + +TEST(PatchTest, ParseSerializeElementMatch) { + ByteVector data = CreateElementMatch(); + BufferSource buffer_source(data.data(), data.size()); + ElementMatch element_match = {}; + EXPECT_TRUE(patch::ParseElementMatch(&buffer_source, &element_match)); + EXPECT_EQ(kExeTypeDex, element_match.exe_type()); + EXPECT_EQ(kExeTypeDex, element_match.old_element.exe_type); + EXPECT_EQ(kExeTypeDex, element_match.new_element.exe_type); + EXPECT_EQ(0x1U, element_match.old_element.offset); + EXPECT_EQ(0x2U, element_match.old_element.size); + EXPECT_EQ(0x3U, element_match.new_element.offset); + EXPECT_EQ(0x4U, element_match.new_element.size); + + size_t size = patch::SerializedElementMatchSize(element_match); + EXPECT_EQ(data.size(), size); + ByteVector buffer(size); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_TRUE(patch::SerializeElementMatch(element_match, &buffer_sink)); + EXPECT_EQ(data, buffer); +} + +TEST(PatchTest, ParseElementMatchTooSmall) { + ByteVector data = {4}; + BufferSource buffer_source(data.data(), data.size()); + ElementMatch element_match = {}; + EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match)); +} + +TEST(PatchTest, ParseElementMatchNoLength) { + // Set old_length to 0 to trigger an error. + { + ByteVector data = CreateElementMatch(); + // old_length := 0. + ModifyByte(offsetof(PatchElementHeader, old_length), 0x02, 0x00, &data); + BufferSource buffer_source(data.data(), data.size()); + ElementMatch element_match = {}; + EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match)); + } + // Set new_length to 0 to trigger an error. + { + ByteVector data = CreateElementMatch(); + // new_length := 0. 
+    ModifyByte(offsetof(PatchElementHeader, new_length), 0x04, 0x00, &data);
+    BufferSource buffer_source(data.data(), data.size());
+    ElementMatch element_match = {};
+    EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match));
+  }
+  // Set both new_length and old_length to 0 to trigger an error.
+  {
+    ByteVector data = CreateElementMatch();
+    // old_length := 0.
+    ModifyByte(offsetof(PatchElementHeader, old_length), 0x02, 0x00, &data);
+    // new_length := 0.
+    ModifyByte(offsetof(PatchElementHeader, new_length), 0x04, 0x00, &data);
+    BufferSource buffer_source(data.data(), data.size());
+    ElementMatch element_match = {};
+    EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match));
+  }
+}
+
+TEST(PatchTest, ParseSerializeElementMatchExeMismatch) {
+  ByteVector buffer(28);
+  BufferSink buffer_sink(buffer.data(), buffer.size());
+  EXPECT_FALSE(patch::SerializeElementMatch(
+      ElementMatch{{{1, 2}, kExeTypeNoOp}, {{3, 4}, kExeTypeWin32X86}},
+      &buffer_sink));
+}
+
+TEST(PatchTest, SerializeElementMatchTooSmall) {
+  ByteVector buffer(4);
+  BufferSink buffer_sink(buffer.data(), buffer.size());
+  EXPECT_FALSE(patch::SerializeElementMatch(
+      ElementMatch{{{1, 2}, kExeTypeDex}, {{3, 4}, kExeTypeDex}},
+      &buffer_sink));
+}
+
+TEST(PatchTest, ParseSerializeBuffer) {
+  auto TestSerialize = [](const ByteVector& expected, const ByteVector& value) {
+    size_t size = patch::SerializedBufferSize(value);
+    EXPECT_EQ(expected.size(), size);
+    ByteVector buffer(size);
+    BufferSink buffer_sink(buffer.data(), buffer.size());
+    EXPECT_TRUE(patch::SerializeBuffer(value, &buffer_sink));
+    EXPECT_EQ(expected, buffer);
+  };
+
+  // |data| is passed as pointer to avoid passing a temporary, which can cause
+  // dangling references.
+  auto TestParse = [](const ByteVector* data) {
+    BufferSource value;
+    BufferSource buffer_source(data->data(), data->size());
+    EXPECT_TRUE(patch::ParseBuffer(&buffer_source, &value));
+    // Make sure all data has been consumed.
+ EXPECT_TRUE(buffer_source.empty()); + return value; + }; + + ByteVector data = { + 0, 0, 0, 0, // size + }; + BufferSource buffer = TestParse(&data); + EXPECT_TRUE(buffer.empty()); + TestSerialize(data, ByteVector({})); + + data = { + 3, 0, 0, 0, // size + 1, 2, 3 // content + }; + buffer = TestParse(&data); + EXPECT_EQ(3U, buffer.size()); + EXPECT_EQ(ByteVector({1, 2, 3}), ByteVector(buffer.begin(), buffer.end())); + TestSerialize(data, ByteVector({1, 2, 3})); + + // Ill-formed input. + data = { + 3, 0, 0, 0, // size + 1, 2 // insufficient content + }; + BufferSource value; + BufferSource buffer_source(data.data(), data.size()); + EXPECT_FALSE(patch::ParseBuffer(&buffer_source, &value)); + EXPECT_TRUE(value.empty()); +} + +TEST(PatchTest, SerializeBufferTooSmall) { + ByteVector buffer(3); + BufferSink buffer_sink(buffer.data(), buffer.size()); + EXPECT_FALSE(patch::SerializeBuffer(ByteVector(), &buffer_sink)); +} + +TEST(EquivalenceSinkSourceTest, Empty) { + ByteVector data = { + // EquivalenceSource + 0, 0, 0, 0, // src_skip size + 0, 0, 0, 0, // dst_skip size + 0, 0, 0, 0, // copy_count size + }; + EquivalenceSource equivalence_source = + TestInitialize<EquivalenceSource>(&data); + + EXPECT_FALSE(equivalence_source.GetNext()); + EXPECT_TRUE(equivalence_source.Done()); + + TestSerialize(data, EquivalenceSink()); +} + +TEST(EquivalenceSourceSinkTest, Normal) { + ByteVector data = { + // EquivalenceSource + 2, 0, 0, 0, // src_skip size + 6, 7, // src_skip content + 2, 0, 0, 0, // dst_skip size + 7, 1, // dst_skip content + 2, 0, 0, 0, // copy_count size + 2, 1 // copy_count content + }; + EquivalenceSource equivalence_source = + TestInitialize<EquivalenceSource>(&data); + auto equivalence = equivalence_source.GetNext(); + EXPECT_FALSE(equivalence_source.Done()); + EXPECT_TRUE(equivalence.has_value()); + EXPECT_EQ(offset_t(3), equivalence->src_offset); + EXPECT_EQ(offset_t(7), equivalence->dst_offset); + EXPECT_EQ(offset_t(2), equivalence->length); + + equivalence 
= equivalence_source.GetNext(); + EXPECT_TRUE(equivalence_source.Done()); + EXPECT_TRUE(equivalence.has_value()); + EXPECT_EQ(offset_t(1), equivalence->src_offset); + EXPECT_EQ(offset_t(10), equivalence->dst_offset); + EXPECT_EQ(offset_t(1), equivalence->length); + + equivalence = equivalence_source.GetNext(); + EXPECT_FALSE(equivalence.has_value()); + + EquivalenceSink equivalence_sink; + equivalence_sink.PutNext(Equivalence{3, 7, 2}); + equivalence_sink.PutNext(Equivalence{1, 10, 1}); + TestSerialize(data, equivalence_sink); +} + +TEST(ExtraDataSourceSinkTest, Empty) { + ByteVector data = { + // ExtraDataSource + 0, 0, 0, 0, // extra_data size + }; + ExtraDataSource extra_data_source = TestInitialize<ExtraDataSource>(&data); + + EXPECT_FALSE(extra_data_source.GetNext(2)); + EXPECT_TRUE(extra_data_source.Done()); + + TestSerialize(data, ExtraDataSink()); +} + +TEST(ExtraDataSourceSinkTest, Normal) { + ByteVector data = { + // ExtraDataSource + 5, 0, 0, 0, // extra_data size + 1, 2, 3, 4, 5, // extra_data content + }; + ExtraDataSource extra_data_source = TestInitialize<ExtraDataSource>(&data); + EXPECT_FALSE(extra_data_source.Done()); + + auto extra_data = extra_data_source.GetNext(3); + EXPECT_FALSE(extra_data_source.Done()); + EXPECT_TRUE(extra_data.has_value()); + EXPECT_EQ(size_t(3), extra_data->size()); + EXPECT_EQ(ByteVector({1, 2, 3}), + ByteVector(extra_data->begin(), extra_data->end())); + + extra_data = extra_data_source.GetNext(2); + EXPECT_TRUE(extra_data_source.Done()); + EXPECT_TRUE(extra_data.has_value()); + EXPECT_EQ(ByteVector({4, 5}), + ByteVector(extra_data->begin(), extra_data->end())); + + extra_data = extra_data_source.GetNext(2); + EXPECT_FALSE(extra_data.has_value()); + + ExtraDataSink extra_data_sink; + + ByteVector content = {1, 2, 3}; + extra_data_sink.PutNext({content.data(), content.size()}); + content = {4, 5}; + extra_data_sink.PutNext({content.data(), content.size()}); + TestSerialize(data, extra_data_sink); +} + 
+TEST(RawDeltaSourceSinkTest, Empty) { + ByteVector data = { + // RawDeltaSource + 0, 0, 0, 0, // raw_delta_skip size + 0, 0, 0, 0, // raw_delta_diff size + }; + RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data); + + EXPECT_FALSE(raw_delta_source.GetNext()); + EXPECT_TRUE(raw_delta_source.Done()); + + TestSerialize(data, RawDeltaSink()); +} + +TEST(RawDeltaSinkSourceSinkTest, Normal) { + ByteVector data = { + // RawDeltaSource + 3, 0, 0, 0, // raw_delta_skip size + 1, 3, 0, // raw_delta_skip content + 3, 0, 0, 0, // raw_delta_diff size + 42, 24, 235, // raw_delta_diff content + }; + RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data); + EXPECT_FALSE(raw_delta_source.Done()); + + auto raw_delta = raw_delta_source.GetNext(); + EXPECT_FALSE(raw_delta_source.Done()); + EXPECT_TRUE(raw_delta.has_value()); + EXPECT_EQ(1U, raw_delta->copy_offset); + EXPECT_EQ(42, raw_delta->diff); + + raw_delta = raw_delta_source.GetNext(); + EXPECT_FALSE(raw_delta_source.Done()); + EXPECT_TRUE(raw_delta.has_value()); + EXPECT_EQ(5U, raw_delta->copy_offset); + EXPECT_EQ(24, raw_delta->diff); + + raw_delta = raw_delta_source.GetNext(); + EXPECT_TRUE(raw_delta_source.Done()); + EXPECT_TRUE(raw_delta.has_value()); + EXPECT_EQ(6U, raw_delta->copy_offset); + EXPECT_EQ(-21, raw_delta->diff); + + EXPECT_FALSE(raw_delta_source.GetNext()); + EXPECT_TRUE(raw_delta_source.Done()); + + RawDeltaSink raw_delta_sink; + raw_delta_sink.PutNext({1, 42}); + raw_delta_sink.PutNext({5, 24}); + raw_delta_sink.PutNext({6, -21}); + TestSerialize(data, raw_delta_sink); +} + +TEST(RawDeltaSourceSinkTest, InvalidContent) { + ByteVector data = { + // RawDeltaSource + 2, 0, 0, 0, // raw_delta_skip size + 1, 3, // raw_delta_skip content + 2, 0, 0, 0, // raw_delta_diff size + 0, 4, // raw_delta_diff content + }; + RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data); + EXPECT_FALSE(raw_delta_source.GetNext()); + EXPECT_FALSE(raw_delta_source.Done()); +} + 
TEST(ReferenceDeltaSourceSinkTest, Empty) {
  ByteVector data = {
      // ReferenceDeltaSource
      0, 0, 0, 0,  // reference_delta size
  };
  ReferenceDeltaSource reference_delta_source =
      TestInitialize<ReferenceDeltaSource>(&data);

  EXPECT_FALSE(reference_delta_source.GetNext());
  EXPECT_TRUE(reference_delta_source.Done());

  TestSerialize(data, ReferenceDeltaSink());
}

TEST(ReferenceDeltaSourceSinkTest, Normal) {
  ByteVector data = {
      // ReferenceDeltaSource
      2, 0, 0, 0,  // reference_delta size
      84, 47,      // reference_delta content
  };
  ReferenceDeltaSource reference_delta_source =
      TestInitialize<ReferenceDeltaSource>(&data);
  EXPECT_FALSE(reference_delta_source.Done());

  // Signed varint decode: 84 -> 42, 47 -> -24.
  auto delta = reference_delta_source.GetNext();
  EXPECT_FALSE(reference_delta_source.Done());
  EXPECT_TRUE(delta.has_value());
  EXPECT_EQ(42, *delta);

  delta = reference_delta_source.GetNext();
  EXPECT_TRUE(reference_delta_source.Done());
  EXPECT_TRUE(delta.has_value());
  EXPECT_EQ(-24, *delta);

  EXPECT_FALSE(reference_delta_source.GetNext());
  EXPECT_TRUE(reference_delta_source.Done());

  ReferenceDeltaSink reference_delta;
  reference_delta.PutNext(42);
  reference_delta.PutNext(-24);
  TestSerialize(data, reference_delta);
}

TEST(TargetSourceSinkTest, Empty) {
  ByteVector data = {
      // TargetSource
      0, 0, 0, 0,  // extra_targets size
  };
  TargetSource target_source = TestInitialize<TargetSource>(&data);

  EXPECT_FALSE(target_source.GetNext());
  EXPECT_TRUE(target_source.Done());

  TestSerialize(data, TargetSink());
}

TEST(TargetSourceSinkTest, Normal) {
  ByteVector data = {
      // TargetSource
      2, 0, 0, 0,  // extra_targets size
      3, 1,        // extra_targets content
  };
  TargetSource target_source = TestInitialize<TargetSource>(&data);
  EXPECT_FALSE(target_source.Done());

  // Targets are delta-encoded with an implicit +1 bias: {3, 1} decodes to
  // 3, then 3 + 1 + 1 = 5.
  auto target = target_source.GetNext();
  EXPECT_FALSE(target_source.Done());
  EXPECT_TRUE(target.has_value());
  EXPECT_EQ(3U, *target);

  target = target_source.GetNext();
  EXPECT_TRUE(target_source.Done());
  EXPECT_TRUE(target.has_value());
  EXPECT_EQ(5U, *target);

  EXPECT_FALSE(target_source.GetNext());
  EXPECT_TRUE(target_source.Done());

  TargetSink target_sink;
  target_sink.PutNext(3);
  target_sink.PutNext(5);
  TestSerialize(data, target_sink);
}

// Parses a full patch element and verifies every cached sub-source, then
// rebuilds the element with PatchElementWriter and expects identical bytes.
TEST(PatchElementTest, Normal) {
  ByteVector data = CreatePatchElement();

  PatchElementReader patch_element_reader =
      TestInitialize<PatchElementReader>(&data);

  ElementMatch element_match = patch_element_reader.element_match();
  EXPECT_EQ(kExeTypeWin32X86, element_match.exe_type());
  EXPECT_EQ(kExeTypeWin32X86, element_match.old_element.exe_type);
  EXPECT_EQ(kExeTypeWin32X86, element_match.new_element.exe_type);
  EXPECT_EQ(0x1U, element_match.old_element.offset);
  EXPECT_EQ(0x51U, element_match.old_element.size);
  EXPECT_EQ(0x3U, element_match.new_element.offset);
  EXPECT_EQ(0x13U, element_match.new_element.size);

  EquivalenceSource equivalence_source =
      patch_element_reader.GetEquivalenceSource();
  EXPECT_EQ(ByteVector({0x10}), equivalence_source.src_skip());
  EXPECT_EQ(ByteVector({0x00}), equivalence_source.dst_skip());
  EXPECT_EQ(ByteVector({0x12}), equivalence_source.copy_count());

  ExtraDataSource extra_data_source = patch_element_reader.GetExtraDataSource();
  EXPECT_EQ(ByteVector({0x13}), extra_data_source.extra_data());

  RawDeltaSource raw_delta_source = patch_element_reader.GetRawDeltaSource();
  EXPECT_EQ(ByteVector({0x14}), raw_delta_source.raw_delta_skip());
  EXPECT_EQ(ByteVector({0x15}), raw_delta_source.raw_delta_diff());

  ReferenceDeltaSource reference_delta_source =
      patch_element_reader.GetReferenceDeltaSource();
  EXPECT_EQ(ByteVector({0x16}), reference_delta_source.reference_delta());

  TargetSource target_source1 =
      patch_element_reader.GetExtraTargetSource(PoolTag(0));
  EXPECT_EQ(ByteVector({0x17}), target_source1.extra_targets());
  // PoolTag(1) has no extra targets in the fixture; expect an empty source.
  TargetSource target_source2 =
      patch_element_reader.GetExtraTargetSource(PoolTag(1));
  EXPECT_EQ(ByteVector({}), target_source2.extra_targets());
  TargetSource target_source3 =
      patch_element_reader.GetExtraTargetSource(PoolTag(2));
  EXPECT_EQ(ByteVector({0x18}), target_source3.extra_targets());

  PatchElementWriter patch_element_writer(element_match);

  patch_element_writer.SetEquivalenceSink(
      EquivalenceSink({0x10}, {0x00}, {0x12}));
  patch_element_writer.SetExtraDataSink(ExtraDataSink({0x13}));
  patch_element_writer.SetRawDeltaSink(RawDeltaSink({0x14}, {0x15}));
  patch_element_writer.SetReferenceDeltaSink(ReferenceDeltaSink({0x16}));
  patch_element_writer.SetTargetSink(PoolTag(0), TargetSink({0x17}));
  patch_element_writer.SetTargetSink(PoolTag(2), TargetSink({0x18}));
  TestSerialize(data, patch_element_writer);
}

TEST(PatchElementTest, BadEquivalence) {
  // If the "old" element is too small then the test should fail.
  {
    ByteVector data = CreatePatchElement();
    // old_length := 0x4 (too small).
    ModifyByte(offsetof(PatchElementHeader, old_length), 0x51, 0x04, &data);
    TestInvalidInitialize<PatchElementReader>(&data);
  }

  // If the "new" element is too small then the test should fail.
  {
    ByteVector data = CreatePatchElement();
    // new_length := 0x5 (too small).
    ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x05, &data);
    TestInvalidInitialize<PatchElementReader>(&data);
  }
}

TEST(PatchElementTest, WrongExtraData) {
  // Make "new" too large so insufficient extra data exists to cover the image.
  {
    ByteVector data = CreatePatchElement();
    // new_length := 0x14 (too large).
    ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x14, &data);
    TestInvalidInitialize<PatchElementReader>(&data);
  }
  // Make "new" too small so there is too much extra data.
  {
    ByteVector data = CreatePatchElement();
    // new_length := 0x12 (too small).
    ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x12, &data);
    TestInvalidInitialize<PatchElementReader>(&data);
  }
}

// Parses a minimal one-element "raw" patch, then rebuilds it with
// EnsemblePatchWriter and expects identical bytes.
TEST(EnsemblePatchTest, RawPatch) {
  ByteVector data = {
      // PatchHeader
      0x5A, 0x75, 0x63, 0x00,  // magic
      0x10, 0x32, 0x54, 0x76,  // old_size
      0x00, 0x11, 0x22, 0x33,  // old_crc
      0x01, 0, 0, 0,           // new_size
      0x44, 0x55, 0x66, 0x77,  // new_crc

      1, 0, 0, 0,  // number of elements

      // PatchElementHeader
      0x01, 0, 0, 0,       // old_offset
      0x02, 0, 0, 0,       // old_length
      0x00, 0, 0, 0,       // new_offset
      0x01, 0, 0, 0,       // new_length
      'P', 'x', '8', '6',  // exe_type = EXE_TYPE_WIN32_X86
      // EquivalenceSource
      0, 0, 0, 0,  // src_skip size
      0, 0, 0, 0,  // dst_skip size
      0, 0, 0, 0,  // copy_count size
      // ExtraDataSource
      0x01, 0, 0, 0,  // extra_data size
      0x04,           // extra_data content
      // RawDeltaSource
      0, 0, 0, 0,  // raw_delta_skip size
      0, 0, 0, 0,  // raw_delta_diff size
      // ReferenceDeltaSource
      0, 0, 0, 0,  // reference_delta size
      // PatchElementReader
      0, 0, 0, 0,  // pool count
  };

  EnsemblePatchReader ensemble_patch_reader =
      TestInitialize<EnsemblePatchReader>(&data);

  PatchHeader header = ensemble_patch_reader.header();
  EXPECT_EQ(PatchHeader::kMagic, header.magic);
  EXPECT_EQ(0x76543210U, header.old_size);
  EXPECT_EQ(0x33221100U, header.old_crc);
  EXPECT_EQ(0x01U, header.new_size);
  EXPECT_EQ(0x77665544U, header.new_crc);

  const std::vector<PatchElementReader>& elements =
      ensemble_patch_reader.elements();
  EXPECT_EQ(size_t(1), elements.size());

  EnsemblePatchWriter ensemble_patch_writer(header);
  PatchElementWriter patch_element_writer(elements[0].element_match());
  patch_element_writer.SetEquivalenceSink({});
  patch_element_writer.SetExtraDataSink(ExtraDataSink({0x04}));
  patch_element_writer.SetRawDeltaSink({});
  patch_element_writer.SetReferenceDeltaSink({});
  ensemble_patch_writer.AddElement(std::move(patch_element_writer));

  TestSerialize(data, ensemble_patch_writer);
}
// Verifies CheckOldFile()/CheckNewFile() CRC32 + size validation against
// matching and mismatched images.
TEST(EnsemblePatchTest, CheckFile) {
  ByteVector data = {
      // PatchHeader
      0x5A, 0x75, 0x63, 0x00,  // magic
      0x05, 0x00, 0x00, 0x00,  // old_size
      0xDF, 0x13, 0xE4, 0x10,  // old_crc
      0x03, 0x00, 0x00, 0x00,  // new_size
      0xDC, 0xF7, 0x00, 0x40,  // new_crc

      1, 0, 0, 0,  // number of elements

      // PatchElementHeader
      0x01, 0, 0, 0,       // old_offset
      0x02, 0, 0, 0,       // old_length
      0x00, 0, 0, 0,       // new_offset
      0x03, 0, 0, 0,       // new_length
      'P', 'x', '8', '6',  // exe_type = EXE_TYPE_WIN32_X86
      // EquivalenceSource
      0, 0, 0, 0,  // src_skip size
      0, 0, 0, 0,  // dst_skip size
      0, 0, 0, 0,  // copy_count size
      // ExtraDataSource
      0x03, 0, 0, 0,  // extra_data size
      'A', 'B', 'C',  // extra_data content
      // RawDeltaSource
      0, 0, 0, 0,  // raw_delta_skip size
      0, 0, 0, 0,  // raw_delta_diff size
      // ReferenceDeltaSource
      0, 0, 0, 0,  // reference_delta size
      // PatchElementReader
      0, 0, 0, 0,  // pool count
  };

  EnsemblePatchReader ensemble_patch_reader =
      TestInitialize<EnsemblePatchReader>(&data);

  ByteVector old_file = {0x10, 0x32, 0x54, 0x76, 0x98};
  ByteVector new_file = {0xBA, 0xDC, 0xFE};

  ConstBufferView old_image(old_file.data(), old_file.size());
  ConstBufferView new_image(new_file.data(), new_file.size());

  EXPECT_TRUE(ensemble_patch_reader.CheckOldFile(old_image));
  EXPECT_TRUE(ensemble_patch_reader.CheckNewFile(new_image));
  EXPECT_FALSE(ensemble_patch_reader.CheckOldFile(new_image));
  EXPECT_FALSE(ensemble_patch_reader.CheckNewFile(old_image));
}

TEST(EnsemblePatchTest, InvalidMagic) {
  ByteVector data = {
      // PatchHeader
      0x42, 0x42, 0x42, 0x00,  // magic (invalid)
      0x10, 0x32, 0x54, 0x76,  // old_size
      0x00, 0x11, 0x22, 0x33,  // old_crc
      0x03, 0x00, 0x00, 0x00,  // new_size
      0x44, 0x55, 0x66, 0x77,  // new_crc

      1, 0, 0, 0,  // number of elements

      // PatchElementHeader
      0x01, 0, 0, 0,       // old_offset
      0x02, 0, 0, 0,       // old_length
      0x00, 0, 0, 0,       // new_offset
      0x03, 0, 0, 0,       // new_length
      'P', 'x', '8', '6',  // exe_type = EXE_TYPE_WIN32_X86
      // EquivalenceSource
      0, 0, 0, 0,  // src_skip size
      0, 0, 0, 0,  // dst_skip size
      0, 0, 0, 0,  // copy_count size
      // ExtraDataSource
      0, 0, 0, 0,  // extra_data size
      // RawDeltaSource
      0, 0, 0, 0,  // raw_delta_skip size
      0, 0, 0, 0,  // raw_delta_diff size
      // ReferenceDeltaSource
      0, 0, 0, 0,  // reference_delta size
      // PatchElementReader
      0, 0, 0, 0,  // pool count
  };

  TestInvalidInitialize<EnsemblePatchReader>(&data);
}

}  // namespace zucchini
diff --git a/patch_reader.cc b/patch_reader.cc
new file mode 100644
index 0000000..99951da
--- /dev/null
+++ b/patch_reader.cc
@@ -0,0 +1,388 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/patch_reader.h"

#include <type_traits>
#include <utility>

#include "base/numerics/safe_conversions.h"
#include "components/zucchini/algorithm.h"
#include "components/zucchini/crc32.h"

namespace zucchini {

namespace patch {

bool ParseElementMatch(BufferSource* source, ElementMatch* element_match) {
  PatchElementHeader unsafe_element_header;
  if (!source->GetValue(&unsafe_element_header)) {
    LOG(ERROR) << "Impossible to read ElementMatch from source.";
    return false;
  }
  ExecutableType exe_type =
      CastToExecutableType(unsafe_element_header.exe_type);
  if (exe_type == kExeTypeUnknown) {
    LOG(ERROR) << "Invalid ExecutableType found.";
    return false;
  }
  // Zero-length elements are meaningless; reject them up front.
  if (!unsafe_element_header.old_length || !unsafe_element_header.new_length) {
    LOG(ERROR) << "Empty patch element found.";
    return false;
  }
  // |unsafe_element_header| is now considered to be safe as it has a valid
  // |exe_type| and the length fields are of sufficient size.
  const auto& element_header = unsafe_element_header;

  // Caveat: Element offsets and lengths can still be invalid (e.g., exceeding
  // archive bounds), but this will be checked later.
  element_match->old_element.offset = element_header.old_offset;
  element_match->old_element.size = element_header.old_length;
  element_match->new_element.offset = element_header.new_offset;
  element_match->new_element.size = element_header.new_length;
  element_match->old_element.exe_type = exe_type;
  element_match->new_element.exe_type = exe_type;
  return true;
}

bool ParseBuffer(BufferSource* source, BufferSource* buffer) {
  uint32_t unsafe_size = 0;  // Bytes.
  // NOTE(review): the condition is ">=" (at least as large), but the message
  // says "larger than" -- consider rewording the message.
  static_assert(sizeof(size_t) >= sizeof(unsafe_size),
                "size_t is expected to be larger than uint32_t.");
  if (!source->GetValue(&unsafe_size)) {
    LOG(ERROR) << "Impossible to read buffer size from source.";
    return false;
  }
  if (!source->GetRegion(static_cast<size_t>(unsafe_size), buffer)) {
    LOG(ERROR) << "Impossible to read buffer content from source.";
    return false;
  }
  // Caveat: |buffer| is considered to be safe as it was possible to extract it
  // from the patch. However, this does not mean its contents are safe and when
  // parsed must be validated if possible.
  return true;
}

}  // namespace patch

/******** EquivalenceSource ********/

EquivalenceSource::EquivalenceSource() = default;
EquivalenceSource::EquivalenceSource(const EquivalenceSource&) = default;
EquivalenceSource::~EquivalenceSource() = default;

bool EquivalenceSource::Initialize(BufferSource* source) {
  return patch::ParseBuffer(source, &src_skip_) &&
         patch::ParseBuffer(source, &dst_skip_) &&
         patch::ParseBuffer(source, &copy_count_);
}

absl::optional<Equivalence> EquivalenceSource::GetNext() {
  if (src_skip_.empty() || dst_skip_.empty() || copy_count_.empty())
    return absl::nullopt;

  Equivalence equivalence = {};

  uint32_t length = 0;
  if (!patch::ParseVarUInt<uint32_t>(&copy_count_, &length))
    return absl::nullopt;
  equivalence.length = base::strict_cast<offset_t>(length);

  // Offsets are delta-encoded relative to the end of the previous
  // equivalence; src deltas are signed, dst deltas unsigned.
  int32_t src_offset_diff = 0;  // Intentionally signed.
  if (!patch::ParseVarInt<int32_t>(&src_skip_, &src_offset_diff))
    return absl::nullopt;
  base::CheckedNumeric<offset_t> src_offset =
      previous_src_offset_ + src_offset_diff;
  if (!src_offset.IsValid())
    return absl::nullopt;

  equivalence.src_offset = src_offset.ValueOrDie();
  previous_src_offset_ = src_offset + equivalence.length;
  if (!previous_src_offset_.IsValid())
    return absl::nullopt;

  uint32_t dst_offset_diff = 0;  // Intentionally unsigned.
  if (!patch::ParseVarUInt<uint32_t>(&dst_skip_, &dst_offset_diff))
    return absl::nullopt;
  base::CheckedNumeric<offset_t> dst_offset =
      previous_dst_offset_ + dst_offset_diff;
  if (!dst_offset.IsValid())
    return absl::nullopt;

  equivalence.dst_offset = dst_offset.ValueOrDie();
  previous_dst_offset_ = equivalence.dst_offset + equivalence.length;
  if (!previous_dst_offset_.IsValid())
    return absl::nullopt;

  // Caveat: |equivalence| is assumed to be safe only once the
  // ValidateEquivalencesAndExtraData() method has returned true. Prior to this
  // any equivalence returned is assumed to be unsafe.
  return equivalence;
}

/******** ExtraDataSource ********/

ExtraDataSource::ExtraDataSource() = default;
ExtraDataSource::ExtraDataSource(const ExtraDataSource&) = default;
ExtraDataSource::~ExtraDataSource() = default;

bool ExtraDataSource::Initialize(BufferSource* source) {
  return patch::ParseBuffer(source, &extra_data_);
}

absl::optional<ConstBufferView> ExtraDataSource::GetNext(offset_t size) {
  ConstBufferView buffer;
  if (!extra_data_.GetRegion(size, &buffer))
    return absl::nullopt;
  // |buffer| is assumed to always be safe/valid.
  return buffer;
}

/******** RawDeltaSource ********/

RawDeltaSource::RawDeltaSource() = default;
RawDeltaSource::RawDeltaSource(const RawDeltaSource&) = default;
RawDeltaSource::~RawDeltaSource() = default;

bool RawDeltaSource::Initialize(BufferSource* source) {
  return patch::ParseBuffer(source, &raw_delta_skip_) &&
         patch::ParseBuffer(source, &raw_delta_diff_);
}

absl::optional<RawDeltaUnit> RawDeltaSource::GetNext() {
  if (raw_delta_skip_.empty() || raw_delta_diff_.empty())
    return absl::nullopt;

  RawDeltaUnit raw_delta = {};
  uint32_t copy_offset_diff = 0;
  if (!patch::ParseVarUInt<uint32_t>(&raw_delta_skip_, &copy_offset_diff))
    return absl::nullopt;
  base::CheckedNumeric<offset_t> copy_offset =
      copy_offset_diff + copy_offset_compensation_;
  if (!copy_offset.IsValid())
    return absl::nullopt;
  raw_delta.copy_offset = copy_offset.ValueOrDie();

  if (!raw_delta_diff_.GetValue<int8_t>(&raw_delta.diff))
    return absl::nullopt;

  // A 0 value for a delta.diff is considered invalid since it has no meaning.
  if (!raw_delta.diff)
    return absl::nullopt;

  // We keep track of the compensation needed for next offset, taking into
  // account delta encoding and bias of -1.
  copy_offset_compensation_ = copy_offset + 1;
  if (!copy_offset_compensation_.IsValid())
    return absl::nullopt;
  // |raw_delta| is assumed to always be safe/valid.
  return raw_delta;
}

/******** ReferenceDeltaSource ********/

ReferenceDeltaSource::ReferenceDeltaSource() = default;
ReferenceDeltaSource::ReferenceDeltaSource(const ReferenceDeltaSource&) =
    default;
ReferenceDeltaSource::~ReferenceDeltaSource() = default;

bool ReferenceDeltaSource::Initialize(BufferSource* source) {
  return patch::ParseBuffer(source, &source_);
}

absl::optional<int32_t> ReferenceDeltaSource::GetNext() {
  if (source_.empty())
    return absl::nullopt;
  int32_t ref_delta = 0;
  if (!patch::ParseVarInt<int32_t>(&source_, &ref_delta))
    return absl::nullopt;
  // |ref_delta| is assumed to always be safe/valid.
  return ref_delta;
}

/******** TargetSource ********/

TargetSource::TargetSource() = default;
TargetSource::TargetSource(const TargetSource&) = default;
TargetSource::~TargetSource() = default;

bool TargetSource::Initialize(BufferSource* source) {
  return patch::ParseBuffer(source, &extra_targets_);
}

absl::optional<offset_t> TargetSource::GetNext() {
  if (extra_targets_.empty())
    return absl::nullopt;

  uint32_t target_diff = 0;
  if (!patch::ParseVarUInt<uint32_t>(&extra_targets_, &target_diff))
    return absl::nullopt;
  base::CheckedNumeric<offset_t> target = target_diff + target_compensation_;
  if (!target.IsValid())
    return absl::nullopt;

  // We keep track of the compensation needed for next target, taking into
  // account delta encoding and bias of -1.
  target_compensation_ = target + 1;
  if (!target_compensation_.IsValid())
    return absl::nullopt;
  // Caveat: |target| will be a valid offset_t, but it's up to the caller to
  // check whether it's a valid offset for an image.
  return offset_t(target.ValueOrDie());
}

/******** PatchElementReader ********/

PatchElementReader::PatchElementReader() = default;
PatchElementReader::PatchElementReader(PatchElementReader&&) = default;
PatchElementReader::~PatchElementReader() = default;

bool PatchElementReader::Initialize(BufferSource* source) {
  bool ok =
      patch::ParseElementMatch(source, &element_match_) &&
      equivalences_.Initialize(source) && extra_data_.Initialize(source) &&
      ValidateEquivalencesAndExtraData() && raw_delta_.Initialize(source) &&
      reference_delta_.Initialize(source);
  if (!ok)
    return false;
  uint32_t pool_count = 0;
  if (!source->GetValue(&pool_count)) {
    LOG(ERROR) << "Impossible to read pool_count from source.";
    return false;
  }
  // Each pool tag must be valid and appear at most once.
  for (uint32_t i = 0; i < pool_count; ++i) {
    uint8_t pool_tag_value = 0;
    if (!source->GetValue(&pool_tag_value)) {
      LOG(ERROR) << "Impossible to read pool_tag from source.";
      return false;
    }
    PoolTag pool_tag(pool_tag_value);
    if (pool_tag == kNoPoolTag) {
      LOG(ERROR) << "Invalid pool_tag encountered in ExtraTargetList.";
      return false;
    }
    auto insert_result = extra_targets_.insert({pool_tag, {}});
    if (!insert_result.second) {  // Element already present.
      LOG(ERROR) << "Multiple ExtraTargetList found for the same pool_tag.";
      return false;
    }
    if (!insert_result.first->second.Initialize(source))
      return false;
  }
  return true;
}

bool PatchElementReader::ValidateEquivalencesAndExtraData() {
  // Work on a copy so the cached |equivalences_| source is not consumed.
  EquivalenceSource equivalences_copy = equivalences_;

  const size_t old_region_size = element_match_.old_element.size;
  const size_t new_region_size = element_match_.new_element.size;

  base::CheckedNumeric<uint32_t> total_length = 0;
  // Validate that each |equivalence| falls within the bounds of the
  // |element_match_| and are in order.
  offset_t prev_dst_end = 0;
  for (auto equivalence = equivalences_copy.GetNext(); equivalence.has_value();
       equivalence = equivalences_copy.GetNext()) {
    if (!RangeIsBounded(equivalence->src_offset, equivalence->length,
                        old_region_size) ||
        !RangeIsBounded(equivalence->dst_offset, equivalence->length,
                        new_region_size)) {
      LOG(ERROR) << "Out of bounds equivalence detected.";
      return false;
    }
    if (prev_dst_end > equivalence->dst_end()) {
      LOG(ERROR) << "Out of order equivalence detected.";
      return false;
    }
    prev_dst_end = equivalence->dst_end();
    total_length += equivalence->length;
  }
  // Extra data must cover exactly the part of "new" not covered by
  // equivalences.
  if (!total_length.IsValid() ||
      element_match_.new_element.region().size < total_length.ValueOrDie() ||
      extra_data_.extra_data().size() !=
          element_match_.new_element.region().size -
              static_cast<size_t>(total_length.ValueOrDie())) {
    LOG(ERROR) << "Incorrect amount of extra_data.";
    return false;
  }
  return true;
}

/******** EnsemblePatchReader ********/

absl::optional<EnsemblePatchReader> EnsemblePatchReader::Create(
    ConstBufferView buffer) {
  BufferSource source(buffer);
  EnsemblePatchReader patch;
  if (!patch.Initialize(&source))
    return absl::nullopt;
  return patch;
}

EnsemblePatchReader::EnsemblePatchReader() = default;
EnsemblePatchReader::EnsemblePatchReader(EnsemblePatchReader&&) = default;
EnsemblePatchReader::~EnsemblePatchReader() = default;

bool EnsemblePatchReader::Initialize(BufferSource* source) {
  if (!source->GetValue(&header_)) {
    LOG(ERROR) << "Impossible to read header from source.";
    return false;
  }
  if (header_.magic != PatchHeader::kMagic) {
    LOG(ERROR) << "Patch contains invalid magic.";
    return false;
  }
  // |header_| is assumed to be safe from this point forward.

  uint32_t element_count = 0;
  if (!source->GetValue(&element_count)) {
    LOG(ERROR) << "Impossible to read element_count from source.";
    return false;
  }

  // Elements must tile the "new" image contiguously, in order.
  offset_t current_dst_offset = 0;
  for (uint32_t i = 0; i < element_count; ++i) {
    PatchElementReader element_patch;
    if (!element_patch.Initialize(source))
      return false;

    if (!element_patch.old_element().FitsIn(header_.old_size) ||
        !element_patch.new_element().FitsIn(header_.new_size)) {
      LOG(ERROR) << "Invalid element encountered.";
      return false;
    }

    if (element_patch.new_element().offset != current_dst_offset) {
      LOG(ERROR) << "Invalid element encountered.";
      return false;
    }
    current_dst_offset = element_patch.new_element().EndOffset();

    elements_.push_back(std::move(element_patch));
  }
  if (current_dst_offset != header_.new_size) {
    LOG(ERROR) << "Patch elements don't fully cover new image file.";
    return false;
  }

  if (!source->empty()) {
    LOG(ERROR) << "Patch was not fully consumed.";
    return false;
  }

  return true;
}

bool EnsemblePatchReader::CheckOldFile(ConstBufferView old_image) const {
  return old_image.size() == header_.old_size &&
         CalculateCrc32(old_image.begin(), old_image.end()) == header_.old_crc;
}

bool EnsemblePatchReader::CheckNewFile(ConstBufferView new_image) const {
  return new_image.size() == header_.new_size &&
         CalculateCrc32(new_image.begin(), new_image.end()) == header_.new_crc;
}

}  // namespace zucchini
diff --git a/patch_reader.h b/patch_reader.h
new file mode 100644
index 0000000..93d64b0
--- /dev/null
+++ b/patch_reader.h
@@ -0,0 +1,285 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+ +#ifndef COMPONENTS_ZUCCHINI_PATCH_READER_H_ +#define COMPONENTS_ZUCCHINI_PATCH_READER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <vector> + +#include "base/debug/stack_trace.h" +#include "base/logging.h" +#include "base/numerics/checked_math.h" +#include "components/zucchini/buffer_source.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/patch_utils.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +namespace patch { + +// The Parse*() functions below attempt to extract data of a specific type from +// the beginning of |source|. A parse function: On success, consumes the used +// portion of |source|, writes data into the output parameter, and returns +// true. Otherwise returns false and does not consume |source|. + +// Parses |source| for the next ElementMatch. +bool ParseElementMatch(BufferSource* source, ElementMatch* element_match); + +// Parses |source| for the next embedded BufferSource. +bool ParseBuffer(BufferSource* source, BufferSource* buffer); + +// Parses |source| for the next VarUInt. +template <class T> +bool ParseVarUInt(BufferSource* source, T* value) { + auto bytes_read = DecodeVarUInt(source->begin(), source->end(), value); + if (!bytes_read) { + LOG(ERROR) << "Impossible to read VarUInt from source."; + LOG(ERROR) << base::debug::StackTrace().ToString(); + return false; + } + // Advance |source| beyond the VarUInt value. + source->Skip(bytes_read); + return true; +} + +// Parses |source| for the next VarInt. +template <class T> +bool ParseVarInt(BufferSource* source, T* value) { + auto bytes_read = DecodeVarInt(source->begin(), source->end(), value); + if (!bytes_read) { + LOG(ERROR) << "Impossible to read VarInt from source."; + LOG(ERROR) << base::debug::StackTrace().ToString(); + return false; + } + // Advance |source| beyond the VarInt value. 
+ source->Skip(bytes_read); + return true; +} + +} // namespace patch + +// The *Source classes below are light-weight (i.e., allows copying) visitors to +// read patch data. Each of them has an associated "main type", and performs the +// following: +// - Consumes portions of a BufferSource (required to remain valid for the +// lifetime of the object). +// - Decodes consumed data, which represent a list of items with "main type". +// - Dispenses "main type" elements (hence "Source" in the name). +// +// Common "core functions" implemented by *Source classes are: +// - bool Initialize(BufferSource* source): Consumes data from BufferSource and +// initializes internal states. Returns true if successful, and false +// otherwise (|source| may be partially consumed). +// - absl::optional<MAIN_TYPE> GetNext(OPT_PARAMS): Decodes consumed data and +// returns the next item as absl::optional (returns absl::nullopt on failure). +// - bool Done() const: Returns true if no more items remain; otherwise false. +// +// Usage of *Source instances don't mix, and GetNext() have dissimilar +// interfaces. Therefore we do not use inheritance to relate *Source classes, +// and simply implement "core functions" with matching names. + +// Source for Equivalences. +class EquivalenceSource { + public: + EquivalenceSource(); + EquivalenceSource(const EquivalenceSource&); + ~EquivalenceSource(); + + // Core functions. + bool Initialize(BufferSource* source); + absl::optional<Equivalence> GetNext(); + bool Done() const { + return src_skip_.empty() && dst_skip_.empty() && copy_count_.empty(); + } + + // Accessors for unittest. 
+ BufferSource src_skip() const { return src_skip_; } + BufferSource dst_skip() const { return dst_skip_; } + BufferSource copy_count() const { return copy_count_; } + + private: + BufferSource src_skip_; + BufferSource dst_skip_; + BufferSource copy_count_; + + base::CheckedNumeric<offset_t> previous_src_offset_ = 0; + base::CheckedNumeric<offset_t> previous_dst_offset_ = 0; +}; + +// Source for extra data. +class ExtraDataSource { + public: + ExtraDataSource(); + ExtraDataSource(const ExtraDataSource&); + ~ExtraDataSource(); + + // Core functions. + bool Initialize(BufferSource* source); + // |size| is the size in bytes of the buffer requested. + absl::optional<ConstBufferView> GetNext(offset_t size); + bool Done() const { return extra_data_.empty(); } + + // Accessors for unittest. + BufferSource extra_data() const { return extra_data_; } + + private: + BufferSource extra_data_; +}; + +// Source for raw delta. +class RawDeltaSource { + public: + RawDeltaSource(); + RawDeltaSource(const RawDeltaSource&); + ~RawDeltaSource(); + + // Core functions. + bool Initialize(BufferSource* source); + absl::optional<RawDeltaUnit> GetNext(); + bool Done() const { + return raw_delta_skip_.empty() && raw_delta_diff_.empty(); + } + + // Accessors for unittest. + BufferSource raw_delta_skip() const { return raw_delta_skip_; } + BufferSource raw_delta_diff() const { return raw_delta_diff_; } + + private: + BufferSource raw_delta_skip_; + BufferSource raw_delta_diff_; + + base::CheckedNumeric<offset_t> copy_offset_compensation_ = 0; +}; + +// Source for reference delta. +class ReferenceDeltaSource { + public: + ReferenceDeltaSource(); + ReferenceDeltaSource(const ReferenceDeltaSource&); + ~ReferenceDeltaSource(); + + // Core functions. + bool Initialize(BufferSource* source); + absl::optional<int32_t> GetNext(); + bool Done() const { return source_.empty(); } + + // Accessors for unittest. 
+  BufferSource reference_delta() const { return source_; }
+
+ private:
+  BufferSource source_;
+};
+
+// Source for additional targets.
+class TargetSource {
+ public:
+  TargetSource();
+  TargetSource(const TargetSource&);
+  ~TargetSource();
+
+  // Core functions.
+  bool Initialize(BufferSource* source);
+  absl::optional<offset_t> GetNext();
+  bool Done() const { return extra_targets_.empty(); }
+
+  // Accessors for unittest.
+  BufferSource extra_targets() const { return extra_targets_; }
+
+ private:
+  BufferSource extra_targets_;
+
+  base::CheckedNumeric<offset_t> target_compensation_ = 0;
+};
+
+// Following are utility classes providing a structured view on data forming a
+// patch.
+
+// Utility to read a patch element. A patch element contains all the information
+// necessary to patch a single element. This class provides access
+// to the multiple streams of data forming the patch element.
+class PatchElementReader {
+ public:
+  PatchElementReader();
+  PatchElementReader(PatchElementReader&&);
+  ~PatchElementReader();
+
+  // If data read from |source| is well-formed, initialize cached sources to
+  // read from it, and returns true. Otherwise returns false.
+  bool Initialize(BufferSource* source);
+
+  const ElementMatch& element_match() const { return element_match_; }
+  const Element& old_element() const { return element_match_.old_element; }
+  const Element& new_element() const { return element_match_.new_element; }
+
+  // The Get*() functions below return copies of cached sources. Callers may
+  // assume the following:
+  // - Equivalences satisfy basic boundary constraints
+  // - "Old" / "new" blocks lie entirely in "old" / "new" images.
+  // - "New" blocks are sorted.
+ EquivalenceSource GetEquivalenceSource() const { return equivalences_; } + ExtraDataSource GetExtraDataSource() const { return extra_data_; } + RawDeltaSource GetRawDeltaSource() const { return raw_delta_; } + ReferenceDeltaSource GetReferenceDeltaSource() const { + return reference_delta_; + } + TargetSource GetExtraTargetSource(PoolTag tag) const { + auto pos = extra_targets_.find(tag); + return pos != extra_targets_.end() ? pos->second : TargetSource(); + } + + private: + // Checks that "old" and "new" blocks of each item in |equivalences_| satisfy + // basic order and image bound constraints (using |element_match_| data). Also + // validates that the amount of extra data is correct. Returns true if + // successful. + bool ValidateEquivalencesAndExtraData(); + + ElementMatch element_match_; + + // Cached sources. + EquivalenceSource equivalences_; + ExtraDataSource extra_data_; + RawDeltaSource raw_delta_; + ReferenceDeltaSource reference_delta_; + std::map<PoolTag, TargetSource> extra_targets_; +}; + +// Utility to read a Zucchini ensemble patch. An ensemble patch is the +// concatenation of a patch header with a vector of patch elements. +class EnsemblePatchReader { + public: + // If data read from |buffer| is well-formed, initializes and returns + // an instance of EnsemblePatchReader. Otherwise returns absl::nullopt. + static absl::optional<EnsemblePatchReader> Create(ConstBufferView buffer); + + EnsemblePatchReader(); + EnsemblePatchReader(EnsemblePatchReader&&); + ~EnsemblePatchReader(); + + // If data read from |source| is well-formed, initialize internal state to + // read from it, and returns true. Otherwise returns false. + bool Initialize(BufferSource* source); + + // Check old / new image file validity, comparing against expected size and + // CRC32. Return true if file matches expectations, false otherwise. 
+ bool CheckOldFile(ConstBufferView old_image) const; + bool CheckNewFile(ConstBufferView new_image) const; + + const PatchHeader& header() const { return header_; } + const std::vector<PatchElementReader>& elements() const { return elements_; } + + private: + PatchHeader header_; + std::vector<PatchElementReader> elements_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_PATCH_READER_H_ diff --git a/patch_utils.h b/patch_utils.h new file mode 100644 index 0000000..5f49195 --- /dev/null +++ b/patch_utils.h @@ -0,0 +1,135 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_PATCH_UTILS_H_ +#define COMPONENTS_ZUCCHINI_PATCH_UTILS_H_ + +#include <stdint.h> + +#include <iterator> +#include <type_traits> + +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// A Zucchini 'ensemble' patch is the concatenation of a patch header with a +// list of patch 'elements', each containing data for patching individual +// elements. + +// Supported by MSVC, g++, and clang++. Ensures no gaps in packing. +#pragma pack(push, 1) + +// Header for a Zucchini patch, found at the beginning of an ensemble patch. +struct PatchHeader { + // Magic signature at the beginning of a Zucchini patch file. + enum : uint32_t { kMagic = 'Z' | ('u' << 8) | ('c' << 16) }; + + uint32_t magic = 0; + uint32_t old_size = 0; + uint32_t old_crc = 0; + uint32_t new_size = 0; + uint32_t new_crc = 0; +}; + +// Sanity check. +static_assert(sizeof(PatchHeader) == 20, "PatchHeader must be 20 bytes"); + +// Header for a patch element, found at the beginning of every patch element. +struct PatchElementHeader { + uint32_t old_offset; + uint32_t old_length; + uint32_t new_offset; + uint32_t new_length; + uint32_t exe_type; // ExecutableType. +}; + +// Sanity check. 
+static_assert(sizeof(PatchElementHeader) == 20,
+              "PatchElementHeader must be 20 bytes");
+
+#pragma pack(pop)
+
+// Describes a raw FIX operation.
+struct RawDeltaUnit {
+  offset_t copy_offset;  // Offset in copy regions.
+  int8_t diff;           // Bytewise difference.
+};
+
+// A Zucchini patch contains data streams encoded using varint format to reduce
+// uncompressed size.
+
+// Writes |value| as a varint in |dst| and returns an iterator pointing beyond
+// the written region. |dst| is assumed to hold enough space. Typically, this
+// will write to a vector using back insertion, e.g.:
+//   EncodeVarUInt(value, std::back_inserter(vector));
+template <class T, class It>
+It EncodeVarUInt(T value, It dst) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned");
+
+  while (value >= 0x80) {
+    *dst++ = static_cast<uint8_t>(value) | 0x80;
+    value >>= 7;
+  }
+  *dst++ = static_cast<uint8_t>(value);
+  return dst;
+}
+
+// Same as EncodeVarUInt(), but for signed values.
+template <class T, class It>
+It EncodeVarInt(T value, It dst) {
+  static_assert(std::is_signed<T>::value, "Value type must be signed");
+
+  using unsigned_value_type = typename std::make_unsigned<T>::type;
+  if (value < 0)
+    return EncodeVarUInt((unsigned_value_type(~value) << 1) | 1, dst);
+  else
+    return EncodeVarUInt(unsigned_value_type(value) << 1, dst);
+}
+
+// Tries to read a varint unsigned integer from |[first, last)|. If
+// successful, writes result into |value| and returns the number of bytes
+// read from |[first, last)|. Otherwise returns 0.
+template <class T, class It>
+typename std::iterator_traits<It>::difference_type DecodeVarUInt(It first,
+                                                                 It last,
+                                                                 T* value) {
+  static_assert(std::is_unsigned<T>::value, "Value type must be unsigned");
+
+  uint8_t sh = 0;
+  T val = 0;
+  for (auto it = first; it != last;) {
+    val |= T(*it & 0x7F) << sh;
+    if (*(it++) < 0x80) {
+      *value = val;
+      return it - first;
+    }
+    sh += 7;
+    if (sh >= sizeof(T) * 8)  // Overflow!
+ return 0; + } + return 0; +} + +// Same as DecodeVarUInt(), but for signed values. +template <class T, class It> +typename std::iterator_traits<It>::difference_type DecodeVarInt(It first, + It last, + T* value) { + static_assert(std::is_signed<T>::value, "Value type must be signed"); + + typename std::make_unsigned<T>::type tmp = 0; + auto res = DecodeVarUInt(first, last, &tmp); + if (res) { + if (tmp & 1) + *value = ~static_cast<T>(tmp >> 1); + else + *value = static_cast<T>(tmp >> 1); + } + return res; +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_PATCH_UTILS_H_ diff --git a/patch_utils_unittest.cc b/patch_utils_unittest.cc new file mode 100644 index 0000000..81e4e38 --- /dev/null +++ b/patch_utils_unittest.cc @@ -0,0 +1,169 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/patch_utils.h" + +#include <stdint.h> + +#include <iterator> +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +template <class T> +void TestEncodeDecodeVarUInt(const std::vector<T>& data) { + std::vector<uint8_t> buffer; + + std::vector<T> values; + for (T basis : data) { + // For variety, test the neighborhood values for each case in |data|. Some + // test cases may result in overflow when computing |value|, but we don't + // care about that. 
+ for (int delta = -4; delta <= 4; ++delta) { + T value = delta + basis; + EncodeVarUInt<T>(value, std::back_inserter(buffer)); + values.push_back(value); + + value = delta - basis; + EncodeVarUInt<T>(value, std::back_inserter(buffer)); + values.push_back(value); + } + } + + auto it = buffer.begin(); + for (T expected : values) { + T value = T(-1); + auto res = DecodeVarUInt(it, buffer.end(), &value); + EXPECT_NE(0, res); + EXPECT_EQ(expected, value); + it += res; + } + EXPECT_EQ(it, buffer.end()); + + T value = T(-1); + auto res = DecodeVarUInt(it, buffer.end(), &value); + EXPECT_EQ(0, res); + EXPECT_EQ(T(-1), value); +} + +template <class T> +void TestEncodeDecodeVarInt(const std::vector<T>& data) { + std::vector<uint8_t> buffer; + + std::vector<T> values; + for (T basis : data) { + // For variety, test the neighborhood values for each case in |data|. Some + // test cases may result in overflow when computing |value|, but we don't + // care about that. + for (int delta = -4; delta <= 4; ++delta) { + T value = delta + basis; + EncodeVarInt(value, std::back_inserter(buffer)); + values.push_back(value); + + value = delta - basis; + EncodeVarInt(value, std::back_inserter(buffer)); + values.push_back(value); + } + } + + auto it = buffer.begin(); + for (T expected : values) { + T value = T(-1); + auto res = DecodeVarInt(it, buffer.end(), &value); + EXPECT_NE(0, res); + EXPECT_EQ(expected, value); + it += res; + } + EXPECT_EQ(it, buffer.end()); + + T value = T(-1); + auto res = DecodeVarInt(it, buffer.end(), &value); + EXPECT_EQ(0, res); + EXPECT_EQ(T(-1), value); +} + +TEST(PatchUtilsTest, EncodeDecodeVarUInt32) { + TestEncodeDecodeVarUInt<uint32_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21, + 1 << 22, 1 << 27, 1 << 28, 0x7FFFFFFFU, + UINT32_MAX}); +} + +TEST(PatchUtilsTest, EncodeDecodeVarInt32) { + TestEncodeDecodeVarInt<int32_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21, + 1 << 22, 1 << 27, 1 << 28, -1, INT32_MIN, + INT32_MAX}); +} + +TEST(PatchUtilsTest, 
EncodeDecodeVarUInt64) { + TestEncodeDecodeVarUInt<uint64_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21, + 1 << 22, 1ULL << 55, 1ULL << 56, + 0x7FFFFFFFFFFFFFFFULL, UINT64_MAX}); +} + +TEST(PatchUtilsTest, EncodeDecodeVarInt64) { + TestEncodeDecodeVarInt<int64_t>({0, 64, 128, 8192, 16384, 1 << 20, 1 << 21, + 1 << 22, 1LL << 55, 1LL << 56, -1, INT64_MIN, + INT64_MAX}); +} + +TEST(PatchUtilsTest, DecodeVarUInt32Malformed) { + constexpr uint32_t kUninit = static_cast<uint32_t>(-1LL); + + // Output variable to ensure that on failure, the output variable is not + // written to. + uint32_t value = uint32_t(-1); + + auto TestDecodeVarInt = [&value](const std::vector<uint8_t>& buffer) { + value = kUninit; + return DecodeVarUInt(buffer.begin(), buffer.end(), &value); + }; + + // Exhausted. + EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>{})); + EXPECT_EQ(kUninit, value); + EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(4, 128))); + EXPECT_EQ(kUninit, value); + + // Overflow. + EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(6, 128))); + EXPECT_EQ(kUninit, value); + EXPECT_EQ(0, TestDecodeVarInt({128, 128, 128, 128, 128, 42})); + EXPECT_EQ(kUninit, value); + + // Following are pathological cases that are not handled for simplicity, + // hence decoding is expected to be successful. + EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 16})); + EXPECT_EQ(uint32_t(0), value); + EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 32})); + EXPECT_EQ(uint32_t(0), value); + EXPECT_NE(0, TestDecodeVarInt({128, 128, 128, 128, 64})); + EXPECT_EQ(uint32_t(0), value); +} + +TEST(PatchUtilsTest, DecodeVarUInt64Malformed) { + constexpr uint64_t kUninit = static_cast<uint64_t>(-1); + + uint64_t value = kUninit; + auto TestDecodeVarInt = [&value](const std::vector<uint8_t>& buffer) { + value = kUninit; + return DecodeVarUInt(buffer.begin(), buffer.end(), &value); + }; + + // Exhausted. 
+ EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>{})); + EXPECT_EQ(kUninit, value); + EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(9, 128))); + EXPECT_EQ(kUninit, value); + + // Overflow. + EXPECT_EQ(0, TestDecodeVarInt(std::vector<uint8_t>(10, 128))); + EXPECT_EQ(kUninit, value); + EXPECT_EQ(0, TestDecodeVarInt( + {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 42})); + EXPECT_EQ(kUninit, value); +} + +} // namespace zucchini diff --git a/patch_writer.cc b/patch_writer.cc new file mode 100644 index 0000000..1206208 --- /dev/null +++ b/patch_writer.cc @@ -0,0 +1,291 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/patch_writer.h" + +#include <algorithm> +#include <iterator> + +#include "base/numerics/checked_math.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/crc32.h" + +namespace zucchini { + +namespace patch { + +bool SerializeElementMatch(const ElementMatch& element_match, + BufferSink* sink) { + if (!element_match.IsValid()) + return false; + + PatchElementHeader element_header; + element_header.old_offset = + base::checked_cast<uint32_t>(element_match.old_element.offset); + element_header.old_length = + base::checked_cast<uint32_t>(element_match.old_element.size); + element_header.new_offset = + base::checked_cast<uint32_t>(element_match.new_element.offset); + element_header.new_length = + base::checked_cast<uint32_t>(element_match.new_element.size); + element_header.exe_type = element_match.exe_type(); + + return sink->PutValue<PatchElementHeader>(element_header); +} + +size_t SerializedElementMatchSize(const ElementMatch& element_match) { + return sizeof(PatchElementHeader); +} + +bool SerializeBuffer(const std::vector<uint8_t>& buffer, BufferSink* sink) { + // buffer.size() is not encoded as varint to simplify SerializedBufferSize(). 
+  base::CheckedNumeric<uint32_t> size = buffer.size();
+  if (!size.IsValid())
+    return false;
+  return sink->PutValue<uint32_t>(size.ValueOrDie()) &&
+         sink->PutRange(buffer.begin(), buffer.end());
+}
+
+size_t SerializedBufferSize(const std::vector<uint8_t>& buffer) {
+  return sizeof(uint32_t) + buffer.size();
+}
+
+}  // namespace patch
+
+/******** EquivalenceSink ********/
+
+EquivalenceSink::EquivalenceSink() = default;
+EquivalenceSink::EquivalenceSink(const std::vector<uint8_t>& src_skip,
+                                 const std::vector<uint8_t>& dst_skip,
+                                 const std::vector<uint8_t>& copy_count)
+    : src_skip_(src_skip), dst_skip_(dst_skip), copy_count_(copy_count) {}
+
+EquivalenceSink::EquivalenceSink(EquivalenceSink&&) = default;
+EquivalenceSink::~EquivalenceSink() = default;
+
+void EquivalenceSink::PutNext(const Equivalence& equivalence) {
+  // Equivalences are expected to be given ordered by |dst_offset|.
+  DCHECK_GE(equivalence.dst_offset, dst_offset_);
+  // Unsigned values are ensured by above check.
+
+  // Result of subtracting 2 unsigned integers is unsigned. Overflow is allowed
+  // for negative values, as long as uint32_t can hold the result.
+ uint32_t src_offset_diff = + base::strict_cast<uint32_t>(equivalence.src_offset - src_offset_); + EncodeVarInt<int32_t>(static_cast<int32_t>(src_offset_diff), + std::back_inserter(src_skip_)); + + EncodeVarUInt<uint32_t>( + base::strict_cast<uint32_t>(equivalence.dst_offset - dst_offset_), + std::back_inserter(dst_skip_)); + + EncodeVarUInt<uint32_t>(base::strict_cast<uint32_t>(equivalence.length), + std::back_inserter(copy_count_)); + + src_offset_ = equivalence.src_offset + equivalence.length; + dst_offset_ = equivalence.dst_offset + equivalence.length; +} + +size_t EquivalenceSink::SerializedSize() const { + return patch::SerializedBufferSize(src_skip_) + + patch::SerializedBufferSize(dst_skip_) + + patch::SerializedBufferSize(copy_count_); +} + +bool EquivalenceSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(src_skip_, sink) && + patch::SerializeBuffer(dst_skip_, sink) && + patch::SerializeBuffer(copy_count_, sink); +} + +/******** ExtraDataSink ********/ + +ExtraDataSink::ExtraDataSink() = default; +ExtraDataSink::ExtraDataSink(const std::vector<uint8_t>& extra_data) + : extra_data_(extra_data) {} + +ExtraDataSink::ExtraDataSink(ExtraDataSink&&) = default; +ExtraDataSink::~ExtraDataSink() = default; + +void ExtraDataSink::PutNext(ConstBufferView region) { + extra_data_.insert(extra_data_.end(), region.begin(), region.end()); +} + +size_t ExtraDataSink::SerializedSize() const { + return patch::SerializedBufferSize(extra_data_); +} + +bool ExtraDataSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(extra_data_, sink); +} + +/******** RawDeltaSink ********/ + +RawDeltaSink::RawDeltaSink() = default; +RawDeltaSink::RawDeltaSink(const std::vector<uint8_t>& raw_delta_skip, + const std::vector<uint8_t>& raw_delta_diff) + : raw_delta_skip_(raw_delta_skip), raw_delta_diff_(raw_delta_diff) {} + +RawDeltaSink::RawDeltaSink(RawDeltaSink&&) = default; +RawDeltaSink::~RawDeltaSink() = default; + +void 
RawDeltaSink::PutNext(const RawDeltaUnit& delta) { + DCHECK_GE(delta.copy_offset, copy_offset_compensation_); + EncodeVarUInt<uint32_t>(base::strict_cast<uint32_t>( + delta.copy_offset - copy_offset_compensation_), + std::back_inserter(raw_delta_skip_)); + + copy_offset_compensation_ = delta.copy_offset + 1; + + raw_delta_diff_.push_back(delta.diff); +} + +size_t RawDeltaSink::SerializedSize() const { + return patch::SerializedBufferSize(raw_delta_skip_) + + patch::SerializedBufferSize(raw_delta_diff_); +} + +bool RawDeltaSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(raw_delta_skip_, sink) && + patch::SerializeBuffer(raw_delta_diff_, sink); +} + +/******** ReferenceDeltaSink ********/ + +ReferenceDeltaSink::ReferenceDeltaSink() = default; +ReferenceDeltaSink::ReferenceDeltaSink( + const std::vector<uint8_t>& reference_delta) + : reference_delta_(reference_delta) {} + +ReferenceDeltaSink::ReferenceDeltaSink(ReferenceDeltaSink&&) = default; +ReferenceDeltaSink::~ReferenceDeltaSink() = default; + +void ReferenceDeltaSink::PutNext(int32_t diff) { + EncodeVarInt<int32_t>(diff, std::back_inserter(reference_delta_)); +} + +size_t ReferenceDeltaSink::SerializedSize() const { + return patch::SerializedBufferSize(reference_delta_); +} + +bool ReferenceDeltaSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(reference_delta_, sink); +} + +/******** TargetSink ********/ + +TargetSink::TargetSink() = default; +TargetSink::TargetSink(const std::vector<uint8_t>& extra_targets) + : extra_targets_(extra_targets) {} + +TargetSink::TargetSink(TargetSink&&) = default; +TargetSink::~TargetSink() = default; + +void TargetSink::PutNext(uint32_t target) { + DCHECK_GE(target, target_compensation_); + + EncodeVarUInt<uint32_t>( + base::strict_cast<uint32_t>(target - target_compensation_), + std::back_inserter(extra_targets_)); + + target_compensation_ = target + 1; +} + +size_t TargetSink::SerializedSize() const { + return 
patch::SerializedBufferSize(extra_targets_); +} + +bool TargetSink::SerializeInto(BufferSink* sink) const { + return patch::SerializeBuffer(extra_targets_, sink); +} + +/******** PatchElementWriter ********/ + +PatchElementWriter::PatchElementWriter() = default; +PatchElementWriter::PatchElementWriter(ElementMatch element_match) + : element_match_(element_match) {} + +PatchElementWriter::PatchElementWriter(PatchElementWriter&&) = default; +PatchElementWriter::~PatchElementWriter() = default; + +size_t PatchElementWriter::SerializedSize() const { + size_t serialized_size = + patch::SerializedElementMatchSize(element_match_) + + equivalences_->SerializedSize() + extra_data_->SerializedSize() + + raw_delta_->SerializedSize() + reference_delta_->SerializedSize(); + + serialized_size += sizeof(uint32_t); + for (const auto& extra_symbols : extra_targets_) + serialized_size += extra_symbols.second.SerializedSize() + 1; + return serialized_size; +} + +bool PatchElementWriter::SerializeInto(BufferSink* sink) const { + bool ok = + patch::SerializeElementMatch(element_match_, sink) && + equivalences_->SerializeInto(sink) && extra_data_->SerializeInto(sink) && + raw_delta_->SerializeInto(sink) && reference_delta_->SerializeInto(sink); + if (!ok) + return false; + + if (!sink->PutValue<uint32_t>( + base::checked_cast<uint32_t>(extra_targets_.size()))) + return false; + for (const auto& extra_target_sink : extra_targets_) { + if (!sink->PutValue<uint8_t>(extra_target_sink.first.value())) + return false; + if (!extra_target_sink.second.SerializeInto(sink)) + return false; + } + return true; +} + +/******** EnsemblePatchWriter ********/ + +EnsemblePatchWriter::~EnsemblePatchWriter() = default; + +EnsemblePatchWriter::EnsemblePatchWriter(const PatchHeader& header) + : header_(header) { + DCHECK_EQ(header_.magic, PatchHeader::kMagic); +} + +EnsemblePatchWriter::EnsemblePatchWriter(ConstBufferView old_image, + ConstBufferView new_image) { + header_.magic = PatchHeader::kMagic; + 
header_.old_size = base::checked_cast<uint32_t>(old_image.size()); + header_.old_crc = CalculateCrc32(old_image.begin(), old_image.end()); + header_.new_size = base::checked_cast<uint32_t>(new_image.size()); + header_.new_crc = CalculateCrc32(new_image.begin(), new_image.end()); +} + +void EnsemblePatchWriter::AddElement(PatchElementWriter&& patch_element) { + DCHECK(patch_element.new_element().offset == current_dst_offset_); + current_dst_offset_ = patch_element.new_element().EndOffset(); + elements_.push_back(std::move(patch_element)); +} + +size_t EnsemblePatchWriter::SerializedSize() const { + size_t serialized_size = sizeof(PatchHeader) + sizeof(uint32_t); + for (const auto& patch_element : elements_) { + serialized_size += patch_element.SerializedSize(); + } + return serialized_size; +} + +bool EnsemblePatchWriter::SerializeInto(BufferSink* sink) const { + DCHECK_EQ(current_dst_offset_, header_.new_size); + bool ok = + sink->PutValue<PatchHeader>(header_) && + sink->PutValue<uint32_t>(base::checked_cast<uint32_t>(elements_.size())); + if (!ok) + return false; + + for (const auto& element : elements_) { + if (!element.SerializeInto(sink)) + return false; + } + return true; +} + +} // namespace zucchini diff --git a/patch_writer.h b/patch_writer.h new file mode 100644 index 0000000..26b7baf --- /dev/null +++ b/patch_writer.h @@ -0,0 +1,272 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_PATCH_WRITER_H_ +#define COMPONENTS_ZUCCHINI_PATCH_WRITER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <utility> +#include <vector> + +#include "base/check.h" +#include "components/zucchini/buffer_sink.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/patch_utils.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +namespace patch { + +// If sufficient space is available, serializes |element_match| into |sink| and +// returns true. Otherwise returns false, and |sink| will be in an undefined +// state. +bool SerializeElementMatch(const ElementMatch& element_match, BufferSink* sink); + +// Returns the size in bytes required to serialize |element_match|. +size_t SerializedElementMatchSize(const ElementMatch& element_match); + +// If sufficient space is available, serializes |buffer| into |sink| and returns +// true. Otherwise returns false, and |sink| will be in an undefined state. +bool SerializeBuffer(const std::vector<uint8_t>& buffer, BufferSink* sink); + +// Returns the size in bytes required to serialize |buffer|. +size_t SerializedBufferSize(const std::vector<uint8_t>& buffer); + +} // namespace patch + +// Each of *Sink classes below has an associated "main type", and performs the +// following: +// - Receives multiple "main type" elements (hence "Sink" in the name). +// - Encodes list of received data, and writes them to internal storage (e.g., +// applying delta encoding). +// - Writes encoded data to BufferSink. +// +// Common "core functions" implemented for *Sink classes are: +// - void PutNext(const MAIN_TYPE& inst): Encodes and writes an instance of +// MAIN_TYPE to internal storage. Assumptions may be applied to successive +// |inst| provided. +// - size_t SerializedSize() const: Returns the serialized size in bytes of +// internal storage. 
+// - bool SerializeInto(BufferSink* sink) const: If |sink| has enough space,
+//   serializes internal storage into |sink|, and returns true. Otherwise
+//   returns false.
+//
+// Usage of *Sink instances don't mix, and PutNext() have dissimilar
+// interfaces. Therefore we do not use inheritance to relate *Sink classes, and
+// simply implement "core functions" with matching names.
+
+// Sink for equivalences.
+class EquivalenceSink {
+ public:
+  EquivalenceSink();
+  EquivalenceSink(const std::vector<uint8_t>& src_skip,
+                  const std::vector<uint8_t>& dst_skip,
+                  const std::vector<uint8_t>& copy_count);
+
+  EquivalenceSink(EquivalenceSink&&);
+  ~EquivalenceSink();
+
+  // Core functions.
+  // Equivalences must be given by increasing |Equivalence::dst_offset|.
+  void PutNext(const Equivalence& equivalence);
+  size_t SerializedSize() const;
+  bool SerializeInto(BufferSink* sink) const;
+
+ private:
+  // Offset in source, delta-encoded starting from end of last equivalence, and
+  // stored as signed varint.
+  std::vector<uint8_t> src_skip_;
+  // Offset in destination, delta-encoded starting from end of last equivalence,
+  // and stored as unsigned varint.
+  std::vector<uint8_t> dst_skip_;
+  // Length of equivalence stored as unsigned varint.
+  // TODO(etiennep): Investigate on bias.
+  std::vector<uint8_t> copy_count_;
+
+  offset_t src_offset_ = 0;  // Last offset in source.
+  offset_t dst_offset_ = 0;  // Last offset in destination.
+};
+
+// Sink for extra data.
+class ExtraDataSink {
+ public:
+  ExtraDataSink();
+  explicit ExtraDataSink(const std::vector<uint8_t>& extra_data);
+  ExtraDataSink(ExtraDataSink&&);
+  ~ExtraDataSink();
+
+  // Core functions.
+  void PutNext(ConstBufferView region);
+  size_t SerializedSize() const;
+  bool SerializeInto(BufferSink* sink) const;
+
+ private:
+  std::vector<uint8_t> extra_data_;
+};
+
+// Sink for raw delta.
+class RawDeltaSink {
+ public:
+  RawDeltaSink();
+  RawDeltaSink(const std::vector<uint8_t>& raw_delta_skip,
+               const std::vector<uint8_t>& raw_delta_diff);
+  RawDeltaSink(RawDeltaSink&&);
+  ~RawDeltaSink();
+
+  // Core functions.
+  // Deltas must be given by increasing |RawDeltaUnit::copy_offset|.
+  void PutNext(const RawDeltaUnit& delta);
+  size_t SerializedSize() const;
+  bool SerializeInto(BufferSink* sink) const;
+
+ private:
+  std::vector<uint8_t> raw_delta_skip_;  // Copy offset starting from last delta.
+  std::vector<uint8_t> raw_delta_diff_;  // Bytewise difference.
+
+  // We keep track of the compensation needed for next copy offset, taking into
+  // account delta encoding and bias of -1. Stored delta are biased by -1, so a
+  // sequence of single byte deltas is represented as a string of 0's.
+  offset_t copy_offset_compensation_ = 0;
+};
+
+// Sink for reference delta.
+class ReferenceDeltaSink {
+ public:
+  ReferenceDeltaSink();
+  explicit ReferenceDeltaSink(const std::vector<uint8_t>& reference_delta);
+  ReferenceDeltaSink(ReferenceDeltaSink&&);
+  ~ReferenceDeltaSink();
+
+  // Core functions.
+  void PutNext(int32_t diff);
+  size_t SerializedSize() const;
+  bool SerializeInto(BufferSink* sink) const;
+
+ private:
+  std::vector<uint8_t> reference_delta_;
+};
+
+// Sink for additional targets.
+class TargetSink {
+ public:
+  TargetSink();
+  explicit TargetSink(const std::vector<uint8_t>& extra_targets);
+  TargetSink(TargetSink&&);
+  ~TargetSink();
+
+  // Core functions.
+  // Targets must be given by increasing order.
+  void PutNext(uint32_t target);
+  size_t SerializedSize() const;
+  bool SerializeInto(BufferSink* sink) const;
+
+ private:
+  // Targets are delta-encoded and biased by 1, stored as unsigned varint.
+  std::vector<uint8_t> extra_targets_;
+
+  // We keep track of the compensation needed for next target, taking into
+  // account delta encoding and bias of -1.
+  offset_t target_compensation_ = 0;
+};
+
+// Following are utility classes to write structured data forming a patch.
+
+// Utility to write a patch element. A patch element contains all the
+// information necessary to patch a single element. This class
+// provides an interface to individually set different building blocks of data
+// in the patch element.
+class PatchElementWriter {
+ public:
+  PatchElementWriter();
+  explicit PatchElementWriter(ElementMatch element_match);
+  PatchElementWriter(PatchElementWriter&&);
+  ~PatchElementWriter();
+
+  const ElementMatch& element_match() const { return element_match_; }
+  const Element& old_element() const { return element_match_.old_element; }
+  const Element& new_element() const { return element_match_.new_element; }
+
+  // Following methods set individual blocks for this element. Previous
+  // corresponding block is replaced. All streams must be set before call to
+  // SerializedSize() or SerializeInto().
+
+  void SetEquivalenceSink(EquivalenceSink&& equivalences) {
+    equivalences_.emplace(std::move(equivalences));
+  }
+  void SetExtraDataSink(ExtraDataSink&& extra_data) {
+    extra_data_.emplace(std::move(extra_data));
+  }
+  void SetRawDeltaSink(RawDeltaSink&& raw_delta) {
+    raw_delta_.emplace(std::move(raw_delta));
+  }
+  void SetReferenceDeltaSink(ReferenceDeltaSink reference_delta) {
+    reference_delta_.emplace(std::move(reference_delta));
+  }
+  // Set additional targets for pool identified with |pool_tag|.
+  void SetTargetSink(PoolTag pool_tag, TargetSink&& extra_targets) {
+    DCHECK(pool_tag != kNoPoolTag);
+    extra_targets_.emplace(pool_tag, std::move(extra_targets));
+  }
+
+  // Returns the serialized size in bytes of the data this object is holding.
+  size_t SerializedSize() const;
+
+  // If sufficient space is available, serializes data into |sink|, which is at
+  // least SerializedSize() bytes, and returns true. Otherwise returns false.
+  bool SerializeInto(BufferSink* sink) const;
+
+ private:
+  ElementMatch element_match_;
+  absl::optional<EquivalenceSink> equivalences_;
+  absl::optional<ExtraDataSink> extra_data_;
+  absl::optional<RawDeltaSink> raw_delta_;
+  absl::optional<ReferenceDeltaSink> reference_delta_;
+  std::map<PoolTag, TargetSink> extra_targets_;
+};
+
+// Utility to write a Zucchini ensemble patch. An ensemble patch is the
+// concatenation of a patch header with a vector of patch elements.
+class EnsemblePatchWriter {
+ public:
+  explicit EnsemblePatchWriter(const PatchHeader& header);
+  EnsemblePatchWriter(ConstBufferView old_image, ConstBufferView new_image);
+  EnsemblePatchWriter(const EnsemblePatchWriter&) = delete;
+  const EnsemblePatchWriter& operator=(const EnsemblePatchWriter&) = delete;
+  ~EnsemblePatchWriter();
+
+  // Reserves space for |count| patch elements.
+  void ReserveElements(size_t count) { elements_.reserve(count); }
+
+  // Adds a patch element into the patch. Patch elements must be ordered by
+  // their location in the new image file.
+  void AddElement(PatchElementWriter&& patch_element);
+
+  // Returns the serialized size in bytes of the data this object is holding.
+  size_t SerializedSize() const;
+
+  // If sufficient space is available, serializes data into |sink|, which is at
+  // least SerializedSize() bytes, and returns true. Otherwise returns false.
+  bool SerializeInto(BufferSink* sink) const;
+
+  // If sufficient space is available, serializes data into |buffer|, which is
+  // at least SerializedSize() bytes, and returns true. Otherwise returns false. 
+ bool SerializeInto(MutableBufferView buffer) const { + BufferSink sink(buffer); + return SerializeInto(&sink); + } + + private: + PatchHeader header_; + std::vector<PatchElementWriter> elements_; + offset_t current_dst_offset_ = 0; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_PATCH_WRITER_H_ diff --git a/reference_bytes_mixer.cc b/reference_bytes_mixer.cc new file mode 100644 index 0000000..6855853 --- /dev/null +++ b/reference_bytes_mixer.cc @@ -0,0 +1,150 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/reference_bytes_mixer.h" + +#include <algorithm> + +#include "base/check_op.h" +#include "base/logging.h" +#include "base/notreached.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/disassembler_elf.h" + +namespace zucchini { + +/******** ReferenceBytesMixer ********/ + +// Default implementation is a stub, i.e., for architectures whose references +// have operation bits and payload bits stored in separate bytes. So during +// patch application, payload bits are copied for matched blocks, ignored by +// bytewise corrections, and fixed by reference target corrections. +ReferenceBytesMixer::ReferenceBytesMixer() {} + +ReferenceBytesMixer::~ReferenceBytesMixer() = default; + +// static. +std::unique_ptr<ReferenceBytesMixer> ReferenceBytesMixer::Create( + const Disassembler& src_dis, + const Disassembler& dst_dis) { + ExecutableType exe_type = src_dis.GetExeType(); + DCHECK_EQ(exe_type, dst_dis.GetExeType()); + if (exe_type == kExeTypeElfAArch32) + return std::make_unique<ReferenceBytesMixerElfArm>(exe_type); + if (exe_type == kExeTypeElfAArch64) + return std::make_unique<ReferenceBytesMixerElfArm>(exe_type); + return std::make_unique<ReferenceBytesMixer>(); +} + +// Stub implementation. 
+int ReferenceBytesMixer::NumBytes(uint8_t type) const { + return 0; +} + +// Base class implementation is a stub that should not be called. +ConstBufferView ReferenceBytesMixer::Mix(uint8_t type, + ConstBufferView old_view, + offset_t old_offset, + ConstBufferView new_view, + offset_t new_offset) { + NOTREACHED() << "Stub."; + return ConstBufferView(); +} + +/******** ReferenceBytesMixerElfArm ********/ + +ReferenceBytesMixerElfArm::ReferenceBytesMixerElfArm(ExecutableType exe_type) + : exe_type_(exe_type), out_buffer_(4) {} // 4 is a bound on NumBytes(). + +ReferenceBytesMixerElfArm::~ReferenceBytesMixerElfArm() = default; + +int ReferenceBytesMixerElfArm::NumBytes(uint8_t type) const { + if (exe_type_ == kExeTypeElfAArch32) { + switch (type) { + case AArch32ReferenceType::kRel32_A24: // Falls through. + case AArch32ReferenceType::kRel32_T20: + case AArch32ReferenceType::kRel32_T24: + return 4; + case AArch32ReferenceType::kRel32_T8: // Falls through. + case AArch32ReferenceType::kRel32_T11: + return 2; + } + } else if (exe_type_ == kExeTypeElfAArch64) { + switch (type) { + case AArch64ReferenceType::kRel32_Immd14: // Falls through. + case AArch64ReferenceType::kRel32_Immd19: + case AArch64ReferenceType::kRel32_Immd26: + return 4; + } + } + return 0; +} + +ConstBufferView ReferenceBytesMixerElfArm::Mix(uint8_t type, + ConstBufferView old_view, + offset_t old_offset, + ConstBufferView new_view, + offset_t new_offset) { + int num_bytes = NumBytes(type); + ConstBufferView::const_iterator new_it = new_view.begin() + new_offset; + DCHECK_LE(static_cast<size_t>(num_bytes), out_buffer_.size()); + MutableBufferView out_buffer_view(&out_buffer_[0], num_bytes); + std::copy(new_it, new_it + num_bytes, out_buffer_view.begin()); + + ArmCopyDispFun copier = GetCopier(type); + DCHECK_NE(copier, nullptr); + + if (!copier(old_view, old_offset, out_buffer_view, 0U)) { + // Failed to mix old payload bits with new operation bits. 
The main cause of + // of this rare failure is when BL (encoding T1) with payload bits + // representing disp % 4 == 2 transforms into BLX (encoding T2). Error + // arises because BLX requires payload bits to have disp == 0 (mod 4). + // Mixing failures are not fatal to patching; we simply fall back to direct + // copy and forgo benefits from mixing for these cases. + // TODO(huangs, etiennep): Ongoing discussion on whether we should just + // nullify all payload disp so we won't have to deal with this case, but at + // the cost of having Zucchini-apply do more work. + static int output_quota = 10; + if (output_quota > 0) { + LOG(WARNING) << "Reference byte mix failed with type = " + << static_cast<uint32_t>(type) << "." << std::endl; + --output_quota; + if (!output_quota) + LOG(WARNING) << "(Additional output suppressed)"; + } + // Fall back to direct copy. + std::copy(new_it, new_it + num_bytes, out_buffer_view.begin()); + } + return ConstBufferView(out_buffer_view); +} + +ArmCopyDispFun ReferenceBytesMixerElfArm::GetCopier(uint8_t type) const { + if (exe_type_ == kExeTypeElfAArch32) { + switch (type) { + case AArch32ReferenceType::kRel32_A24: + return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_A24>; + case AArch32ReferenceType::kRel32_T8: + return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T8>; + case AArch32ReferenceType::kRel32_T11: + return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T11>; + case AArch32ReferenceType::kRel32_T20: + return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T20>; + case AArch32ReferenceType::kRel32_T24: + return ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T24>; + } + } else if (exe_type_ == kExeTypeElfAArch64) { + switch (type) { + case AArch64ReferenceType::kRel32_Immd14: + return ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd14>; + case AArch64ReferenceType::kRel32_Immd19: + return ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd19>; + case AArch64ReferenceType::kRel32_Immd26: + return 
ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd26>; + } + } + DLOG(FATAL) << "NOTREACHED"; + return nullptr; +} + +} // namespace zucchini diff --git a/reference_bytes_mixer.h b/reference_bytes_mixer.h new file mode 100644 index 0000000..f20b0ef --- /dev/null +++ b/reference_bytes_mixer.h @@ -0,0 +1,118 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_ +#define COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_ + +#include <stdint.h> + +#include <memory> + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/rel32_utils.h" + +namespace zucchini { + +class Disassembler; + +// References encoding may be quite complex in some architectures (e.g., ARM), +// requiring bit-level manipulation. In general, bits in a reference body fall +// under 2 categories: +// - Operation bits: Instruction op code, conditionals, or structural data. +// - Payload bits: Actual target data of the reference. These may be absolute, +// or be displacements relative to instruction pointer / program counter. +// During patch application, +// Old reference bytes = {old operation, old payload}, +// is transformed to +// New reference bytes = {new operation, new payload}. +// New image bytes are written by three sources: +// (1) Direct copy from old image to new image for matched blocks. +// (2) Bytewise diff correction. +// (3) Dedicated reference target correction. +// +// For references whose operation and payload bits are stored in easily +// separable bytes (e.g., rel32 reference in X86), (2) can exclude payload bits. +// So during patch application, (1) naively copies everything, (2) fixes +// operation bytes only, and (3) fixes payload bytes only. 
+//
+// For architectures with references whose operation and payload bits may mix
+// within shared bytes (e.g., ARM rel32), a dilemma arises:
+// - (2) cannot ignore shared bytes, since otherwise new operation bits do not
+//   properly transfer.
+// - Having (2) always overwrite these bytes would reduce the benefits of
+//   reference correction, since references are likely to change.
+//
+// Our solution applies a hybrid approach: For each matching old / new reference
+// pair, define:
+//   Mixed reference bytes = {new operation, old payload},
+//
+// During patch generation, we compute bytewise correction from old reference
+// bytes to the mixed reference bytes. So during patch application, (2) only
+// corrects operation bit changes (and skips if they don't change), and (3)
+// overwrites old payload bits to new payload bits.

+// A base class for (stateful) mixed reference byte generation. This base class
+// serves as a stub. Architectures whose references' operation bits and
+// payload bits can share common bytes (e.g., ARM rel32) should override this.
+class ReferenceBytesMixer {
+ public:
+  ReferenceBytesMixer();
+  ReferenceBytesMixer(const ReferenceBytesMixer&) = delete;
+  const ReferenceBytesMixer& operator=(const ReferenceBytesMixer&) = delete;
+  virtual ~ReferenceBytesMixer();
+
+  // Returns a new ReferenceBytesMixer instance that's owned by the caller.
+  static std::unique_ptr<ReferenceBytesMixer> Create(
+      const Disassembler& src_dis,
+      const Disassembler& dst_dis);
+
+  // Returns the number of bytes that need to be mixed for references with given
+  // |type|. Returns 0 if no mixing is required.
+  virtual int NumBytes(uint8_t type) const;
+
+  // Computes mixed reference bytes by combining (a) "payload bits" from an
+  // "old" reference of |type| at |old_view[old_offset]| with (b) "operation
+  // bits" from a "new" reference of |type| at |new_view[new_offset]|.
Returns + // the result as ConstBufferView, which is valid only until the next call to + // Mix(). + virtual ConstBufferView Mix(uint8_t type, + ConstBufferView old_view, + offset_t old_offset, + ConstBufferView new_view, + offset_t new_offset); +}; + +// In AArch32 and AArch64, instructions mix operation bits and payload bits in +// complex ways. This is the main use case of ReferenceBytesMixer. +class ReferenceBytesMixerElfArm : public ReferenceBytesMixer { + public: + // |exe_type| must be EXE_TYPE_ELF_ARM or EXE_TYPE_ELF_AARCH64. + explicit ReferenceBytesMixerElfArm(ExecutableType exe_type); + ReferenceBytesMixerElfArm(const ReferenceBytesMixerElfArm&) = delete; + const ReferenceBytesMixerElfArm& operator=(const ReferenceBytesMixerElfArm&) = + delete; + ~ReferenceBytesMixerElfArm() override; + + // ReferenceBytesMixer: + int NumBytes(uint8_t type) const override; + ConstBufferView Mix(uint8_t type, + ConstBufferView old_view, + offset_t old_offset, + ConstBufferView new_view, + offset_t new_offset) override; + + private: + ArmCopyDispFun GetCopier(uint8_t type) const; + + // For simplicity, 32-bit vs. 64-bit distinction is represented by state + // |exe_type_|, instead of creating derived classes. + const ExecutableType exe_type_; + + std::vector<uint8_t> out_buffer_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_ diff --git a/reference_set.cc b/reference_set.cc new file mode 100644 index 0000000..82a9951 --- /dev/null +++ b/reference_set.cc @@ -0,0 +1,60 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/reference_set.h" + +#include <algorithm> +#include <iterator> + +#include "base/check_op.h" +#include "components/zucchini/target_pool.h" + +namespace zucchini { + +namespace { + +// Returns true if |refs| is sorted by location. 
+bool IsReferenceListSorted(const std::vector<Reference>& refs) { + return std::is_sorted(refs.begin(), refs.end(), + [](const Reference& a, const Reference& b) { + return a.location < b.location; + }); +} + +} // namespace + +ReferenceSet::ReferenceSet(const ReferenceTypeTraits& traits, + const TargetPool& target_pool) + : traits_(traits), target_pool_(target_pool) {} +ReferenceSet::ReferenceSet(ReferenceSet&&) = default; +ReferenceSet::~ReferenceSet() = default; + +void ReferenceSet::InitReferences(ReferenceReader&& ref_reader) { + DCHECK(references_.empty()); + for (auto ref = ref_reader.GetNext(); ref.has_value(); + ref = ref_reader.GetNext()) { + references_.push_back(*ref); + } + DCHECK(IsReferenceListSorted(references_)); +} + +void ReferenceSet::InitReferences(const std::vector<Reference>& refs) { + DCHECK(references_.empty()); + DCHECK(IsReferenceListSorted(references_)); + references_.assign(refs.begin(), refs.end()); +} + +Reference ReferenceSet::at(offset_t offset) const { + auto pos = std::upper_bound(references_.begin(), references_.end(), offset, + [](offset_t offset, const Reference& ref) { + return offset < ref.location; + }); + + DCHECK(pos != references_.begin()); // Iterators. + --pos; + DCHECK_LT(offset, pos->location + width()); + return *pos; +} + +} // namespace zucchini diff --git a/reference_set.h b/reference_set.h new file mode 100644 index 0000000..07940f0 --- /dev/null +++ b/reference_set.h @@ -0,0 +1,64 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_REFERENCE_SET_H_ +#define COMPONENTS_ZUCCHINI_REFERENCE_SET_H_ + +#include <stddef.h> + +#include <vector> + +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +class TargetPool; + +// Container of distinct references of one type, along with traits, only used +// during patch generation. 
+class ReferenceSet { + public: + using const_iterator = std::vector<Reference>::const_iterator; + + // |traits| specifies the reference represented. |target_pool| specifies + // common targets shared by all reference represented, and mediates target + // translation between offsets and indexes. + ReferenceSet(const ReferenceTypeTraits& traits, + const TargetPool& target_pool); + ReferenceSet(const ReferenceSet&) = delete; + ReferenceSet(ReferenceSet&&); + ~ReferenceSet(); + + // Either one of the initializers below should be called exactly once. These + // insert all references from |ref_reader/refs| into this class. The targets + // of these references must be in |target_pool_|. + void InitReferences(ReferenceReader&& ref_reader); + void InitReferences(const std::vector<Reference>& refs); + + const std::vector<Reference>& references() const { return references_; } + const ReferenceTypeTraits& traits() const { return traits_; } + const TargetPool& target_pool() const { return target_pool_; } + TypeTag type_tag() const { return traits_.type_tag; } + PoolTag pool_tag() const { return traits_.pool_tag; } + offset_t width() const { return traits_.width; } + + // Looks up the Reference by an |offset| that it spans. |offset| is assumed to + // be valid, i.e., |offset| must be spanned by some Reference in + // |references_|. + Reference at(offset_t offset) const; + + size_t size() const { return references_.size(); } + const_iterator begin() const { return references_.begin(); } + const_iterator end() const { return references_.end(); } + + private: + ReferenceTypeTraits traits_; + const TargetPool& target_pool_; + // List of distinct Reference instances sorted by location. 
+ std::vector<Reference> references_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_REFERENCE_SET_H_ diff --git a/reference_set_unittest.cc b/reference_set_unittest.cc new file mode 100644 index 0000000..0bf869e --- /dev/null +++ b/reference_set_unittest.cc @@ -0,0 +1,49 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/reference_set.h" + +#include <vector> + +#include "components/zucchini/image_utils.h" +#include "components/zucchini/target_pool.h" +#include "components/zucchini/test_reference_reader.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +constexpr offset_t kWidth = 2U; + +} // namespace + +class ReferenceSetTest : public testing::Test { + protected: + // For simplicity, |target_pool_| has no type info (not needed here). + TargetPool target_pool_ = TargetPool{{0, 2, 3, 5}}; + ReferenceSet reference_set_ = + ReferenceSet{{kWidth, TypeTag(0), PoolTag(0)}, target_pool_}; +}; + +TEST_F(ReferenceSetTest, InitReferencesFromReader) { + EXPECT_EQ(std::vector<Reference>(), reference_set_.references()); + EXPECT_EQ(0U, reference_set_.size()); + std::vector<Reference> references = {{10, 0}, {12, 2}, {14, 5}}; + reference_set_.InitReferences(TestReferenceReader(references)); + EXPECT_EQ(references, reference_set_.references()); +} + +TEST_F(ReferenceSetTest, At) { + reference_set_.InitReferences({{10, 0}, {12, 2}, {15, 5}}); + // Each references has kWidth = 2, so check all bytes covered. 
+ EXPECT_EQ(Reference({10, 0}), reference_set_.at(10)); + EXPECT_EQ(Reference({10, 0}), reference_set_.at(11)); + EXPECT_EQ(Reference({12, 2}), reference_set_.at(12)); + EXPECT_EQ(Reference({12, 2}), reference_set_.at(13)); + EXPECT_EQ(Reference({15, 5}), reference_set_.at(15)); + EXPECT_EQ(Reference({15, 5}), reference_set_.at(16)); +} + +} // namespace zucchini diff --git a/rel32_finder.cc b/rel32_finder.cc new file mode 100644 index 0000000..1ad8910 --- /dev/null +++ b/rel32_finder.cc @@ -0,0 +1,294 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/rel32_finder.h" + +#include <algorithm> + +#include "base/numerics/safe_conversions.h" + +namespace zucchini { + +/******** Abs32GapFinder ********/ + +Abs32GapFinder::Abs32GapFinder(ConstBufferView image, + ConstBufferView region, + const std::vector<offset_t>& abs32_locations, + size_t abs32_width) + : base_(image.begin()), + region_end_(region.end()), + abs32_end_(abs32_locations.end()), + abs32_width_(abs32_width) { + DCHECK_GT(abs32_width, size_t(0)); + DCHECK_GE(region.begin(), image.begin()); + DCHECK_LE(region.end(), image.end()); + + const offset_t begin_offset = + base::checked_cast<offset_t>(region.begin() - image.begin()); + // Find the first |abs32_cur_| with |*abs32_cur_ >= begin_offset|. + abs32_cur_ = std::lower_bound(abs32_locations.begin(), abs32_locations.end(), + begin_offset); + + // Find lower boundary, accounting for the possibility that |abs32_cur_[-1]| + // may straddle across |region.begin()|. + cur_lo_ = region.begin(); + if (abs32_cur_ > abs32_locations.begin()) + cur_lo_ = std::max(cur_lo_, image.begin() + abs32_cur_[-1] + abs32_width_); +} + +Abs32GapFinder::~Abs32GapFinder() = default; + +bool Abs32GapFinder::FindNext() { + // Iterate over |[abs32_cur_, abs32_end_)| and emit segments. 
+ while (abs32_cur_ != abs32_end_ && base_ + *abs32_cur_ < region_end_) { + ConstBufferView::const_iterator hi = base_ + *abs32_cur_; + gap_ = ConstBufferView::FromRange(cur_lo_, hi); + cur_lo_ = hi + abs32_width_; + ++abs32_cur_; + if (!gap_.empty()) + return true; + } + // Emit final segment. + if (cur_lo_ < region_end_) { + gap_ = ConstBufferView::FromRange(cur_lo_, region_end_); + cur_lo_ = region_end_; + return true; + } + return false; +} + +/******** Rel32Finder ********/ + +Rel32Finder::Rel32Finder(ConstBufferView image, + const AddressTranslator& translator) + : image_(image), offset_to_rva_(translator) {} + +Rel32Finder::~Rel32Finder() = default; + +void Rel32Finder::SetRegion(ConstBufferView region) { + region_ = region; + accept_it_ = region.begin(); +} + +bool Rel32Finder::FindNext() { + NextIterators next_iters = Scan(region_); + if (next_iters.reject == nullptr) { + region_.seek(region_.end()); + return false; + } + region_.seek(next_iters.reject); + accept_it_ = next_iters.accept; + DCHECK_GE(accept_it_, region_.begin()); + DCHECK_LE(accept_it_, region_.end()); + return true; +} + +void Rel32Finder::Accept() { + region_.seek(accept_it_); +} + +/******** Rel32FinderIntel ********/ + +Rel32Finder::NextIterators Rel32FinderIntel::SetResult( + ConstBufferView::const_iterator cursor, + uint32_t opcode_size, + bool can_point_outside_section) { + offset_t location = + base::checked_cast<offset_t>((cursor + opcode_size) - image_.begin()); + rva_t location_rva = offset_to_rva_.Convert(location); + DCHECK_NE(location_rva, kInvalidRva); + rva_t target_rva = location_rva + 4 + image_.read<uint32_t>(location); + rel32_ = {location, target_rva, can_point_outside_section}; + return {cursor + 1, cursor + (opcode_size + 4)}; +} + +/******** Rel32FinderX86 ********/ + +Rel32Finder::NextIterators Rel32FinderX86::Scan(ConstBufferView region) { + ConstBufferView::const_iterator cursor = region.begin(); + while (cursor < region.end()) { + // Heuristic rel32 detection by 
looking for opcodes that use them. + if (cursor + 5 <= region.end()) { + if (cursor[0] == 0xE8 || cursor[0] == 0xE9) // JMP rel32; CALL rel32 + return SetResult(cursor, 1, false); + } + if (cursor + 6 <= region.end()) { + if (cursor[0] == 0x0F && (cursor[1] & 0xF0) == 0x80) // Jcc long form + return SetResult(cursor, 2, false); + } + ++cursor; + } + return {nullptr, nullptr}; +} + +/******** Rel32FinderX64 ********/ + +Rel32Finder::NextIterators Rel32FinderX64::Scan(ConstBufferView region) { + ConstBufferView::const_iterator cursor = region.begin(); + while (cursor < region.end()) { + // Heuristic rel32 detection by looking for opcodes that use them. + if (cursor + 5 <= region.end()) { + if (cursor[0] == 0xE8 || cursor[0] == 0xE9) // JMP rel32; CALL rel32 + return SetResult(cursor, 1, false); + } + if (cursor + 6 <= region.end()) { + if (cursor[0] == 0x0F && (cursor[1] & 0xF0) == 0x80) { // Jcc long form + return SetResult(cursor, 2, false); + } else if ((cursor[0] == 0xFF && + (cursor[1] == 0x15 || cursor[1] == 0x25)) || + ((cursor[0] == 0x89 || cursor[0] == 0x8B || + cursor[0] == 0x8D) && + (cursor[1] & 0xC7) == 0x05)) { + // 6-byte instructions: + // [2-byte opcode] [disp32]: + // Opcode + // FF 15: CALL QWORD PTR [rip+disp32] + // FF 25: JMP QWORD PTR [rip+disp32] + // + // [1-byte opcode] [ModR/M] [disp32]: + // Opcode + // 89: MOV DWORD PTR [rip+disp32],reg + // 8B: MOV reg,DWORD PTR [rip+disp32] + // 8D: LEA reg,[rip+disp32] + // ModR/M : MMRRRMMM + // MM = 00 & MMM = 101 => rip+disp32 + // RRR: selects reg operand from [eax|ecx|...|edi] + return SetResult(cursor, 2, true); + } + } + ++cursor; + } + return {nullptr, nullptr}; +} + +/******** Rel32FinderArm ********/ + +template <typename ADDR_TYPE> +Rel32FinderArm<ADDR_TYPE>::Rel32FinderArm(ConstBufferView image, + const AddressTranslator& translator) + : Rel32Finder(image, translator) {} + +template <typename ADDR_TYPE> +Rel32FinderArm<ADDR_TYPE>::~Rel32FinderArm() = default; + +template <typename 
ADDR_TYPE> +Rel32Finder::NextIterators Rel32FinderArm<ADDR_TYPE>::SetResult( + Result&& result, + ConstBufferView::const_iterator cursor, + int instr_size) { + rel32_ = result; + return {cursor + instr_size, cursor + instr_size}; +} + +// SetResult() for end of scan. +template <typename ADDR_TYPE> +Rel32Finder::NextIterators Rel32FinderArm<ADDR_TYPE>::SetEmptyResult() { + rel32_ = {kInvalidOffset, kInvalidOffset, ADDR_TYPE::ADDR_NONE}; + return {nullptr, nullptr}; +} + +/******** Rel32FinderAArch32 ********/ + +Rel32FinderAArch32::Rel32FinderAArch32(ConstBufferView image, + const AddressTranslator& translator, + bool is_thumb2) + : Rel32FinderArm(image, translator), is_thumb2_(is_thumb2) {} + +Rel32FinderAArch32::~Rel32FinderAArch32() = default; + +Rel32Finder::NextIterators Rel32FinderAArch32::ScanA32(ConstBufferView region) { + // Guard against alignment potentially causing |cursor > region.end()|. + if (region.size() < 4) + return SetEmptyResult(); + ConstBufferView::const_iterator cursor = region.begin(); + cursor += IncrementForAlignCeil4(cursor - image_.begin()); + for (; region.end() - cursor >= 4; cursor += 4) { + offset_t offset = base::checked_cast<offset_t>(cursor - image_.begin()); + AArch32Rel32Translator translator; + rva_t instr_rva = offset_to_rva_.Convert(offset); + uint32_t code32 = translator.FetchArmCode32(image_, offset); + rva_t target_rva = kInvalidRva; + if (translator.ReadA24(instr_rva, code32, &target_rva)) { + return SetResult({offset, target_rva, AArch32Rel32Translator::ADDR_A24}, + cursor, 4); + } + } + return SetEmptyResult(); +} + +Rel32Finder::NextIterators Rel32FinderAArch32::ScanT32(ConstBufferView region) { + // Guard against alignment potentially causing |cursor > region.end()|. 
+ if (region.size() < 2) + return SetEmptyResult(); + ConstBufferView::const_iterator cursor = region.begin(); + cursor += IncrementForAlignCeil2(cursor - image_.begin()); + while (region.end() - cursor >= 2) { + offset_t offset = base::checked_cast<offset_t>(cursor - image_.begin()); + AArch32Rel32Translator translator; + AArch32Rel32Translator::AddrType type = AArch32Rel32Translator::ADDR_NONE; + rva_t instr_rva = offset_to_rva_.Convert(offset); + uint16_t code16 = translator.FetchThumb2Code16(image_, offset); + int instr_size = GetThumb2InstructionSize(code16); + rva_t target_rva = kInvalidRva; + if (instr_size == 2) { // 16-bit THUMB2 instruction. + if (translator.ReadT8(instr_rva, code16, &target_rva)) + type = AArch32Rel32Translator::ADDR_T8; + else if (translator.ReadT11(instr_rva, code16, &target_rva)) + type = AArch32Rel32Translator::ADDR_T11; + } else { // |instr_size == 4|: 32-bit THUMB2 instruction. + if (region.end() - cursor >= 4) { + uint32_t code32 = translator.FetchThumb2Code32(image_, offset); + if (translator.ReadT20(instr_rva, code32, &target_rva)) + type = AArch32Rel32Translator::ADDR_T20; + else if (translator.ReadT24(instr_rva, code32, &target_rva)) + type = AArch32Rel32Translator::ADDR_T24; + } + } + if (type != AArch32Rel32Translator::ADDR_NONE) + return SetResult({offset, target_rva, type}, cursor, instr_size); + cursor += instr_size; + } + return SetEmptyResult(); +} + +Rel32Finder::NextIterators Rel32FinderAArch32::Scan(ConstBufferView region) { + return is_thumb2_ ? ScanT32(region) : ScanA32(region); +} + +/******** Rel32FinderAArch64 ********/ + +Rel32FinderAArch64::Rel32FinderAArch64(ConstBufferView image, + const AddressTranslator& translator) + : Rel32FinderArm(image, translator) {} + +Rel32FinderAArch64::~Rel32FinderAArch64() = default; + +Rel32Finder::NextIterators Rel32FinderAArch64::Scan(ConstBufferView region) { + // Guard against alignment potentially causing |cursor > region.end()|. 
+ if (region.size() < 4) + return SetEmptyResult(); + ConstBufferView::const_iterator cursor = region.begin(); + cursor += IncrementForAlignCeil4(cursor - image_.begin()); + for (; region.end() - cursor >= 4; cursor += 4) { + offset_t offset = base::checked_cast<offset_t>(cursor - image_.begin()); + // For simplicity we assume RVA fits within 32-bits. + AArch64Rel32Translator translator; + AArch64Rel32Translator::AddrType type = AArch64Rel32Translator::ADDR_NONE; + rva_t instr_rva = offset_to_rva_.Convert(offset); + uint32_t code32 = translator.FetchCode32(image_, offset); + rva_t target_rva = kInvalidRva; + if (translator.ReadImmd14(instr_rva, code32, &target_rva)) { + type = AArch64Rel32Translator::ADDR_IMMD14; + } else if (translator.ReadImmd19(instr_rva, code32, &target_rva)) { + type = AArch64Rel32Translator::ADDR_IMMD19; + } else if (translator.ReadImmd26(instr_rva, code32, &target_rva)) { + type = AArch64Rel32Translator::ADDR_IMMD26; + } + if (type != AArch64Rel32Translator::ADDR_NONE) + return SetResult({offset, target_rva, type}, cursor, 4); + } + return SetEmptyResult(); +} + +} // namespace zucchini diff --git a/rel32_finder.h b/rel32_finder.h new file mode 100644 index 0000000..3ebeb95 --- /dev/null +++ b/rel32_finder.h @@ -0,0 +1,284 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_REL32_FINDER_H_ +#define COMPONENTS_ZUCCHINI_REL32_FINDER_H_ + +#include <stddef.h> + +#include <vector> + +#include "components/zucchini/address_translator.h" +#include "components/zucchini/arm_utils.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// See README.md for definitions on abs32 and rel32 references. The following +// are assumed: +// * Abs32 reference bodies have fixed widths. 
+// * Rel32 locations can be identified by heuristically disassembling machine +// code, and errors are tolerated. +// * The collection all abs32 and rel32 reference bodies do not overlap. + +// A class to visit non-empty contiguous gaps in |region| that lie outside of +// |abs32_locations| elements, each with a body that spans |abs32_width_| bytes. +// For example, given: +// region = [base_ + 4, base_ + 26), +// abs32_locations = {2, 6, 15, 20, 27}, +// abs32_width_ = 4, +// the following is obtained: +// 111111111122222222223 -> offsets +// 0123456789012345678901234567890 +// ....**********************..... -> region = * +// ^ ^ ^ ^ ^ -> abs32 locations +// aaaaaaaa aaaa aaaa aaaa -> abs32 bodies +// ....------*****----*----**..... -> regions excluding abs32 -> 3 gaps +// The resulting gaps (non-empty, so [6, 6) is excluded) are: +// [10, 15), [19, 20), [24, 26). +// These gaps can then be passed to Rel32Finder (below) to find rel32 references +// with bodies that are guaranteed to not overlap with any abs32 bodies. +class Abs32GapFinder { + public: + // |abs32_locations| is a sorted list of non-overlapping abs32 locations in + // |image|, each spanning |abs32_width| bytes. Gaps are searched in |region|, + // which must be part of |image|. + Abs32GapFinder(ConstBufferView image, + ConstBufferView region, + const std::vector<offset_t>& abs32_locations, + size_t abs32_width); + Abs32GapFinder(const Abs32GapFinder&) = delete; + const Abs32GapFinder& operator=(const Abs32GapFinder&) = delete; + ~Abs32GapFinder(); + + // Searches for the next available gap, and returns successfulness. + bool FindNext(); + + // Returns the cached result from the last successful FindNext(). 
+ ConstBufferView GetGap() const { return gap_; } + + private: + const ConstBufferView::const_iterator base_; + const ConstBufferView::const_iterator region_end_; + ConstBufferView::const_iterator cur_lo_; + const std::vector<offset_t>::const_iterator abs32_end_; + std::vector<offset_t>::const_iterator abs32_cur_; + const size_t abs32_width_; + ConstBufferView gap_; +}; + +// A class to scan regions within an image to find successive rel32 references. +// Architecture-specific parsing and result extraction are delegated to +// inherited classes (say, Rel32Finder_Impl). Sample extraction loop, combined +// with Abs32GapFinder usage: +// +// Abs32GapFinder gap_finder(...); +// Rel32Finder_Impl finder(...); +// while (gap_finder.FindNext()) { +// rel_finder.SetRegion(gap_finder.GetGap()); +// while (rel_finder.FindNext()) { +// auto rel32 = rel_finder.GetRel32(); // In Rel32Finder_Impl. +// if (architecture_specific_validation(rel32)) { +// rel_finder.Accept(); +// // Store rel32. +// } +// } +// } +class Rel32Finder { + public: + Rel32Finder(ConstBufferView image, const AddressTranslator& translator); + Rel32Finder(const Rel32Finder&) = delete; + const Rel32Finder& operator=(const Rel32Finder&) = delete; + virtual ~Rel32Finder(); + + // Assigns the scan |region| for rel32 references to enable FindNext() use. + void SetRegion(ConstBufferView region); + + // Scans for the next rel32 reference, and returns whether any is found, so a + // "while" loop can be used for iterative rel32 extraction. The results are + // cached in Rel32Finder_Impl and obtained by Rel32Finder_Impl::GetRel32(). + bool FindNext(); + + // When a rel32 reference is found, the caller needs to decide whether to keep + // the result (perhaps following more validation). If it decides to keep the + // result, then it must call Accept(), so the next call to FindNext() can skip + // the accepted rel32 reference. + void Accept(); + + // Accessors for unittest. 
+ ConstBufferView::const_iterator accept_it() const { return accept_it_; } + ConstBufferView region() const { return region_; } + + protected: + // Alternatives for where to continue the next scan when a rel32 reference is + // found. nulls indicate that no rel32 references remain. + struct NextIterators { + // The next iterator if the caller does not call Accept(). + ConstBufferView::const_iterator reject; + + // The next iterator if the caller calls Accept(). + ConstBufferView::const_iterator accept; + }; + + // Detects and extracts architecture-specific rel32 reference. For each one + // found, the implementation should cache the necessary data to be retrieved + // via accessors. Returns a NextIterators that stores alternatives for where + // to continue the scan. If no rel32 reference is found then the returned + // NextIterators are nulls. + virtual NextIterators Scan(ConstBufferView region) = 0; + + const ConstBufferView image_; + AddressTranslator::OffsetToRvaCache offset_to_rva_; + + private: + ConstBufferView region_; + ConstBufferView::const_iterator accept_it_ = nullptr; +}; + +// Parsing for X86 or X64: we perform naive scan for opcodes that have rel32 as +// an argument, and disregard instruction alignment. +class Rel32FinderIntel : public Rel32Finder { + public: + Rel32FinderIntel(const Rel32FinderIntel&) = delete; + const Rel32FinderIntel& operator=(const Rel32FinderIntel&) = delete; + + // Struct to store GetRel32() results. + struct Result { + offset_t location; + rva_t target_rva; + + // Some references must have their target in the same section as location, + // and we use this to heuristically reject rel32 reference candidates. + // When true, this constraint is relaxed. + bool can_point_outside_section; + }; + + using Rel32Finder::Rel32Finder; + + // Returns the cached result from the last successful FindNext(). + const Result& GetRel32() { return rel32_; } + + protected: + // Helper for Scan() that also assigns |rel32_|.
+ Rel32Finder::NextIterators SetResult(ConstBufferView::const_iterator cursor, + uint32_t code_size, + bool can_point_outside_section); + + // Cached results. + Result rel32_; + + // Rel32Finder: + NextIterators Scan(ConstBufferView region) override = 0; +}; + +// X86 instructions. +class Rel32FinderX86 : public Rel32FinderIntel { + public: + using Rel32FinderIntel::Rel32FinderIntel; + + Rel32FinderX86(const Rel32FinderX86&) = delete; + const Rel32FinderX86& operator=(const Rel32FinderX86&) = delete; + + private: + // Rel32Finder: + NextIterators Scan(ConstBufferView region) override; +}; + +// X64 instructions. +class Rel32FinderX64 : public Rel32FinderIntel { + public: + using Rel32FinderIntel::Rel32FinderIntel; + + Rel32FinderX64(const Rel32FinderX64&) = delete; + const Rel32FinderX64& operator=(const Rel32FinderX64&) = delete; + + private: + // Rel32Finder: + NextIterators Scan(ConstBufferView region) override; +}; + +// Base class for ARM (AArch32 and AArch64) instructions. +template <typename ADDR_TYPE> +class Rel32FinderArm : public Rel32Finder { + public: + struct Result { + offset_t location; + rva_t target_rva; + ADDR_TYPE type; + + // For testing. + bool operator==(const Result& other) const { + return location == other.location && target_rva == other.target_rva && + type == other.type; + } + }; + + Rel32FinderArm(ConstBufferView image, const AddressTranslator& translator); + Rel32FinderArm(const Rel32FinderArm&) = delete; + const Rel32FinderArm& operator=(const Rel32FinderArm&) = delete; + ~Rel32FinderArm() override; + + // Helper for Scan*() that also assigns |rel32_|. + NextIterators SetResult(Result&& result, + ConstBufferView::const_iterator cursor, + int instr_size); + + // SetResult() for end of scan. + NextIterators SetEmptyResult(); + + protected: + // Cached results. + Result rel32_; +}; + +// AArch32 instructions. 
+class Rel32FinderAArch32 + : public Rel32FinderArm<AArch32Rel32Translator::AddrType> { + public: + Rel32FinderAArch32(ConstBufferView image, + const AddressTranslator& translator, + bool is_thumb2); + Rel32FinderAArch32(const Rel32FinderAArch32&) = delete; + const Rel32FinderAArch32& operator=(const Rel32FinderAArch32&) = delete; + ~Rel32FinderAArch32() override; + + const Result& GetRel32() const { return rel32_; } + + private: + // Rel32 extraction, assuming segment is in ARM mode. + NextIterators ScanA32(ConstBufferView region); + + // Rel32 extraction, assuming segment is in THUMB2 mode. + NextIterators ScanT32(ConstBufferView region); + + // Rel32Finder: + NextIterators Scan(ConstBufferView region) override; + + // Indicates whether segment is in THUMB2 or ARM mode. In general this can + // change throughout a section. However, currently we assume that this is + // constant for an entire section. + const bool is_thumb2_; +}; + +// AArch64 instructions. +class Rel32FinderAArch64 + : public Rel32FinderArm<AArch64Rel32Translator::AddrType> { + public: + Rel32FinderAArch64(ConstBufferView image, + const AddressTranslator& translator); + Rel32FinderAArch64(const Rel32FinderAArch64&) = delete; + const Rel32FinderAArch64& operator=(const Rel32FinderAArch64&) = delete; + ~Rel32FinderAArch64() override; + + const Result& GetRel32() const { return rel32_; } + + private: + // Rel32Finder: + NextIterators Scan(ConstBufferView region) override; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_REL32_FINDER_H_ diff --git a/rel32_finder_unittest.cc b/rel32_finder_unittest.cc new file mode 100644 index 0000000..7e4a21e --- /dev/null +++ b/rel32_finder_unittest.cc @@ -0,0 +1,743 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file.
+ +#include "components/zucchini/rel32_finder.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <iterator> +#include <string> +#include <utility> +#include <vector> + +#include "base/check_op.h" +#include "base/format_macros.h" +#include "base/numerics/safe_conversions.h" +#include "base/strings/stringprintf.h" +#include "components/zucchini/arm_utils.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler_elf.h" +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(Abs32GapFinderTest, All) { + const size_t kRegionTotal = 99; + std::vector<uint8_t> buffer(kRegionTotal); + ConstBufferView image(buffer.data(), buffer.size()); + + // Common test code that returns the resulting segments as a string. + auto run_test = [&](size_t rlo, size_t rhi, + std::vector<offset_t> abs32_locations, + std::ptrdiff_t abs32_width) -> std::string { + CHECK_LE(rlo, kRegionTotal); + CHECK_LE(rhi, kRegionTotal); + CHECK(std::is_sorted(abs32_locations.begin(), abs32_locations.end())); + CHECK_GT(abs32_width, 0); + ConstBufferView region = + ConstBufferView::FromRange(image.begin() + rlo, image.begin() + rhi); + Abs32GapFinder gap_finder(image, region, abs32_locations, abs32_width); + + std::string out_str; + while (gap_finder.FindNext()) { + ConstBufferView gap = gap_finder.GetGap(); + size_t lo = base::checked_cast<size_t>(gap.begin() - image.begin()); + size_t hi = base::checked_cast<size_t>(gap.end() - image.begin()); + out_str.append(base::StringPrintf("[%" PRIuS ",%" PRIuS ")", lo, hi)); + } + return out_str; + }; + + // Empty regions yield empty segments. + EXPECT_EQ("", run_test(0, 0, {}, 4)); + EXPECT_EQ("", run_test(9, 9, {}, 4)); + EXPECT_EQ("", run_test(8, 8, {8}, 4)); + EXPECT_EQ("", run_test(8, 8, {0, 12}, 4)); + + // If no abs32 locations exist then the segment is the main range. 
+ EXPECT_EQ("[0,99)", run_test(0, 99, {}, 4)); + EXPECT_EQ("[20,21)", run_test(20, 21, {}, 4)); + EXPECT_EQ("[51,55)", run_test(51, 55, {}, 4)); + + // abs32 locations found near start of main range. + EXPECT_EQ("[10,20)", run_test(10, 20, {5}, 4)); + EXPECT_EQ("[10,20)", run_test(10, 20, {6}, 4)); + EXPECT_EQ("[11,20)", run_test(10, 20, {7}, 4)); + EXPECT_EQ("[12,20)", run_test(10, 20, {8}, 4)); + EXPECT_EQ("[13,20)", run_test(10, 20, {9}, 4)); + EXPECT_EQ("[14,20)", run_test(10, 20, {10}, 4)); + EXPECT_EQ("[10,11)[15,20)", run_test(10, 20, {11}, 4)); + + // abs32 locations found near end of main range. + EXPECT_EQ("[10,15)[19,20)", run_test(10, 20, {15}, 4)); + EXPECT_EQ("[10,16)", run_test(10, 20, {16}, 4)); + EXPECT_EQ("[10,17)", run_test(10, 20, {17}, 4)); + EXPECT_EQ("[10,18)", run_test(10, 20, {18}, 4)); + EXPECT_EQ("[10,19)", run_test(10, 20, {19}, 4)); + EXPECT_EQ("[10,20)", run_test(10, 20, {20}, 4)); + EXPECT_EQ("[10,20)", run_test(10, 20, {21}, 4)); + + // Main range completely eclipsed by abs32 location. + EXPECT_EQ("", run_test(10, 11, {7}, 4)); + EXPECT_EQ("", run_test(10, 11, {8}, 4)); + EXPECT_EQ("", run_test(10, 11, {9}, 4)); + EXPECT_EQ("", run_test(10, 11, {10}, 4)); + EXPECT_EQ("", run_test(10, 12, {8}, 4)); + EXPECT_EQ("", run_test(10, 12, {9}, 4)); + EXPECT_EQ("", run_test(10, 12, {10}, 4)); + EXPECT_EQ("", run_test(10, 13, {9}, 4)); + EXPECT_EQ("", run_test(10, 13, {10}, 4)); + EXPECT_EQ("", run_test(10, 14, {10}, 4)); + EXPECT_EQ("", run_test(10, 14, {8, 12}, 4)); + + // Partial eclipses. + EXPECT_EQ("[24,25)", run_test(20, 25, {20}, 4)); + EXPECT_EQ("[20,21)", run_test(20, 25, {21}, 4)); + EXPECT_EQ("[20,21)[25,26)", run_test(20, 26, {21}, 4)); + + // abs32 location outside main range. + EXPECT_EQ("[40,60)", run_test(40, 60, {36, 60}, 4)); + EXPECT_EQ("[41,61)", run_test(41, 61, {0, 10, 20, 30, 34, 62, 68, 80}, 4)); + + // Change abs32 width. 
+ EXPECT_EQ("[10,11)[12,14)[16,19)", run_test(10, 20, {9, 11, 14, 15, 19}, 1)); + EXPECT_EQ("", run_test(10, 11, {10}, 1)); + EXPECT_EQ("[18,23)[29,31)", run_test(17, 31, {15, 23, 26, 31}, 3)); + EXPECT_EQ("[17,22)[25,26)[29,30)", run_test(17, 31, {14, 22, 26, 30}, 3)); + EXPECT_EQ("[10,11)[19,20)", run_test(10, 20, {11}, 8)); + + // Mixed cases with abs32 width = 4. + EXPECT_EQ("[10,15)[19,20)[24,25)", run_test(8, 25, {2, 6, 15, 20, 27}, 4)); + EXPECT_EQ("[0,25)[29,45)[49,50)", run_test(0, 50, {25, 45}, 4)); + EXPECT_EQ("[10,20)[28,50)", run_test(10, 50, {20, 24}, 4)); + EXPECT_EQ("[49,50)[54,60)[64,70)[74,80)[84,87)", + run_test(49, 87, {10, 20, 30, 40, 50, 60, 70, 80, 90}, 4)); + EXPECT_EQ("[0,10)[14,20)[24,25)[29,50)", run_test(0, 50, {10, 20, 25}, 4)); +} + +namespace { + +// A mock Rel32Finder to inject next search result on Scan(). +class TestRel32Finder : public Rel32Finder { + public: + using Rel32Finder::Rel32Finder; + + // Rel32Finder: + NextIterators Scan(ConstBufferView region) override { return next_result; } + + NextIterators next_result; +}; + +AddressTranslator GetTrivialTranslator(size_t size) { + AddressTranslator translator; + EXPECT_EQ(AddressTranslator::kSuccess, + translator.Initialize({{0, base::checked_cast<offset_t>(size), 0U, + base::checked_cast<rva_t>(size)}})); + return translator; +} + +} // namespace + +TEST(Rel32FinderTest, Scan) { + const size_t kRegionTotal = 99; + std::vector<uint8_t> buffer(kRegionTotal); + ConstBufferView image(buffer.data(), buffer.size()); + AddressTranslator translator(GetTrivialTranslator(image.size())); + TestRel32Finder finder(image, translator); + finder.SetRegion(image); + + auto check_finder_state = [&](const TestRel32Finder& finder, + size_t expected_cursor, + size_t expected_accept_it) { + CHECK_LE(expected_cursor, kRegionTotal); + CHECK_LE(expected_accept_it, kRegionTotal); + + EXPECT_EQ(image.begin() + expected_cursor, finder.region().begin()); + EXPECT_EQ(image.begin() + expected_accept_it, 
finder.accept_it()); + }; + + check_finder_state(finder, 0, 0); + + finder.next_result = {image.begin() + 1, image.begin() + 1}; + EXPECT_TRUE(finder.FindNext()); + check_finder_state(finder, 1, 1); + + finder.next_result = {image.begin() + 2, image.begin() + 2}; + EXPECT_TRUE(finder.FindNext()); + check_finder_state(finder, 2, 2); + + finder.next_result = {image.begin() + 5, image.begin() + 6}; + EXPECT_TRUE(finder.FindNext()); + check_finder_state(finder, 5, 6); + finder.Accept(); + check_finder_state(finder, 6, 6); + + finder.next_result = {image.begin() + 7, image.begin() + 7}; + EXPECT_TRUE(finder.FindNext()); + check_finder_state(finder, 7, 7); + + finder.next_result = {image.begin() + 8, image.begin() + 8}; + EXPECT_TRUE(finder.FindNext()); + check_finder_state(finder, 8, 8); + + finder.next_result = {image.begin() + 99, image.begin() + 99}; + EXPECT_TRUE(finder.FindNext()); + check_finder_state(finder, 99, 99); + + finder.next_result = {nullptr, nullptr}; + EXPECT_FALSE(finder.FindNext()); + check_finder_state(finder, 99, 99); +} + +namespace { + +// X86 test data. (x) and +x entries are covered by abs32 references, which have +// width = 4. 
+constexpr uint8_t kDataX86[] = { + 0x55, // 00: push ebp + 0x8B, 0xEC, // 01: mov ebp,esp + 0xE8, 0, 0, 0, 0, // 03: call 08 + (0xE9), +0, +0, +0, 0, // 08: jmp 0D + 0x0F, 0x80, 0, 0, 0, 0, // 0D: jo 13 + 0x0F, 0x81, 0, 0, (0), +0, // 13: jno 19 + +0x0F, +0x82, 0, 0, 0, 0, // 19: jb 1F + 0x0F, 0x83, 0, 0, 0, 0, // 1F: jae 25 + 0x0F, (0x84), +0, +0, +0, (0), // 25: je 2B + +0x0F, +0x85, +0, 0, 0, 0, // 2B: jne 31 + 0x0F, 0x86, 0, 0, 0, 0, // 31: jbe 37 + 0x0F, 0x87, 0, 0, 0, 0, // 37: ja 3D + 0x0F, 0x88, 0, (0), +0, +0, // 3D: js 43 + +0x0F, 0x89, 0, 0, 0, 0, // 43: jns 49 + 0x0F, 0x8A, 0, 0, 0, 0, // 49: jp 4F + 0x0F, 0x8B, (0), +0, +0, +0, // 4F: jnp 55 + 0x0F, 0x8C, 0, 0, 0, 0, // 55: jl 5B + 0x0F, 0x8D, 0, 0, (0), +0, // 5B: jge 61 + +0x0F, +0x8E, (0), +0, +0, +0, // 61: jle 67 + 0x0F, 0x8F, 0, 0, 0, 0, // 67: jg 6D + 0x5D, // 6D: pop ebp + 0xC3, // C3: ret +}; + +// Abs32 locations corresponding to |kDataX86|, with width = 4. +constexpr offset_t kAbs32X86[] = {0x08, 0x17, 0x26, 0x2A, + 0x40, 0x51, 0x5F, 0x63}; + +} // namespace + +TEST(Rel32FinderX86Test, FindNext) { + ConstBufferView image = + ConstBufferView::FromRange(std::begin(kDataX86), std::end(kDataX86)); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderX86 rel_finder(image, translator); + rel_finder.SetRegion(image); + + // List of expected locations as pairs of {cursor offset, rel32 offset}, + // ignoring |kAbs32X86|. 
+ std::vector<std::pair<size_t, size_t>> expected_locations = { + {0x04, 0x04}, {0x09, 0x09}, {0x0E, 0x0F}, {0x14, 0x15}, {0x1A, 0x1B}, + {0x20, 0x21}, {0x26, 0x27}, {0x2C, 0x2D}, {0x32, 0x33}, {0x38, 0x39}, + {0x3E, 0x3F}, {0x44, 0x45}, {0x4A, 0x4B}, {0x50, 0x51}, {0x56, 0x57}, + {0x5C, 0x5D}, {0x62, 0x63}, {0x68, 0x69}, + }; + for (auto location : expected_locations) { + EXPECT_TRUE(rel_finder.FindNext()); + auto rel32 = rel_finder.GetRel32(); + + EXPECT_EQ(location.first, + size_t(rel_finder.region().begin() - image.begin())); + EXPECT_EQ(location.second, rel32.location); + EXPECT_EQ(image.begin() + (rel32.location + 4), rel_finder.accept_it()); + EXPECT_FALSE(rel32.can_point_outside_section); + rel_finder.Accept(); + } + EXPECT_FALSE(rel_finder.FindNext()); +} + +TEST(Rel32FinderX86Test, Integrated) { + // Truncated form of Rel32FinderIntel::Result. + using TruncatedResults = std::pair<offset_t, rva_t>; + + ConstBufferView image = + ConstBufferView::FromRange(std::begin(kDataX86), std::end(kDataX86)); + std::vector<offset_t> abs32_locations(std::begin(kAbs32X86), + std::end(kAbs32X86)); + std::vector<TruncatedResults> results; + + Abs32GapFinder gap_finder(image, image, abs32_locations, + DisassemblerElfX86::Traits::kVAWidth); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderX86 rel_finder(image, translator); + while (gap_finder.FindNext()) { + rel_finder.SetRegion(gap_finder.GetGap()); + while (rel_finder.FindNext()) { + auto rel32 = rel_finder.GetRel32(); + rel_finder.Accept(); + results.emplace_back(TruncatedResults{rel32.location, rel32.target_rva}); + } + } + + std::vector<TruncatedResults> expected_results = { + {0x04, 0x08}, + /* {0x09, 0x0D}, */ {0x0F, 0x13}, + /* {0x15, 0x19}, */ /*{0x1B, 0x1F}, */ + {0x21, 0x25}, + /* {0x27, 0x2B}, */ /* {0x2D, 0x31}, */ {0x33, 0x37}, + {0x39, 0x3D}, + /* {0x3F, 0x43}, */ /* {0x45, 0x49}, */ {0x4B, 0x4F}, + /* {0x51, 0x55}, */ {0x57, 0x5B}, + /* {0x5D, 0x61}, */ /* {0x63, 0x67}, */ 
{0x69, 0x6D}, + }; + EXPECT_EQ(expected_results, results); +} + +TEST(Rel32FinderX86Test, Accept) { + constexpr uint8_t data[] = { + 0xB9, 0x00, 0x00, 0x00, 0xE9, // 00: mov E9000000 + 0xE8, 0x00, 0x00, 0x00, 0xE9, // 05: call E900000A + 0xE8, 0x00, 0x00, 0x00, 0xE9, // 0A: call E900000F + }; + + ConstBufferView image = + ConstBufferView::FromRange(std::begin(data), std::end(data)); + + auto next_location = [](Rel32FinderX86& rel_finder) -> offset_t { + EXPECT_TRUE(rel_finder.FindNext()); + auto rel32 = rel_finder.GetRel32(); + return rel32.location; + }; + + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderX86 rel_finder(image, translator); + rel_finder.SetRegion(image); + + EXPECT_EQ(0x05U, next_location(rel_finder)); // False positive. + rel_finder.Accept(); + // False negative: shadowed by 0x05 + // EXPECT_EQ(0x06, next_location(rel_finder)); + EXPECT_EQ(0x0AU, next_location(rel_finder)); // False positive. + EXPECT_EQ(0x0BU, next_location(rel_finder)); // Found if 0x0A is discarded. +} + +namespace { + +// X64 test data. (x) and +x entries are covered by abs32 references, which have +// width = 8. 
+constexpr uint8_t kDataX64[] = { + 0x55, // 00: push ebp + 0x8B, 0xEC, // 01: mov ebp,esp + 0xE8, 0, 0, 0, 0, // 03: call 08 + 0xE9, 0, 0, 0, (0), // 08: jmp 0D + +0x0F, +0x80, +0, +0, +0, +0, // 0D: jo 13 + +0x0F, 0x81, 0, 0, 0, 0, // 13: jno 19 + 0x0F, 0x82, 0, 0, 0, 0, // 19: jb 1F + (0x0F), +0x83, +0, +0, +0, +0, // 1F: jae 25 + +0x0F, +0x84, 0, 0, 0, 0, // 25: je 2B + 0x0F, 0x85, 0, 0, 0, 0, // 2B: jne 31 + 0x0F, 0x86, (0), +0, +0, +0, // 31: jbe 37 + +0x0F, +0x87, +0, +0, (0), +0, // 37: ja 3D + +0x0F, +0x88, +0, +0, +0, +0, // 3D: js 43 + 0x0F, 0x89, 0, 0, 0, 0, // 43: jns 49 + (0x0F), +0x8A, +0, +0, +0, +0, // 49: jp 4F + +0x0F, +0x8B, 0, 0, 0, 0, // 4F: jnp 55 + 0x0F, 0x8C, 0, 0, 0, 0, // 55: jl 5B + 0x0F, 0x8D, 0, 0, 0, 0, // 5B: jge 61 + 0x0F, 0x8E, 0, 0, 0, 0, // 61: jle 67 + 0x0F, 0x8F, 0, (0), +0, +0, // 67: jg 6F + +0xFF, +0x15, +0, +0, +0, 0, // 6D: call [rip+00] # 73 + 0xFF, 0x25, 0, 0, 0, 0, // 73: jmp [rip+00] # 79 + 0x8B, 0x05, 0, 0, 0, 0, // 79: mov eax,[rip+00] # 7F + 0x8B, 0x3D, 0, 0, 0, 0, // 7F: mov edi,[rip+00] # 85 + 0x8D, 0x05, 0, 0, 0, 0, // 85: lea eax,[rip+00] # 8B + 0x8D, 0x3D, 0, 0, 0, 0, // 8B: lea edi,[rip+00] # 91 + 0x48, 0x8B, 0x05, 0, 0, 0, 0, // 91: mov rax,[rip+00] # 98 + 0x48, (0x8B), +0x3D, +0, +0, +0, +0, // 98: mov rdi,[rip+00] # 9F + +0x48, +0x8D, 0x05, 0, 0, 0, 0, // 9F: lea rax,[rip+00] # A6 + 0x48, 0x8D, 0x3D, 0, 0, 0, 0, // A6: lea rdi,[rip+00] # AD + 0x4C, 0x8B, 0x05, 0, 0, 0, (0), // AD: mov r8,[rip+00] # B4 + +0x4C, +0x8B, +0x3D, +0, +0, +0, +0, // B4: mov r15,[rip+00] # BB + 0x4C, 0x8D, 0x05, 0, 0, 0, 0, // BB: lea r8,[rip+00] # C2 + 0x4C, 0x8D, 0x3D, 0, 0, 0, 0, // C2: lea r15,[rip+00] # C9 + 0x66, 0x8B, 0x05, (0), +0, +0, +0, // C9: mov ax,[rip+00] # D0 + +0x66, +0x8B, +0x3D, +0, 0, 0, 0, // D0: mov di,[rip+00] # D7 + 0x66, 0x8D, 0x05, 0, 0, 0, 0, // D7: lea ax,[rip+00] # DE + 0x66, 0x8D, 0x3D, 0, 0, 0, 0, // DE: lea di,[rip+00] # E5 + 0x5D, // E5: pop ebp + 0xC3, // E6: ret +}; + +// Abs32 locations 
corresponding to |kDataX64|, with width = 8. +constexpr offset_t kAbs32X64[] = {0x0C, 0x1F, 0x33, 0x3B, 0x49, + 0x6A, 0x99, 0xB3, 0xCC}; + +} // namespace + +TEST(Rel32FinderX64Test, FindNext) { + ConstBufferView image = + ConstBufferView::FromRange(std::begin(kDataX64), std::end(kDataX64)); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderX64 rel_finder(image, translator); + rel_finder.SetRegion(image); + + // Lists of expected locations as pairs of {cursor offset, rel32 offset}, + // ignoring |kAbs32X64|. + std::vector<std::pair<size_t, size_t>> expected_locations = { + {0x04, 0x04}, {0x09, 0x09}, {0x0E, 0x0F}, {0x14, 0x15}, {0x1A, 0x1B}, + {0x20, 0x21}, {0x26, 0x27}, {0x2C, 0x2D}, {0x32, 0x33}, {0x38, 0x39}, + {0x3E, 0x3F}, {0x44, 0x45}, {0x4A, 0x4B}, {0x50, 0x51}, {0x56, 0x57}, + {0x5C, 0x5D}, {0x62, 0x63}, {0x68, 0x69}, + }; + std::vector<std::pair<size_t, size_t>> expected_locations_rip = { + {0x6E, 0x6F}, {0x74, 0x75}, {0x7A, 0x7B}, {0x80, 0x81}, {0x86, 0x87}, + {0x8C, 0x8D}, {0x93, 0x94}, {0x9A, 0x9B}, {0xA1, 0xA2}, {0xA8, 0xA9}, + {0xAF, 0xB0}, {0xB6, 0xB7}, {0xBD, 0xBE}, {0xC4, 0xC5}, {0xCB, 0xCC}, + {0xD2, 0xD3}, {0xD9, 0xDA}, {0xE0, 0xE1}, + }; + // Jump instructions, which cannot point outside section. + for (auto location : expected_locations) { + EXPECT_TRUE(rel_finder.FindNext()); + auto rel32 = rel_finder.GetRel32(); + EXPECT_EQ(location.first, + size_t(rel_finder.region().begin() - image.begin())); + EXPECT_EQ(location.second, rel32.location); + EXPECT_EQ(image.begin() + (rel32.location + 4), rel_finder.accept_it()); + EXPECT_FALSE(rel32.can_point_outside_section); + rel_finder.Accept(); + } + // PC-relative data access instructions, which can point outside section. 
+ for (auto location : expected_locations_rip) { + EXPECT_TRUE(rel_finder.FindNext()); + auto rel32 = rel_finder.GetRel32(); + EXPECT_EQ(location.first, + size_t(rel_finder.region().begin() - image.begin())); + EXPECT_EQ(location.second, rel32.location); + EXPECT_EQ(image.begin() + (rel32.location + 4), rel_finder.accept_it()); + EXPECT_TRUE(rel32.can_point_outside_section); // Different from before. + rel_finder.Accept(); + } + EXPECT_FALSE(rel_finder.FindNext()); +} + +TEST(Rel32FinderX64Test, Integrated) { + // Truncated form of Rel32FinderIntel::Result. + using TruncatedResults = std::pair<offset_t, rva_t>; + + ConstBufferView image = + ConstBufferView::FromRange(std::begin(kDataX64), std::end(kDataX64)); + std::vector<offset_t> abs32_locations(std::begin(kAbs32X64), + std::end(kAbs32X64)); + std::vector<TruncatedResults> results; + + Abs32GapFinder gap_finder(image, image, abs32_locations, + DisassemblerElfX64::Traits::kVAWidth); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderX64 rel_finder(image, translator); + while (gap_finder.FindNext()) { + rel_finder.SetRegion(gap_finder.GetGap()); + while (rel_finder.FindNext()) { + auto rel32 = rel_finder.GetRel32(); + rel_finder.Accept(); + results.emplace_back(TruncatedResults{rel32.location, rel32.target_rva}); + } + } + + std::vector<TruncatedResults> expected_results = { + {0x04, 0x08}, + /* {0x09, 0x0D}, */ + /* {0x0F, 0x13}, */ /* {0x15, 0x19}, */ {0x1B, 0x1F}, + /* {0x21, 0x25}, */ /* {0x27, 0x2B}, */ {0x2D, 0x31}, + /* {0x33, 0x37}, */ /* {0x39, 0x3D}, */ + /* {0x3F, 0x43}, */ {0x45, 0x49}, + /* {0x4B, 0x4F}, */ /* {0x51, 0x55}, */ + {0x57, 0x5B}, + {0x5D, 0x61}, + {0x63, 0x67}, /* {0x69, 0x6F}, */ + /* {0x6F, 0x73}, */ {0x75, 0x79}, + {0x7B, 0x7F}, + {0x81, 0x85}, + {0x87, 0x8B}, + {0x8D, 0x91}, + {0x94, 0x98}, + /* {0x9B, 0x9F}, */ /* {0xA2, 0xA6}, */ {0xA9, 0xAD}, + /* {0xB0, 0xB4}, */ /* {0xB7, 0xBB}, */ {0xBE, 0xC2}, + {0xC5, 0xC9}, + /* {0xCC, 0xD0}, */ /* {0xD3, 0xD7}, 
*/ {0xDA, 0xDE}, + {0xE1, 0xE5}, + }; + EXPECT_EQ(expected_results, results); +} + +namespace { + +// Runs the ARM rel32 extraction (nested) loop on |image| using |rel32_finder|, +// given |abs32_locations| for abs32 references each having |abs32_width|. +// Returns the list of extracted references. +template <class REL32_FINDER> +std::vector<typename REL32_FINDER::Result> ArmExtractRel32( + ConstBufferView image, + const std::vector<offset_t>& abs32_locations, + int abs32_width, + REL32_FINDER&& rel32_finder) { + std::vector<typename REL32_FINDER::Result> results; + Abs32GapFinder gap_finder(image, image, abs32_locations, abs32_width); + while (gap_finder.FindNext()) { + rel32_finder.SetRegion(gap_finder.GetGap()); + while (rel32_finder.FindNext()) { + typename REL32_FINDER::Result rel32 = rel32_finder.GetRel32(); + rel32_finder.Accept(); + results.emplace_back(rel32); + } + } + return results; +} + +} // namespace + +namespace { + +// AArch32 ARM mode test data. (x) and +x entries are covered by abs32 +// references (if used), which have width = 4. +constexpr uint8_t kDataAarch32ArmMode[] = { + 0x00, 0x01, 0x02, 0xEA, // 00: B 00080408 ; B encoding A1 + 0x00, 0x01, (0x02), +0xEA, // 04: B 0008040C ; B encoding A1 + +0x00, +0x01, 0x02, 0xEA, // 08: B 00080410 ; B encoding A1 + 0x00, 0x01, 0x02, 0xEA, // 0C: B 00080414 ; B encoding A1 + 0x00, 0x01, 0x02, (0xEA), // 10: B 00080418 ; B encoding A1 + +0x00, +0x01, +0x02, 0xEA, // 14: B 0008041C ; B encoding A1 + 0x00, 0x01, 0x02, 0xEA, // 18: B 00080420 ; B encoding A1 +}; + +// Abs32 locations corresponding to |kDataAarch32ArmMode|, with width = 4. 
+constexpr offset_t kAbs32Aarch32ArmMode[] = {0x6, 0x13}; + +} // namespace + +TEST(Rel32FinderAArch32Test, IntegratedArmModeWithoutAbs32) { + using AddrType = AArch32Rel32Translator::AddrType; + using Result = Rel32FinderAArch32::Result; + std::vector<Result> expected_results = { + {0x00, 0x80408, AddrType::ADDR_A24}, {0x04, 0x8040C, AddrType::ADDR_A24}, + {0x08, 0x80410, AddrType::ADDR_A24}, {0x0C, 0x80414, AddrType::ADDR_A24}, + {0x10, 0x80418, AddrType::ADDR_A24}, {0x14, 0x8041C, AddrType::ADDR_A24}, + {0x18, 0x80420, AddrType::ADDR_A24}, + }; + + ConstBufferView image = ConstBufferView::FromRange( + std::begin(kDataAarch32ArmMode), std::end(kDataAarch32ArmMode)); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderAArch32 rel32_finder(image, translator, /* is_thumb2 */ false); + + std::vector<Result> results = ArmExtractRel32( + image, /* abs32_locations */ {}, DisassemblerElfAArch32::Traits::kVAWidth, + std::move(rel32_finder)); + + EXPECT_EQ(expected_results, results); +} + +TEST(Rel32FinderAArch32Test, IntegratedArmModeWithAbs32) { + using AddrType = AArch32Rel32Translator::AddrType; + using Result = Rel32FinderAArch32::Result; + std::vector<Result> expected_results = { + {0x00, 0x80408, AddrType::ADDR_A24}, + /* {0x04, 0x8040C, AddrType::ADDR_A24}, */ + /* {0x08, 0x80410, AddrType::ADDR_A24}, */ + {0x0C, 0x80414, AddrType::ADDR_A24}, + /* {0x10, 0x80418, AddrType::ADDR_A24}, */ + /* {0x14, 0x8041C, AddrType::ADDR_A24}, */ + {0x18, 0x80420, AddrType::ADDR_A24}, + }; + + ConstBufferView image = ConstBufferView::FromRange( + std::begin(kDataAarch32ArmMode), std::end(kDataAarch32ArmMode)); + std::vector<offset_t> abs32_locations(std::begin(kAbs32Aarch32ArmMode), + std::end(kAbs32Aarch32ArmMode)); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderAArch32 rel32_finder(image, translator, /* is_thumb2 */ false); + + std::vector<Result> results = ArmExtractRel32( + image, abs32_locations, 
DisassemblerElfAArch32::Traits::kVAWidth, + std::move(rel32_finder)); + + EXPECT_EQ(expected_results, results); +} + +namespace { + +// AArch32 THUMB2 mode test data. (x) and +x entries are covered by abs32 +// references (if used), which have width = 4. +constexpr uint8_t kDataAarch32Thumb2Mode[] = { + 0x00, 0xDE, // 00: B.AL 00000004 ; B encoding T1 + 0x00, 0xDE, // 02: B.AL 00000006 ; B encoding T1 + 0x00, (0xDE), // 04: B.AL 00000008 ; B encoding T1 + +0x00, +0xDE, // 06: B.AL 0000000A ; B encoding T1 + +0x00, 0xE0, // 08: B 0000000C ; B encoding T2 + 0x00, 0xE0, // 0A: B 0000000E ; B encoding T2 + 0x00, 0xE0, // 0C: B 00000010 ; B encoding T2 + (0x00), +0xE0, // 0E: B 00000012 ; B encoding T2 + +0x00, +0xF0, 0x00, 0x80, // 10: B 00000014 ; B encoding T3 + 0x00, 0xF0, 0x00, 0x80, // 14: B 00000018 ; B encoding T3 + (0x00), +0xF0, +0x00, +0x80, // 18: B 0000001C ; B encoding T3 + 0x00, 0xF0, 0x00, 0x80, // 1C: B 00000020 ; B encoding T3 + 0x00, 0xF0, 0x00, 0xB8, // 20: B 00000024 ; B encoding T4 + 0x00, 0xF0, 0x00, (0xB8), // 24: B 00000028 ; B encoding T4 + +0xFE, +0xDE, // 28: B.AL 00000028 ; B encoding T1 + +0x00, 0xF0, 0x00, 0xF8, // 2A: BL 0000002E ; BL encoding T1 + 0x00, 0xF0, 0x00, 0xE8, // 2E: BLX 00000030 ; BLX encoding T2 + 0x00, 0x0B, // 32: NOP + 0x00, 0xF0, 0x00, 0xE8, // 34: BLX 00000038 ; BLX encoding T2 + 0x00, 0xF0, 0x00, 0xB8, // 38: B 0000003C ; B encoding T4 +}; + +// Abs32 locations corresponding to |kDataAarch32Thumb2Mode|, with width = 4. 
+constexpr offset_t kAbs32Aarch32Thumb2Mode[] = {0x05, 0x0E, 0x18, 0x27}; + +} // namespace + +TEST(Rel32FinderAArch32Test, IntegratedThumb2ModeWithoutAbs32) { + using AddrType = AArch32Rel32Translator::AddrType; + using Result = Rel32FinderAArch32::Result; + std::vector<Result> expected_results = { + {0x00, 0x04, AddrType::ADDR_T8}, {0x02, 0x06, AddrType::ADDR_T8}, + {0x04, 0x08, AddrType::ADDR_T8}, {0x06, 0x0A, AddrType::ADDR_T8}, + {0x08, 0x0C, AddrType::ADDR_T11}, {0x0A, 0x0E, AddrType::ADDR_T11}, + {0x0C, 0x10, AddrType::ADDR_T11}, {0x0E, 0x12, AddrType::ADDR_T11}, + {0x10, 0x14, AddrType::ADDR_T20}, {0x14, 0x18, AddrType::ADDR_T20}, + {0x18, 0x1C, AddrType::ADDR_T20}, {0x1C, 0x20, AddrType::ADDR_T20}, + {0x20, 0x24, AddrType::ADDR_T24}, {0x24, 0x28, AddrType::ADDR_T24}, + {0x28, 0x28, AddrType::ADDR_T8}, {0x2A, 0x2E, AddrType::ADDR_T24}, + {0x2E, 0x30, AddrType::ADDR_T24}, {0x34, 0x38, AddrType::ADDR_T24}, + {0x38, 0x3C, AddrType::ADDR_T24}, + }; + + ConstBufferView image = ConstBufferView::FromRange( + std::begin(kDataAarch32Thumb2Mode), std::end(kDataAarch32Thumb2Mode)); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderAArch32 rel32_finder(image, translator, /* is_thumb2 */ true); + + std::vector<Result> results = ArmExtractRel32( + image, /* abs32_locations */ {}, DisassemblerElfAArch32::Traits::kVAWidth, + std::move(rel32_finder)); + + EXPECT_EQ(expected_results, results); +} + +TEST(Rel32FinderAArch32Test, IntegratedThumb2ModeWithAbs32) { + using AddrType = AArch32Rel32Translator::AddrType; + using Result = Rel32FinderAArch32::Result; + std::vector<Result> expected_results = { + {0x00, 0x04, AddrType::ADDR_T8}, + {0x02, 0x06, AddrType::ADDR_T8}, + /* {0x04, 0x08, AddrType::ADDR_T8}, */ + /* {0x06, 0x0A, AddrType::ADDR_T8}, */ + /* {0x08, 0x0C, AddrType::ADDR_T11}, */ + {0x0A, 0x0E, AddrType::ADDR_T11}, + {0x0C, 0x10, AddrType::ADDR_T11}, + /* {0x0E, 0x12, AddrType::ADDR_T11}, */ + /* {0x10, 0x14, AddrType::ADDR_T20}, */ + 
      {0x14, 0x18, AddrType::ADDR_T20}, + /* {0x18, 0x1C, AddrType::ADDR_T20}, */ + {0x1C, 0x20, AddrType::ADDR_T20}, + {0x20, 0x24, AddrType::ADDR_T24}, + /* {0x24, 0x28, AddrType::ADDR_T24}, */ + /* {0x28, 0x28, AddrType::ADDR_T8}, */ + /* {0x2A, 0x2E, AddrType::ADDR_T24}, */ + // Abs32 reference 0x27 disrupts alignment, and THUMB2 disassembly starts + // at 0x2C, causing the following to be excluded! + /* {0x2E, 0x30, AddrType::ADDR_T24}, */ + {0x34, 0x38, AddrType::ADDR_T24}, + {0x38, 0x3C, AddrType::ADDR_T24}, + }; + + ConstBufferView image = ConstBufferView::FromRange( + std::begin(kDataAarch32Thumb2Mode), std::end(kDataAarch32Thumb2Mode)); + std::vector<offset_t> abs32_locations(std::begin(kAbs32Aarch32Thumb2Mode), + std::end(kAbs32Aarch32Thumb2Mode)); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderAArch32 rel32_finder(image, translator, /* is_thumb2 */ true); + + std::vector<Result> results = ArmExtractRel32( + image, abs32_locations, DisassemblerElfAArch32::Traits::kVAWidth, + std::move(rel32_finder)); + + EXPECT_EQ(expected_results, results); +} + +namespace { + +// AArch64 test data. (x) and +x entries are covered by abs32 +// references (if used), which have width = 8. +constexpr uint8_t kDataAarch64[] = { + 0x0E, 0x00, 0x00, 0x36, // 00: TBZ X0,#0,00000000 ; Immd14 + 0x0E, 0x00, 0x00, (0x36), // 04: TBZ X0,#0,00000004 ; Immd14 + +0x0E, +0x00, +0x00, +0x36, // 08: TBZ X0,#0,00000008 ; Immd14 + +0x0E, +0x00, +0x00, 0x54, // 0C: B.AL 0000000C ; Immd19 + 0x0E, 0x00, 0x00, 0x54, // 10: B.AL 00000010 ; Immd19 + (0x0E), +0x00, +0x00, +0x54, // 14: B.AL 00000014 ; Immd19 + +0x00, +0x00, +0x00, +0x94, // 18: BL 00000018 ; Immd26 + 0x00, 0x00, 0x00, 0x14, // 1C: B 0000001C ; Immd26 + 0x00, 0x00, 0x00, 0x94, // 20: BL 00000020 ; Immd26 + 0x00, 0x00, 0x00, 0x14, // 24: B 00000024 ; Immd26 +}; + +// Abs32 locations corresponding to |kDataAarch64|, with width = 8.
+constexpr offset_t kAbs32Aarch64[] = {0x07, 0x14}; + +} // namespace + +TEST(Rel32FinderAArch64Test, IntegratedWithoutAbs32) { + using AddrType = AArch64Rel32Translator::AddrType; + using Result = Rel32FinderAArch64::Result; + std::vector<Result> expected_results = { + {0x00, 0x00, AddrType::ADDR_IMMD14}, {0x04, 0x04, AddrType::ADDR_IMMD14}, + {0x08, 0x08, AddrType::ADDR_IMMD14}, {0x0C, 0x0C, AddrType::ADDR_IMMD19}, + {0x10, 0x10, AddrType::ADDR_IMMD19}, {0x14, 0x14, AddrType::ADDR_IMMD19}, + {0x18, 0x18, AddrType::ADDR_IMMD26}, {0x1C, 0x1C, AddrType::ADDR_IMMD26}, + {0x20, 0x20, AddrType::ADDR_IMMD26}, {0x24, 0x24, AddrType::ADDR_IMMD26}, + }; + + ConstBufferView image = ConstBufferView::FromRange(std::begin(kDataAarch64), + std::end(kDataAarch64)); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderAArch64 rel32_finder(image, translator); + + std::vector<Result> results = ArmExtractRel32( + image, /* abs32_locations */ {}, DisassemblerElfAArch64::Traits::kVAWidth, + std::move(rel32_finder)); + + EXPECT_EQ(expected_results, results); +} + +TEST(Rel32FinderAArch64Test, IntegratedWithAbs32) { + using AddrType = AArch64Rel32Translator::AddrType; + using Result = Rel32FinderAArch64::Result; + std::vector<Result> expected_results = { + {0x00, 0x00, AddrType::ADDR_IMMD14}, + /* {0x04, 0x04, AddrType::ADDR_IMMD14}, */ + /* {0x08, 0x08, AddrType::ADDR_IMMD14}, */ + /* {0x0C, 0x0C, AddrType::ADDR_IMMD19}, */ + {0x10, 0x10, AddrType::ADDR_IMMD19}, + /* {0x14, 0x14, AddrType::ADDR_IMMD19}, */ + /* {0x18, 0x18, AddrType::ADDR_IMMD26}, */ + {0x1C, 0x1C, AddrType::ADDR_IMMD26}, + {0x20, 0x20, AddrType::ADDR_IMMD26}, + {0x24, 0x24, AddrType::ADDR_IMMD26}, + }; + + ConstBufferView image = ConstBufferView::FromRange(std::begin(kDataAarch64), + std::end(kDataAarch64)); + std::vector<offset_t> abs32_locations(std::begin(kAbs32Aarch64), + std::end(kAbs32Aarch64)); + AddressTranslator translator(GetTrivialTranslator(image.size())); + Rel32FinderAArch64 
rel32_finder(image, translator); + + std::vector<Result> results = ArmExtractRel32( + image, abs32_locations, DisassemblerElfAArch64::Traits::kVAWidth, + std::move(rel32_finder)); + + EXPECT_EQ(expected_results, results); +} + +} // namespace zucchini diff --git a/rel32_utils.cc b/rel32_utils.cc new file mode 100644 index 0000000..c22cb23 --- /dev/null +++ b/rel32_utils.cc @@ -0,0 +1,67 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/rel32_utils.h" + +#include <algorithm> + +#include "base/check_op.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +/******** Rel32ReaderX86 ********/ + +Rel32ReaderX86::Rel32ReaderX86(ConstBufferView image, + offset_t lo, + offset_t hi, + const std::deque<offset_t>* locations, + const AddressTranslator& translator) + : image_(image), + target_rva_to_offset_(translator), + location_offset_to_rva_(translator), + hi_(hi), + last_(locations->end()) { + DCHECK_LE(lo, image.size()); + DCHECK_LE(hi, image.size()); + current_ = std::lower_bound(locations->begin(), locations->end(), lo); +} + +Rel32ReaderX86::~Rel32ReaderX86() = default; + +absl::optional<Reference> Rel32ReaderX86::GetNext() { + while (current_ < last_ && *current_ < hi_) { + offset_t loc_offset = *(current_++); + DCHECK_LE(loc_offset + 4, image_.size()); // Sanity check. + rva_t loc_rva = location_offset_to_rva_.Convert(loc_offset); + rva_t target_rva = loc_rva + 4 + image_.read<int32_t>(loc_offset); + offset_t target_offset = target_rva_to_offset_.Convert(target_rva); + // |locations| is valid by assumption (see class description). 
+    DCHECK_NE(kInvalidOffset, target_offset);
+    return Reference{loc_offset, target_offset};
+  }
+  return absl::nullopt;
+}
+
+/******** Rel32WriterX86 ********/
+
+Rel32WriterX86::Rel32WriterX86(MutableBufferView image,
+                               const AddressTranslator& translator)
+    : image_(image),
+      target_offset_to_rva_(translator),
+      location_offset_to_rva_(translator) {}
+
+Rel32WriterX86::~Rel32WriterX86() = default;
+
+void Rel32WriterX86::PutNext(Reference ref) {
+  rva_t target_rva = target_offset_to_rva_.Convert(ref.target);
+  rva_t loc_rva = location_offset_to_rva_.Convert(ref.location);
+
+  // Subtraction underflow is okay
+  uint32_t code =
+      static_cast<uint32_t>(target_rva) - (static_cast<uint32_t>(loc_rva) + 4);
+  image_.write<uint32_t>(ref.location, code);
+}
+
+}  // namespace zucchini
diff --git a/rel32_utils.h b/rel32_utils.h
new file mode 100644
index 0000000..f54c5cd
--- /dev/null
+++ b/rel32_utils.h
@@ -0,0 +1,184 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_ZUCCHINI_REL32_UTILS_H_
+#define COMPONENTS_ZUCCHINI_REL32_UTILS_H_
+
+#include <algorithm>
+#include <deque>
+#include <memory>
+
+#include "base/logging.h"
+#include "components/zucchini/address_translator.h"
+#include "components/zucchini/arm_utils.h"
+#include "components/zucchini/buffer_view.h"
+#include "components/zucchini/image_utils.h"
+#include "components/zucchini/io_utils.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+
+namespace zucchini {
+
+// Reader that emits x86 / x64 References (locations and target) from a list of
+// valid locations, constrained by a portion of an image.
+class Rel32ReaderX86 : public ReferenceReader {
+ public:
+  // |image| is an image containing x86 / x64 code in [|lo|, |hi|).
+  // |locations| is a sorted list of offsets of rel32 reference locations.
+  // |translator| (for |image|) is embedded into |target_rva_to_offset_| and
+  // |location_offset_to_rva_| for address translation, and therefore must
+  // outlive |*this|.
+  Rel32ReaderX86(ConstBufferView image,
+                 offset_t lo,
+                 offset_t hi,
+                 const std::deque<offset_t>* locations,
+                 const AddressTranslator& translator);
+  Rel32ReaderX86(const Rel32ReaderX86&) = delete;
+  const Rel32ReaderX86& operator=(const Rel32ReaderX86&) = delete;
+  ~Rel32ReaderX86() override;
+
+  // Returns the next reference, or absl::nullopt if exhausted.
+  absl::optional<Reference> GetNext() override;
+
+ private:
+  ConstBufferView image_;
+  AddressTranslator::RvaToOffsetCache target_rva_to_offset_;
+  AddressTranslator::OffsetToRvaCache location_offset_to_rva_;
+  const offset_t hi_;
+  const std::deque<offset_t>::const_iterator last_;
+  std::deque<offset_t>::const_iterator current_;
+};
+
+// Writer for x86 / x64 rel32 References.
+class Rel32WriterX86 : public ReferenceWriter {
+ public:
+  // |image| wraps the raw bytes of a binary in which rel32 references will be
+  // written. |translator| (for |image|) is embedded into
+  // |target_offset_to_rva_| and |location_offset_to_rva_| for address
+  // translation, and therefore must outlive |*this|.
+  Rel32WriterX86(MutableBufferView image, const AddressTranslator& translator);
+  Rel32WriterX86(const Rel32WriterX86&) = delete;
+  const Rel32WriterX86& operator=(const Rel32WriterX86&) = delete;
+  ~Rel32WriterX86() override;
+
+  void PutNext(Reference ref) override;
+
+ private:
+  MutableBufferView image_;
+  AddressTranslator::OffsetToRvaCache target_offset_to_rva_;
+  AddressTranslator::OffsetToRvaCache location_offset_to_rva_;
+};
+
+// Reader that emits ARM References (locations and target) of a specific
+// type from a list of valid locations, constrained by a portion of an image.
+template <class ADDR_TRAITS> +class Rel32ReaderArm : public ReferenceReader { + public: + using CODE_T = typename ADDR_TRAITS::code_t; + + Rel32ReaderArm(const AddressTranslator& translator, + ConstBufferView view, + const std::deque<offset_t>& rel32_locations, + offset_t lo, + offset_t hi) + : view_(view), + offset_to_rva_(translator), + rva_to_offset_(translator), + hi_(hi) { + cur_it_ = + std::lower_bound(rel32_locations.begin(), rel32_locations.end(), lo); + rel32_end_ = rel32_locations.end(); + } + + Rel32ReaderArm(const Rel32ReaderArm&) = delete; + const Rel32ReaderArm& operator=(const Rel32ReaderArm&) = delete; + + absl::optional<Reference> GetNext() override { + while (cur_it_ < rel32_end_ && *cur_it_ < hi_) { + offset_t location = *(cur_it_++); + CODE_T code = ADDR_TRAITS::Fetch(view_, location); + rva_t instr_rva = offset_to_rva_.Convert(location); + rva_t target_rva = kInvalidRva; + if (ADDR_TRAITS::Read(instr_rva, code, &target_rva)) { + offset_t target = rva_to_offset_.Convert(target_rva); + if (target != kInvalidOffset) + return Reference{location, target}; + } + } + return absl::nullopt; + } + + private: + ConstBufferView view_; + AddressTranslator::OffsetToRvaCache offset_to_rva_; + AddressTranslator::RvaToOffsetCache rva_to_offset_; + std::deque<offset_t>::const_iterator cur_it_; + std::deque<offset_t>::const_iterator rel32_end_; + offset_t hi_; +}; + +// Writer for ARM rel32 References of a specific type. 
+template <class ADDR_TRAITS> +class Rel32WriterArm : public ReferenceWriter { + public: + using CODE_T = typename ADDR_TRAITS::code_t; + + Rel32WriterArm(const AddressTranslator& translator, + MutableBufferView mutable_view) + : mutable_view_(mutable_view), offset_to_rva_(translator) {} + + Rel32WriterArm(const Rel32WriterArm&) = delete; + const Rel32WriterArm& operator=(const Rel32WriterArm&) = delete; + + void PutNext(Reference ref) override { + CODE_T code = ADDR_TRAITS::Fetch(mutable_view_, ref.location); + rva_t instr_rva = offset_to_rva_.Convert(ref.location); + rva_t target_rva = offset_to_rva_.Convert(ref.target); + if (ADDR_TRAITS::Write(instr_rva, target_rva, &code)) { + ADDR_TRAITS::Store(mutable_view_, ref.location, code); + } else { + LOG(ERROR) << "Write error: " << AsHex<8>(ref.location) << ": " + << AsHex<static_cast<int>(sizeof(CODE_T)) * 2>(code) + << " <= " << AsHex<8>(target_rva) << "."; + } + } + + private: + MutableBufferView mutable_view_; + AddressTranslator::OffsetToRvaCache offset_to_rva_; +}; + +// Type for specialized versions of ArmCopyDisp(). +// TODO(etiennep/huangs): Fold ReferenceByteMixer into Disassembler and remove +// direct function pointer usage. +using ArmCopyDispFun = bool (*)(ConstBufferView src_view, + offset_t src_idx, + MutableBufferView dst_view, + offset_t dst_idx); + +// Copier that makes |*dst_it| similar to |*src_it| (both assumed to point to +// rel32 instructions of type ADDR_TRAITS) by copying the displacement (i.e., +// payload bits) from |src_it| to |dst_it|. If successful, updates |*dst_it|, +// and returns true. Otherwise returns false. Note that alignment is not an +// issue since the displacement is not translated to target RVA! 
+template <class ADDR_TRAITS> +bool ArmCopyDisp(ConstBufferView src_view, + offset_t src_idx, + MutableBufferView dst_view, + offset_t dst_idx) { + using CODE_T = typename ADDR_TRAITS::code_t; + CODE_T src_code = ADDR_TRAITS::Fetch(src_view, src_idx); + arm_disp_t disp = 0; + if (ADDR_TRAITS::Decode(src_code, &disp)) { + CODE_T dst_code = ADDR_TRAITS::Fetch(dst_view, dst_idx); + if (ADDR_TRAITS::Encode(disp, &dst_code)) { + ADDR_TRAITS::Store(dst_view, dst_idx, dst_code); + return true; + } + } + return false; +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_REL32_UTILS_H_ diff --git a/rel32_utils_unittest.cc b/rel32_utils_unittest.cc new file mode 100644 index 0000000..f4a6bde --- /dev/null +++ b/rel32_utils_unittest.cc @@ -0,0 +1,541 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/rel32_utils.h" + +#include <stdint.h> + +#include <deque> +#include <memory> +#include <utility> +#include <vector> + +#include "base/test/gtest_util.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/arm_utils.h" +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +namespace { + +// A trivial AddressTranslator that applies constant shift. +class TestAddressTranslator : public AddressTranslator { + public: + TestAddressTranslator(offset_t image_size, rva_t rva_begin) { + DCHECK_GE(rva_begin, 0U); + CHECK_EQ(AddressTranslator::kSuccess, + Initialize({{0, image_size, rva_begin, image_size}})); + } +}; + +// Checks that |reader| emits and only emits |expected_refs|, in order. 
+void CheckReader(const std::vector<Reference>& expected_refs,
+                 std::unique_ptr<ReferenceReader> reader) {
+  for (Reference expected_ref : expected_refs) {
+    auto ref = reader->GetNext();
+    EXPECT_TRUE(ref.has_value());
+    EXPECT_EQ(expected_ref, ref.value());
+  }
+  EXPECT_EQ(absl::nullopt, reader->GetNext());  // Nothing should be left.
+}
+
+// Copies displacements from |bytes1| to |bytes2| and checks results against
+// |bytes_exp_1_to_2|. Then repeats for |bytes2|, |bytes1|, and
+// |bytes_exp_2_to_1|. Empty expected bytes mean failure is expected. The copy
+// function is specified by |copier|.
+void CheckCopy(const std::vector<uint8_t>& bytes_exp_1_to_2,
+               const std::vector<uint8_t>& bytes_exp_2_to_1,
+               const std::vector<uint8_t>& bytes1,
+               const std::vector<uint8_t>& bytes2,
+               ArmCopyDispFun copier) {
+  auto run_test = [&copier](const std::vector<uint8_t>& bytes_exp,
+                            const std::vector<uint8_t>& bytes_in,
+                            std::vector<uint8_t> bytes_out) {
+    ConstBufferView buffer_in(&bytes_in[0], bytes_in.size());
+    MutableBufferView buffer_out(&bytes_out[0], bytes_out.size());
+    if (bytes_exp.empty()) {
+      EXPECT_FALSE(copier(buffer_in, 0U, buffer_out, 0U));
+    } else {
+      EXPECT_TRUE(copier(buffer_in, 0U, buffer_out, 0U));
+      EXPECT_EQ(bytes_exp, bytes_out);
+    }
+  };
+  run_test(bytes_exp_1_to_2, bytes1, bytes2);
+  run_test(bytes_exp_2_to_1, bytes2, bytes1);
+}
+
+}  // namespace
+
+TEST(Rel32UtilsTest, Rel32ReaderX86) {
+  constexpr offset_t kTestImageSize = 0x00100000U;
+  constexpr rva_t kRvaBegin = 0x00030000U;
+  TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+  // For simplicity, test data is not real X86 machine code. We are only
+  // including rel32 targets, without the full instructions.
+  std::vector<uint8_t> bytes = {
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030000: (Filler)
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030004: (Filler)
+      0x04, 0x00, 0x00, 0x00,  // 00030008: 00030010
+      0xFF, 0xFF, 0xFF, 0xFF,  // 0003000C: (Filler)
+      0x00, 0x00, 0x00, 0x00,  // 00030010: 00030014
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030014: (Filler)
+      0xF4, 0xFF, 0xFF, 0xFF,  // 00030018: 00030010
+      0xE4, 0xFF, 0xFF, 0xFF,  // 0003001C: 00030004
+  };
+  ConstBufferView buffer(bytes.data(), bytes.size());
+  // Specify rel32 locations directly, instead of parsing.
+  std::deque<offset_t> rel32_locations = {0x0008U, 0x0010U, 0x0018U, 0x001CU};
+
+  // Generate everything.
+  auto reader1 = std::make_unique<Rel32ReaderX86>(buffer, 0x0000U, 0x0020U,
+                                                  &rel32_locations, translator);
+  CheckReader({{0x0008U, 0x0010U},
+               {0x0010U, 0x0014U},
+               {0x0018U, 0x0010U},
+               {0x001CU, 0x0004U}},
+              std::move(reader1));
+
+  // Exclude last.
+  auto reader2 = std::make_unique<Rel32ReaderX86>(buffer, 0x0000U, 0x001CU,
+                                                  &rel32_locations, translator);
+  CheckReader({{0x0008U, 0x0010U}, {0x0010U, 0x0014U}, {0x0018U, 0x0010U}},
+              std::move(reader2));
+
+  // Only find one.
+ auto reader3 = std::make_unique<Rel32ReaderX86>(buffer, 0x000CU, 0x0018U, + &rel32_locations, translator); + CheckReader({{0x0010U, 0x0014U}}, std::move(reader3)); +} + +TEST(Rel32UtilsTest, Rel32WriterX86) { + constexpr offset_t kTestImageSize = 0x00100000U; + constexpr rva_t kRvaBegin = 0x00030000U; + TestAddressTranslator translator(kTestImageSize, kRvaBegin); + + std::vector<uint8_t> bytes(32, 0xFF); + MutableBufferView buffer(bytes.data(), bytes.size()); + + Rel32WriterX86 writer(buffer, translator); + writer.PutNext({0x0008U, 0x0010U}); + EXPECT_EQ(0x00000004U, buffer.read<uint32_t>(0x08)); // 00030008: 00030010 + + writer.PutNext({0x0010U, 0x0014U}); + EXPECT_EQ(0x00000000U, buffer.read<uint32_t>(0x10)); // 00030010: 00030014 + + writer.PutNext({0x0018U, 0x0010U}); + EXPECT_EQ(0xFFFFFFF4U, buffer.read<uint32_t>(0x18)); // 00030018: 00030010 + + writer.PutNext({0x001CU, 0x0004U}); + EXPECT_EQ(0xFFFFFFE4U, buffer.read<uint32_t>(0x1C)); // 0003001C: 00030004 + + EXPECT_EQ(std::vector<uint8_t>({ + 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler) + 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler) + 0x04, 0x00, 0x00, 0x00, // 00030008: 00030010 + 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler) + 0x00, 0x00, 0x00, 0x00, // 00030010: 00030014 + 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler) + 0xF4, 0xFF, 0xFF, 0xFF, // 00030018: 00030010 + 0xE4, 0xFF, 0xFF, 0xFF, // 0003001C: 00030004 + }), + bytes); +} + +TEST(Rel32UtilsTest, Rel32ReaderArm_AArch32) { + constexpr offset_t kTestImageSize = 0x00100000U; + constexpr rva_t kRvaBegin = 0x00030000U; + TestAddressTranslator translator(kTestImageSize, kRvaBegin); + + // A24. 
+ std::vector<uint8_t> bytes = { + 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler) + 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler) + 0x00, 0x00, 0x00, 0xEA, // 00030008: B 00030010 ; A24 + 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler) + 0xFF, 0xFF, 0xFF, 0xEB, // 00030010: BL 00030014 ; A24 + 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler) + 0xFC, 0xFF, 0xFF, 0xEB, // 00030018: BL 00030010 ; A24 + 0xF8, 0xFF, 0xFF, 0xEA, // 0003001C: B 00030004 ; A24 + }; + ConstBufferView region(&bytes[0], bytes.size()); + // Specify rel32 locations directly, instead of parsing. + std::deque<offset_t> rel32_locations_A24 = {0x0008U, 0x0010U, 0x0018U, + 0x001CU}; + + // Generate everything. + auto reader1 = + std::make_unique<Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_A24>>( + translator, region, rel32_locations_A24, 0x0000U, 0x0020U); + CheckReader({{0x0008U, 0x0010U}, + {0x0010U, 0x0014U}, + {0x0018U, 0x0010U}, + {0x001CU, 0x0004U}}, + std::move(reader1)); + + // Exclude last. + auto reader2 = + std::make_unique<Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_A24>>( + translator, region, rel32_locations_A24, 0x0000U, 0x001CU); + CheckReader({{0x0008U, 0x0010U}, {0x0010U, 0x0014U}, {0x0018U, 0x0010U}}, + std::move(reader2)); + + // Only find one. 
+ auto reader3 = + std::make_unique<Rel32ReaderArm<AArch32Rel32Translator::AddrTraits_A24>>( + translator, region, rel32_locations_A24, 0x000CU, 0x0018U); + CheckReader({{0x0010U, 0x0014U}}, std::move(reader3)); +} + +TEST(Rel32UtilsTest, Rel32WriterArm_AArch32_Easy) { + constexpr offset_t kTestImageSize = 0x00100000U; + constexpr rva_t kRvaBegin = 0x00030000U; + TestAddressTranslator translator(kTestImageSize, kRvaBegin); + + std::vector<uint8_t> bytes = { + 0xFF, 0xFF, // 00030000: (Filler) + 0x01, 0xDE, // 00030002: B 00030008 ; T8 + 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler) + 0x01, 0xE0, // 00030008: B 0003000E ; T11 + 0xFF, 0xFF, // 0003000A: (Filler) + 0x80, 0xF3, 0x00, 0x80, // 0003000C: B 00030010 ; T20 + }; + MutableBufferView region(&bytes[0], bytes.size()); + + auto writer1 = + std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T8>>( + translator, region); + writer1->PutNext({0x0002U, 0x0004U}); + EXPECT_EQ(0xFF, bytes[0x02]); // 00030002: B 00030004 ; T8 + EXPECT_EQ(0xDE, bytes[0x03]); + + writer1->PutNext({0x0002U, 0x000AU}); + EXPECT_EQ(0x02, bytes[0x02]); // 00030002: B 0003000A ; T8 + EXPECT_EQ(0xDE, bytes[0x03]); + + auto writer2 = + std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T11>>( + translator, region); + writer2->PutNext({0x0008U, 0x0008U}); + EXPECT_EQ(0xFE, bytes[0x08]); // 00030008: B 00030008 ; T11 + EXPECT_EQ(0xE7, bytes[0x09]); + writer2->PutNext({0x0008U, 0x0010U}); + EXPECT_EQ(0x02, bytes[0x08]); // 00030008: B 00030010 ; T11 + EXPECT_EQ(0xE0, bytes[0x09]); + + auto writer3 = + std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T20>>( + translator, region); + writer3->PutNext({0x000CU, 0x000AU}); + EXPECT_EQ(0xBF, bytes[0x0C]); // 0003000C: B 0003000A ; T20 + EXPECT_EQ(0xF7, bytes[0x0D]); + EXPECT_EQ(0xFD, bytes[0x0E]); + EXPECT_EQ(0xAF, bytes[0x0F]); + writer3->PutNext({0x000CU, 0x0010U}); + EXPECT_EQ(0x80, bytes[0x0C]); // 0003000C: B 00030010 ; T20 + EXPECT_EQ(0xF3, 
bytes[0x0D]); + EXPECT_EQ(0x00, bytes[0x0E]); + EXPECT_EQ(0x80, bytes[0x0F]); +} + +TEST(Rel32UtilsTest, Rel32WriterArm_AArch32_Hard) { + constexpr offset_t kTestImageSize = 0x10000000U; + constexpr rva_t kRvaBegin = 0x0C030000U; + TestAddressTranslator translator(kTestImageSize, kRvaBegin); + + std::vector<uint8_t> bytes = { + 0xFF, 0xFF, // 0C030000: (Filler) + 0x00, 0xF0, 0x00, 0xB8, // 0C030002: B 0C030006 ; T24 + 0xFF, 0xFF, 0xFF, 0xFF, // 0C030006: (Filler) + 0x00, 0xF0, 0x7A, 0xE8, // 0C03000A: BLX 0C030100 ; T24 + 0xFF, 0xFF, // 0C03000E: (Filler) + 0x00, 0xF0, 0x7A, 0xE8, // 0C030010: BLX 0C030108 ; T24 + }; + MutableBufferView region(&bytes[0], bytes.size()); + + auto writer = + std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_T24>>( + translator, region); + writer->PutNext({0x0002U, 0x0000U}); + EXPECT_EQ(0xFF, bytes[0x02]); // 0C030002: B 0C030000 ; T24 + EXPECT_EQ(0xF7, bytes[0x03]); + EXPECT_EQ(0xFD, bytes[0x04]); + EXPECT_EQ(0xBF, bytes[0x05]); + writer->PutNext({0x0002U, 0x0008U}); + EXPECT_EQ(0x00, bytes[0x02]); // 0C030002: B 0C030008 ; T24 + EXPECT_EQ(0xF0, bytes[0x03]); + EXPECT_EQ(0x01, bytes[0x04]); + EXPECT_EQ(0xB8, bytes[0x05]); + + // BLX complication, with location that's not 4-byte aligned. + writer->PutNext({0x000AU, 0x0010U}); + EXPECT_EQ(0x00, bytes[0x0A]); // 0C03000A: BLX 0C030010 ; T24 + EXPECT_EQ(0xF0, bytes[0x0B]); + EXPECT_EQ(0x02, bytes[0x0C]); + EXPECT_EQ(0xE8, bytes[0x0D]); + writer->PutNext({0x000AU, 0x0100U}); + EXPECT_EQ(0x00, bytes[0x0A]); // 0C03000A: BLX 0C030100 ; T24 + EXPECT_EQ(0xF0, bytes[0x0B]); + EXPECT_EQ(0x7A, bytes[0x0C]); + EXPECT_EQ(0xE8, bytes[0x0D]); + writer->PutNext({0x000AU, 0x0000U}); + EXPECT_EQ(0xFF, bytes[0x0A]); // 0C03000A: BLX 0C030000 ; T24 + EXPECT_EQ(0xF7, bytes[0x0B]); + EXPECT_EQ(0xFA, bytes[0x0C]); + EXPECT_EQ(0xEF, bytes[0x0D]); + + // BLX complication, with location that's 4-byte aligned. 
+ writer->PutNext({0x0010U, 0x0010U}); + EXPECT_EQ(0xFF, bytes[0x10]); // 0C030010: BLX 0C030010 ; T24 + EXPECT_EQ(0xF7, bytes[0x11]); + EXPECT_EQ(0xFE, bytes[0x12]); + EXPECT_EQ(0xEF, bytes[0x13]); + writer->PutNext({0x0010U, 0x0108U}); + EXPECT_EQ(0x00, bytes[0x10]); // 0C030010: BLX 0C030108 ; T24 + EXPECT_EQ(0xF0, bytes[0x11]); + EXPECT_EQ(0x7A, bytes[0x12]); + EXPECT_EQ(0xE8, bytes[0x13]); +} + +// Test BLX encoding A2, which is an ARM instruction that switches to THUMB2, +// and therefore should have 2-byte alignment. +TEST(Rel32UtilsTest, AArch32SwitchToThumb2) { + constexpr offset_t kTestImageSize = 0x10000000U; + constexpr rva_t kRvaBegin = 0x08030000U; + TestAddressTranslator translator(kTestImageSize, kRvaBegin); + + std::vector<uint8_t> bytes = { + 0xFF, 0xFF, 0x00, 0x00, // 08030000: (Filler) + 0x00, 0x00, 0x00, 0xFA, // 08030004: BLX 0803000C ; A24 + }; + MutableBufferView region(&bytes[0], bytes.size()); + + auto writer = + std::make_unique<Rel32WriterArm<AArch32Rel32Translator::AddrTraits_A24>>( + translator, region); + + // To location that's 4-byte aligned. + writer->PutNext({0x0004U, 0x0100U}); + EXPECT_EQ(0x3D, bytes[0x04]); // 08030004: BLX 08030100 ; A24 + EXPECT_EQ(0x00, bytes[0x05]); + EXPECT_EQ(0x00, bytes[0x06]); + EXPECT_EQ(0xFA, bytes[0x07]); + + // To location that's 2-byte aligned but not 4-byte aligned. + writer->PutNext({0x0004U, 0x0052U}); + EXPECT_EQ(0x11, bytes[0x04]); // 08030004: BLX 08030052 ; A24 + EXPECT_EQ(0x00, bytes[0x05]); + EXPECT_EQ(0x00, bytes[0x06]); + EXPECT_EQ(0xFB, bytes[0x07]); + + // Clean slate code. + writer->PutNext({0x0004U, 0x000CU}); + EXPECT_EQ(0x00, bytes[0x04]); // 08030004: BLX 0803000C ; A24 + EXPECT_EQ(0x00, bytes[0x05]); + EXPECT_EQ(0x00, bytes[0x06]); + EXPECT_EQ(0xFA, bytes[0x07]); +} + +TEST(Rel32UtilsTest, ArmCopyDisp_AArch32) { + std::vector<uint8_t> expect_fail; + + // Successful A24. 
+ ArmCopyDispFun copier_A24 = + ArmCopyDisp<AArch32Rel32Translator::AddrTraits_A24>; + CheckCopy({0x12, 0x34, 0x56, 0xEB}, // 00000100: BL 0158D150 + {0xA0, 0xC0, 0x0E, 0x2A}, // 00000100: BCS 003B0388 + {0x12, 0x34, 0x56, 0x2A}, // 00000100: BCS 0158D150 + {0xA0, 0xC0, 0x0E, 0xEB}, // 00000100: BL 003B0388 + copier_A24); + + // Successful T8. + ArmCopyDispFun copier_T8 = ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T8>; + CheckCopy({0x12, 0xD5}, // 00000100: BPL 00000128 + {0xAB, 0xD8}, // 00000100: BHI 0000005A + {0x12, 0xD8}, // 00000100: BHI 00000128 + {0xAB, 0xD5}, // 00000100: BPL 0000005A + copier_T8); + + // Successful T11. + ArmCopyDispFun copier_T11 = + ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T11>; + CheckCopy({0xF5, 0xE0}, // 00000100: B 000002EE + {0x12, 0xE7}, // 00000100: B FFFFFF28 + {0xF5, 0xE0}, // 00000100: B 000002EE + {0x12, 0xE7}, // 00000100: B FFFFFF28 + copier_T11); + + // Failure if wrong copier is used. + CheckCopy(expect_fail, expect_fail, {0xF5, 0xE0}, {0x12, 0xE7}, copier_T8); + + // Successful T20. + ArmCopyDispFun copier_T20 = + ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T20>; + CheckCopy({0x41, 0xF2, 0xA5, 0x88}, // 00000100: BLS.W 0008124E + {0x04, 0xF3, 0x3C, 0xA2}, // 00000100: BGT.W 0004457C + {0x01, 0xF3, 0xA5, 0x88}, // 00000100: BGT.W 0008124E + {0x44, 0xF2, 0x3C, 0xA2}, // 00000100: BLS.W 0004457C + copier_T20); + CheckCopy({0x7F, 0xF6, 0xFF, 0xAF}, // 00000100: BLS.W 00000102 + {0x00, 0xF3, 0x00, 0x80}, // 00000100: BGT.W 00000104 + {0x3F, 0xF7, 0xFF, 0xAF}, // 00000100: BGT.W 00000102 + {0x40, 0xF2, 0x00, 0x80}, // 00000100: BLS.W 00000104 + copier_T20); + + // Failure if wrong copier is used. + CheckCopy(expect_fail, expect_fail, {0x41, 0xF2, 0xA5, 0x88}, + {0x84, 0xF3, 0x3C, 0xA2}, copier_A24); + + // T24: Mix B encoding T4 and BL encoding T1. 
+  ArmCopyDispFun copier_T24 =
+      ArmCopyDisp<AArch32Rel32Translator::AddrTraits_T24>;
+  CheckCopy({0xFF, 0xF7, 0xFF, 0xFF},  // 00000100: BL 00000102
+            {0x00, 0xF0, 0x00, 0x90},  // 00000100: B.W 00C00104
+            {0xFF, 0xF7, 0xFF, 0xBF},  // 00000100: B.W 00000102
+            {0x00, 0xF0, 0x00, 0xD0},  // 00000100: BL 00C00104
+            copier_T24);
+
+  // Mix B encoding T4 and BLX encoding T2. Note that the forward direction
+  // fails because B's target is invalid for BLX! It's possible to do "best
+  // effort" copying to reduce diff -- but right now we're not doing this.
+  CheckCopy(expect_fail, {0x00, 0xF0, 0x00, 0x90},  // 00000100: B.W 00C00104
+            {0xFF, 0xF7, 0xFF, 0xBF},               // 00000100: B.W 00000102
+            {0x00, 0xF0, 0x00, 0xC0},               // 00000100: BLX 00C00104
+            copier_T24);
+  // Success if B's target is valid for BLX.
+  CheckCopy({0xFF, 0xF7, 0xFE, 0xEF},  // 00000100: BLX 00000100
+            {0x00, 0xF0, 0x00, 0x90},  // 00000100: B.W 00C00104
+            {0xFF, 0xF7, 0xFE, 0xBF},  // 00000100: B.W 00000100
+            {0x00, 0xF0, 0x00, 0xC0},  // 00000100: BLX 00C00104
+            copier_T24);
+}
+
+TEST(Rel32UtilsTest, Rel32ReaderArm_AArch64) {
+  constexpr offset_t kTestImageSize = 0x00100000U;
+  constexpr rva_t kRvaBegin = 0x00030000U;
+  TestAddressTranslator translator(kTestImageSize, kRvaBegin);
+
+  std::vector<uint8_t> bytes = {
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030000: (Filler)
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030004: (Filler)
+      0x02, 0x00, 0x00, 0x14,  // 00030008: B 00030010 ; Immd26
+      0xFF, 0xFF, 0xFF, 0xFF,  // 0003000C: (Filler)
+      0x25, 0x00, 0x00, 0x35,  // 00030010: CBNZ R5,00030014 ; Immd19
+      0xFF, 0xFF, 0xFF, 0xFF,  // 00030014: (Filler)
+      0xCA, 0xFF, 0xFF, 0x54,  // 00030018: BGE 00030010 ; Immd19
+      0x4C, 0xFF, 0x8F, 0x36,  // 0003001C: TBZ X12,#17,00030004 ; Immd14
+  };
+  MutableBufferView region(&bytes[0], bytes.size());
+
+  // Generate Immd26. We specify rel32 locations directly.
+ std::deque<offset_t> rel32_locations_Immd26 = {0x0008U}; + auto reader1 = std::make_unique< + Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd26>>( + translator, region, rel32_locations_Immd26, 0x0000U, 0x0020U); + CheckReader({{0x0008U, 0x0010U}}, std::move(reader1)); + + // Generate Immd19. + std::deque<offset_t> rel32_locations_Immd19 = {0x0010U, 0x0018U}; + auto reader2 = std::make_unique< + Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd19>>( + translator, region, rel32_locations_Immd19, 0x0000U, 0x0020U); + CheckReader({{0x0010U, 0x0014U}, {0x0018U, 0x0010U}}, std::move(reader2)); + + // Generate Immd14. + std::deque<offset_t> rel32_locations_Immd14 = {0x001CU}; + auto reader3 = std::make_unique< + Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd14>>( + translator, region, rel32_locations_Immd14, 0x0000U, 0x0020U); + CheckReader({{0x001CU, 0x0004U}}, std::move(reader3)); +} + +TEST(Rel32UtilsTest, Rel32WriterArm_AArch64) { + constexpr offset_t kTestImageSize = 0x00100000U; + constexpr rva_t kRvaBegin = 0x00030000U; + TestAddressTranslator translator(kTestImageSize, kRvaBegin); + + std::vector<uint8_t> bytes = { + 0xFF, 0xFF, 0xFF, 0xFF, // 00030000: (Filler) + 0xFF, 0xFF, 0xFF, 0xFF, // 00030004: (Filler) + 0x02, 0x00, 0x00, 0x14, // 00030008: B 00030010 ; Immd26 + 0xFF, 0xFF, 0xFF, 0xFF, // 0003000C: (Filler) + 0x25, 0x00, 0x00, 0x35, // 00030010: CBNZ R5,00030014 ; Immd19 + 0xFF, 0xFF, 0xFF, 0xFF, // 00030014: (Filler) + 0xCA, 0xFF, 0xFF, 0x54, // 00030018: BGE 00030010 ; Immd19 + 0x4C, 0xFF, 0x8F, 0x36, // 0003001C: TBZ X12,#17,00030004 ; Immd14 + }; + MutableBufferView region(&bytes[0], bytes.size()); + + auto writer1 = std::make_unique< + Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd26>>(translator, + region); + writer1->PutNext({0x0008U, 0x0000U}); + EXPECT_EQ(0xFE, bytes[0x08]); // 00030008: B 00030000 ; Immd26 + EXPECT_EQ(0xFF, bytes[0x09]); + EXPECT_EQ(0xFF, bytes[0x0A]); + EXPECT_EQ(0x17, bytes[0x0B]); + + auto writer2 
= std::make_unique< + Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd19>>(translator, + region); + writer2->PutNext({0x0010U, 0x0000U}); + EXPECT_EQ(0x85, bytes[0x10]); // 00030010: CBNZ R5,00030000 ; Immd19 + EXPECT_EQ(0xFF, bytes[0x11]); + EXPECT_EQ(0xFF, bytes[0x12]); + EXPECT_EQ(0x35, bytes[0x13]); + writer2->PutNext({0x0018U, 0x001CU}); + EXPECT_EQ(0x2A, bytes[0x18]); // 00030018: BGE 0003001C ; Immd19 + EXPECT_EQ(0x00, bytes[0x19]); + EXPECT_EQ(0x00, bytes[0x1A]); + EXPECT_EQ(0x54, bytes[0x1B]); + + auto writer3 = std::make_unique< + Rel32WriterArm<AArch64Rel32Translator::AddrTraits_Immd14>>(translator, + region); + writer3->PutNext({0x001CU, 0x0010U}); + EXPECT_EQ(0xAC, bytes[0x1C]); // 0003001C: TBZ X12,#17,00030010 ; Immd14 + EXPECT_EQ(0xFF, bytes[0x1D]); + EXPECT_EQ(0x8F, bytes[0x1E]); + EXPECT_EQ(0x36, bytes[0x1F]); +} + +TEST(Rel32UtilsTest, ArmCopyDisp_AArch64) { + std::vector<uint8_t> expect_fail; + + // Successful Imm26. + ArmCopyDispFun copier_Immd26 = + ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd26>; + CheckCopy({0x12, 0x34, 0x56, 0x94}, // 00000100: BL 0158D148 + {0xA1, 0xC0, 0x0E, 0x17}, // 00000100: B FC3B0384 + {0x12, 0x34, 0x56, 0x14}, // 00000100: B 0158D148 + {0xA1, 0xC0, 0x0E, 0x97}, // 00000100: BL FC3B0384 + copier_Immd26); + + // Successful Imm19. + ArmCopyDispFun copier_Immd19 = + ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd19>; + CheckCopy({0x24, 0x12, 0x34, 0x54}, // 00000100: BMI 00068344 + {0xD7, 0xA5, 0xFC, 0xB4}, // 00000100: CBZ X23,FFFF95B8 + {0x37, 0x12, 0x34, 0xB4}, // 00000100: CBZ X23,00068344 + {0xC4, 0xA5, 0xFC, 0x54}, // 00000100: BMI FFFF95B8 + copier_Immd19); + + // Successful Imm14. 
+ ArmCopyDispFun copier_Immd14 = + ArmCopyDisp<AArch64Rel32Translator::AddrTraits_Immd14>; + CheckCopy({0x00, 0x00, 0x00, 0x36}, // 00000100: TBZ X0,#0,00000100 + {0xFF, 0xFF, 0xFF, 0xB7}, // 00000100: TBNZ ZR,#63,000000FC + {0x1F, 0x00, 0xF8, 0xB7}, // 00000100: TBNZ ZR,#63,00000100 + {0xE0, 0xFF, 0x07, 0x36}, // 00000100: TBZ X0,#0,000000FC + copier_Immd14); + + // Failure if wrong copier is used. + CheckCopy(expect_fail, expect_fail, {0x1F, 0x00, 0xF8, 0xB7}, + {0xE0, 0xFF, 0x07, 0x36}, copier_Immd26); +} + +} // namespace zucchini diff --git a/reloc_elf.cc b/reloc_elf.cc new file mode 100644 index 0000000..a7d1b38 --- /dev/null +++ b/reloc_elf.cc @@ -0,0 +1,163 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/reloc_elf.h" + +#include <algorithm> + +#include "base/logging.h" +#include "components/zucchini/algorithm.h" + +namespace zucchini { + +/******** RelocReaderElf ********/ + +RelocReaderElf::RelocReaderElf( + ConstBufferView image, + Bitness bitness, + const std::vector<SectionDimensionsElf>& reloc_section_dims, + uint32_t rel_type, + offset_t lo, + offset_t hi, + const AddressTranslator& translator) + : image_(image), + bitness_(bitness), + rel_type_(rel_type), + reloc_section_dimensions_(reloc_section_dims), + hi_(hi), + target_rva_to_offset_(translator) { + DCHECK(bitness_ == kBit32 || bitness_ == kBit64); + + // Find the relocation section at or right before |lo|. + cur_section_dimensions_ = std::upper_bound( + reloc_section_dimensions_.begin(), reloc_section_dimensions_.end(), lo); + if (cur_section_dimensions_ != reloc_section_dimensions_.begin()) + --cur_section_dimensions_; + + // |lo| and |hi_| do not cut across a reloc reference (e.g., + // Elf_Rel::r_offset), but may cut across a reloc struct (e.g. Elf_Rel)! 
+ // GetNext() emits all reloc references in |[lo, hi_)|, but needs to examine + // the entire reloc struct for context. Knowing that |r_offset| is the first + // entry in a reloc struct, |cursor_| and |hi_| are adjusted by the following: + // - If |lo| is in a reloc section, then |cursor_| is chosen, as |lo| aligned + // up to the next reloc struct, to exclude reloc struct that |lo| may cut + // across. + // - If |hi_| is in a reloc section, then align it up, to include reloc struct + // that |hi_| may cut across. + cursor_ = + base::checked_cast<offset_t>(cur_section_dimensions_->region.offset); + if (cursor_ < lo) + cursor_ += + AlignCeil<offset_t>(lo - cursor_, cur_section_dimensions_->entry_size); + + auto end_section = std::upper_bound(reloc_section_dimensions_.begin(), + reloc_section_dimensions_.end(), hi_); + if (end_section != reloc_section_dimensions_.begin()) { + --end_section; + if (hi_ - end_section->region.offset < end_section->region.size) { + offset_t end_region_offset = + base::checked_cast<offset_t>(end_section->region.offset); + hi_ = end_region_offset + AlignCeil<offset_t>(hi_ - end_region_offset, + end_section->entry_size); + } + } +} + +RelocReaderElf::~RelocReaderElf() = default; + +rva_t RelocReaderElf::GetRelocationTarget(elf::Elf32_Rel rel) const { + // The least significant byte of |rel.r_info| is the type. The other 3 bytes + // store the symbol, which we ignore. + uint32_t type = static_cast<uint32_t>(rel.r_info & 0xFF); + if (type == rel_type_) + return rel.r_offset; + return kInvalidRva; +} + +rva_t RelocReaderElf::GetRelocationTarget(elf::Elf64_Rel rel) const { + // The least significant 4 bytes of |rel.r_info| is the type. The other 4 + // bytes store the symbol, which we ignore. + uint32_t type = static_cast<uint32_t>(rel.r_info & 0xFFFFFFFF); + if (type == rel_type_) { + // Assume |rel.r_offset| fits within 32-bit integer. 
+ if ((rel.r_offset & 0xFFFFFFFF) == rel.r_offset) + return static_cast<rva_t>(rel.r_offset); + // Otherwise output warning. + LOG(WARNING) << "Warning: Skipping r_offset whose value exceeds 32-bits."; + } + return kInvalidRva; +} + +absl::optional<Reference> RelocReaderElf::GetNext() { + offset_t cur_entry_size = cur_section_dimensions_->entry_size; + offset_t cur_section_dimensions_end = + base::checked_cast<offset_t>(cur_section_dimensions_->region.hi()); + + for (; cursor_ + cur_entry_size <= hi_; cursor_ += cur_entry_size) { + while (cursor_ >= cur_section_dimensions_end) { + ++cur_section_dimensions_; + if (cur_section_dimensions_ == reloc_section_dimensions_.end()) + return absl::nullopt; + cur_entry_size = cur_section_dimensions_->entry_size; + cursor_ = + base::checked_cast<offset_t>(cur_section_dimensions_->region.offset); + if (cursor_ + cur_entry_size > hi_) + return absl::nullopt; + cur_section_dimensions_end = + base::checked_cast<offset_t>(cur_section_dimensions_->region.hi()); + } + rva_t target_rva = kInvalidRva; + // TODO(huangs): Fix RELA sections: Need to process |r_addend|. + switch (bitness_) { + case kBit32: + target_rva = GetRelocationTarget(image_.read<elf::Elf32_Rel>(cursor_)); + break; + case kBit64: + target_rva = GetRelocationTarget(image_.read<elf::Elf64_Rel>(cursor_)); + break; + } + if (target_rva == kInvalidRva) + continue; + // TODO(huangs): Make the check more strict: The reference body should not + // straddle section boundary. + offset_t target = target_rva_to_offset_.Convert(target_rva); + if (target == kInvalidOffset) + continue; + // |target| will be used to obtain abs32 references, so we must ensure that + // it lies inside |image_|. 
+ if (!image_.covers({target, WidthOf(bitness_)})) + continue; + offset_t location = cursor_; + cursor_ += cur_entry_size; + return Reference{location, target}; + } + return absl::nullopt; +} + +/******** RelocWriterElf ********/ + +RelocWriterElf::RelocWriterElf(MutableBufferView image, + Bitness bitness, + const AddressTranslator& translator) + : image_(image), bitness_(bitness), target_offset_to_rva_(translator) { + DCHECK(bitness_ == kBit32 || bitness_ == kBit64); +} + +RelocWriterElf::~RelocWriterElf() = default; + +void RelocWriterElf::PutNext(Reference ref) { + switch (bitness_) { + case kBit32: + image_.modify<elf::Elf32_Rel>(ref.location).r_offset = + target_offset_to_rva_.Convert(ref.target); + break; + case kBit64: + image_.modify<elf::Elf64_Rel>(ref.location).r_offset = + target_offset_to_rva_.Convert(ref.target); + break; + } + // Leave |reloc.r_info| alone. +} + +} // namespace zucchini diff --git a/reloc_elf.h b/reloc_elf.h new file mode 100644 index 0000000..ebf2577 --- /dev/null +++ b/reloc_elf.h @@ -0,0 +1,102 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_RELOC_ELF_H_ +#define COMPONENTS_ZUCCHINI_RELOC_ELF_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/type_elf.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +// Section dimensions for ELF files, to store relevant dimensions data from +// Elf32_Shdr and Elf64_Shdr, while reducing code duplication from templates. 
+struct SectionDimensionsElf { + SectionDimensionsElf() = default; + + template <class Elf_Shdr> + explicit SectionDimensionsElf(const Elf_Shdr& section) + : region(BufferRegion{base::checked_cast<size_t>(section.sh_offset), + base::checked_cast<size_t>(section.sh_size)}), + entry_size(base::checked_cast<offset_t>(section.sh_entsize)) {} + + friend bool operator<(const SectionDimensionsElf& a, + const SectionDimensionsElf& b) { + return a.region.offset < b.region.offset; + } + + friend bool operator<(offset_t offset, const SectionDimensionsElf& section) { + return offset < section.region.offset; + } + + BufferRegion region; + offset_t entry_size; // Varies across REL / RELA sections. +}; + +// A Generator to visit all reloc structs located in [|lo|, |hi|) (excluding +// truncated strct at |lo| but inlcuding truncated struct at |hi|), and emit +// valid References with |rel_type|. This implements a nested loop unrolled into +// a generator: the outer loop has |cur_section_dimensions_| visiting +// |reloc_section_dims| (sorted by |region.offset|), and the inner loop has +// |cursor_| visiting successive reloc structs within |cur_section_dimensions_|. +class RelocReaderElf : public ReferenceReader { + public: + RelocReaderElf( + ConstBufferView image, + Bitness bitness, + const std::vector<SectionDimensionsElf>& reloc_section_dimensions, + uint32_t rel_type, + offset_t lo, + offset_t hi, + const AddressTranslator& translator); + ~RelocReaderElf() override; + + // If |rel| contains |r_offset| for |rel_type_|, return the RVA. Otherwise + // return |kInvalidRva|. These also handle Elf*_Rela, by using the fact that + // Elf*_Rel is a prefix of Elf*_Rela. 
+ rva_t GetRelocationTarget(elf::Elf32_Rel rel) const; + rva_t GetRelocationTarget(elf::Elf64_Rel rel) const; + + // ReferenceReader: + absl::optional<Reference> GetNext() override; + + private: + const ConstBufferView image_; + const Bitness bitness_; + const uint32_t rel_type_; + const std::vector<SectionDimensionsElf>& reloc_section_dimensions_; + std::vector<SectionDimensionsElf>::const_iterator cur_section_dimensions_; + offset_t hi_; + offset_t cursor_; + AddressTranslator::RvaToOffsetCache target_rva_to_offset_; +}; + +class RelocWriterElf : public ReferenceWriter { + public: + RelocWriterElf(MutableBufferView image, + Bitness bitness, + const AddressTranslator& translator); + ~RelocWriterElf() override; + + // ReferenceWriter: + void PutNext(Reference ref) override; + + private: + MutableBufferView image_; + const Bitness bitness_; + AddressTranslator::OffsetToRvaCache target_offset_to_rva_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_RELOC_ELF_H_ diff --git a/reloc_elf_unittest.cc b/reloc_elf_unittest.cc new file mode 100644 index 0000000..8a1b932 --- /dev/null +++ b/reloc_elf_unittest.cc @@ -0,0 +1,242 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/reloc_elf.h" + +#include <stdint.h> + +#include <algorithm> +#include <memory> +#include <utility> +#include <vector> + +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/disassembler_elf.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/test_utils.h" +#include "components/zucchini/type_elf.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +template <class Elf_Shdr> +SectionDimensionsElf MakeSectionDimensions(const BufferRegion& region, + offset_t entry_size) { + using sh_offset_t = decltype(Elf_Shdr::sh_offset); + using sh_size_t = decltype(Elf_Shdr::sh_size); + using sh_entsize_t = decltype(Elf_Shdr::sh_entsize); + return SectionDimensionsElf{Elf_Shdr{ + 0, // sh_name + 0, // sh_type + 0, // sh_flags + 0, // sh_addr + // sh_offset + base::checked_cast<sh_offset_t>(region.offset), + // sh_size + base::checked_cast<sh_size_t>(region.size), + 0, // sh_link + 0, // sh_info + 0, // sh_addralign + // sh_entsize + base::checked_cast<sh_entsize_t>(entry_size), + }}; +} + +// Helper to manipulate an image with one or more relocation tables. +template <class ELF_INTEL_TRAITS> +class FakeImageWithReloc { + public: + using ElfIntelTraits = ELF_INTEL_TRAITS; + struct RelocSpec { + offset_t start; + std::vector<uint8_t> data; + }; + + FakeImageWithReloc(size_t image_size, + rva_t base_rva, + const std::vector<RelocSpec>& reloc_specs) + : image_data_(image_size, 0xFF), + mutable_image_(&image_data_[0], image_data_.size()) { + translator_.Initialize({{0, static_cast<offset_t>(image_size), base_rva, + static_cast<rva_t>(image_size)}}); + // Set up test image with reloc sections. 
+ for (const RelocSpec& reloc_spec : reloc_specs) { + BufferRegion reloc_region = {reloc_spec.start, reloc_spec.data.size()}; + std::copy(reloc_spec.data.begin(), reloc_spec.data.end(), + image_data_.begin() + reloc_region.lo()); + section_dimensions_.emplace_back( + MakeSectionDimensions<typename ElfIntelTraits::Elf_Shdr>( + reloc_region, ElfIntelTraits::kVAWidth)); + reloc_regions_.push_back(reloc_region); + } + } + + std::vector<Reference> ExtractRelocReferences() { + const size_t image_size = image_data_.size(); + ConstBufferView image = {image_data_.data(), image_size}; + + // Make RelocReaderElf. + auto reader = std::make_unique<RelocReaderElf>( + image, ElfIntelTraits::kBitness, section_dimensions_, + ElfIntelTraits::kRelType, 0, image_size, translator_); + + // Read all references and check. + std::vector<Reference> refs; + for (absl::optional<Reference> ref = reader->GetNext(); ref.has_value(); + ref = reader->GetNext()) { + refs.push_back(ref.value()); + } + return refs; + } + + std::unique_ptr<RelocWriterElf> MakeRelocWriter() { + return std::move(std::make_unique<RelocWriterElf>( + mutable_image_, ElfIntelTraits::kBitness, translator_)); + } + + std::vector<uint8_t> GetRawRelocData(int reloc_index) { + BufferRegion reloc_region = reloc_regions_[reloc_index]; + return Sub(image_data_, reloc_region.lo(), reloc_region.hi()); + } + + private: + std::vector<uint8_t> image_data_; + MutableBufferView mutable_image_; + std::vector<BufferRegion> reloc_regions_; + std::vector<SectionDimensionsElf> section_dimensions_; + AddressTranslator translator_; +}; + +} // namespace + +TEST(RelocElfTest, ReadWrite32) { + // Set up mock image: Size = 0x3000, .reloc at 0x600. RVA is 0x40000 + offset. + constexpr size_t kImageSize = 0x3000; + constexpr rva_t kBaseRva = 0x40000; + + constexpr offset_t kRelocStart0 = 0x600; + // "C0 10 04 00 08 00 00 00" represents + // (r_sym, r_type, r_offset) = (0x000000, 0x08, 0x000410C0). 
+ // r_type = 0x08 = R_386_RELATIVE, and so |r_offset| is an RVA 0x000410C0. + // Zucchini does not care about |r_sym|. + std::vector<uint8_t> reloc_data0 = ParseHexString( + "C0 10 04 00 08 00 00 00 " // R_386_RELATIVE. + "F8 10 04 00 08 AB CD EF " // R_386_RELATIVE. + "00 10 04 00 00 AB CD EF " // R_386_NONE. + "00 10 04 00 07 AB CD EF"); // R_386_JMP_SLOT. + + constexpr offset_t kRelocStart1 = 0x620; + std::vector<uint8_t> reloc_data1 = ParseHexString( + "BC 20 04 00 08 00 00 00 " // R_386_RELATIVE. + "A0 20 04 00 08 AB CD EF"); // R_386_RELATIVE. + + FakeImageWithReloc<Elf32IntelTraits> fake_image( + kImageSize, kBaseRva, + {{kRelocStart0, reloc_data0}, {kRelocStart1, reloc_data1}}); + + // Only R_386_RELATIVE references are extracted. Targets are translated from + // address (e.g., 0x000420BC) to offset (e.g., 0x20BC). + std::vector<Reference> exp_refs{ + {0x600, 0x10C0}, {0x608, 0x10F8}, {0x620, 0x20BC}, {0x628, 0x20A0}}; + EXPECT_EQ(exp_refs, fake_image.ExtractRelocReferences()); + + // Write reference, extract bytes and check. + std::unique_ptr<RelocWriterElf> writer = fake_image.MakeRelocWriter(); + + writer->PutNext({0x608, 0x1F83}); + std::vector<uint8_t> exp_reloc_data0 = ParseHexString( + "C0 10 04 00 08 00 00 00 " // R_386_RELATIVE. + "83 1F 04 00 08 AB CD EF " // R_386_RELATIVE (address modified). + "00 10 04 00 00 AB CD EF " // R_386_NONE. + "00 10 04 00 07 AB CD EF"); // R_386_JMP_SLOT. + EXPECT_EQ(exp_reloc_data0, fake_image.GetRawRelocData(0)); + + writer->PutNext({0x628, 0x2950}); + std::vector<uint8_t> exp_reloc_data1 = ParseHexString( + "BC 20 04 00 08 00 00 00 " // R_386_RELATIVE. + "50 29 04 00 08 AB CD EF"); // R_386_RELATIVE (address modified). + EXPECT_EQ(exp_reloc_data1, fake_image.GetRawRelocData(1)); +} + +TEST(RelocElfTest, Limit32) { + constexpr size_t kImageSize = 0x3000; + constexpr offset_t kBaseRva = 0x40000; + constexpr offset_t kRelocStart = 0x600; + // All R_386_RELATIVE. 
+ std::vector<uint8_t> reloc_data = ParseHexString( + // Strictly within file. + "00 00 04 00 08 00 00 00 " + "00 10 04 00 08 00 00 00 " + "F0 2F 04 00 08 00 00 00 " + "F8 2F 04 00 08 00 00 00 " + "FC 2F 04 00 08 00 00 00 " + // Straddles end of file. + "FD 2F 04 00 08 00 00 00 " + "FE 2F 04 00 08 00 00 00 " + "FF 2F 04 00 08 00 00 00 " + // Beyond end of file. + "00 30 04 00 08 00 00 00 " + "01 30 04 00 08 00 00 00 " + "FC FF FF 7F 08 00 00 00 " + "FE FF FF 7F 08 00 00 00 " + "00 00 00 80 08 00 00 00 " + "FC FF FF FF 08 00 00 00 " + "FF FF FF FF 08 00 00 00 " + // Another good reference. + "34 12 04 00 08 00 00 00"); + + FakeImageWithReloc<Elf32IntelTraits> fake_image(kImageSize, kBaseRva, + {{kRelocStart, reloc_data}}); + + std::vector<Reference> exp_refs{{0x600, 0x0000}, {0x608, 0x1000}, + {0x610, 0x2FF0}, {0x618, 0x2FF8}, + {0x620, 0x2FFC}, {0x678, 0x1234}}; + EXPECT_EQ(exp_refs, fake_image.ExtractRelocReferences()); +} + +TEST(RelocElfTest, Limit64) { + constexpr size_t kImageSize = 0x3000; + constexpr offset_t kBaseRva = 0x40000; + + constexpr offset_t kRelocStart = 0x600; + // All R_X86_64_RELATIVE. + std::vector<uint8_t> reloc_data = ParseHexString( + // Strictly within file. + "00 00 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + "00 10 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + "F0 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + "F4 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + "F8 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + // Straddles end of file. + "F9 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + "FC 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + "FF 2F 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + // Beyond end of file. 
+ "00 30 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + "01 30 04 00 00 00 00 00 08 00 00 00 00 00 00 00 " + "FC FF FF 7F 00 00 00 00 08 00 00 00 00 00 00 00 " + "FE FF FF 7F 00 00 00 00 08 00 00 00 00 00 00 00 " + "00 00 00 80 00 00 00 00 08 00 00 00 00 00 00 00 " + "FC FF FF FF 00 00 00 00 08 00 00 00 00 00 00 00 " + "FF FF FF FF 00 00 00 00 08 00 00 00 00 00 00 00 " + "00 00 04 00 01 00 00 00 08 00 00 00 00 00 00 00 " + "FF FF FF FF FF FF FF FF 08 00 00 00 00 00 00 00 " + "F8 FF FF FF FF FF FF FF 08 00 00 00 00 00 00 00 " + // Another good reference. + "34 12 04 00 00 00 00 00 08 00 00 00 00 00 00 00"); + + FakeImageWithReloc<Elf64IntelTraits> fake_image(kImageSize, kBaseRva, + {{kRelocStart, reloc_data}}); + + std::vector<Reference> exp_refs{{0x600, 0x0000}, {0x610, 0x1000}, + {0x620, 0x2FF0}, {0x630, 0x2FF4}, + {0x640, 0x2FF8}, {0x720, 0x1234}}; + EXPECT_EQ(exp_refs, fake_image.ExtractRelocReferences()); +} + +} // namespace zucchini diff --git a/reloc_win32.cc b/reloc_win32.cc new file mode 100644 index 0000000..b70aa8a --- /dev/null +++ b/reloc_win32.cc @@ -0,0 +1,196 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/reloc_win32.h" + +#include <algorithm> +#include <tuple> +#include <utility> + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/io_utils.h" +#include "components/zucchini/type_win_pe.h" + +namespace zucchini { + +/******** RelocUnitWin32 ********/ + +RelocUnitWin32::RelocUnitWin32() = default; +RelocUnitWin32::RelocUnitWin32(uint8_t type_in, + offset_t location_in, + rva_t target_rva_in) + : type(type_in), location(location_in), target_rva(target_rva_in) {} + +bool operator==(const RelocUnitWin32& a, const RelocUnitWin32& b) { + return std::tie(a.type, a.location, a.target_rva) == + std::tie(b.type, b.location, b.target_rva); +} + +/******** RelocRvaReaderWin32 ********/ + +// static +bool RelocRvaReaderWin32::FindRelocBlocks( + ConstBufferView image, + BufferRegion reloc_region, + std::vector<offset_t>* reloc_block_offsets) { + CHECK_LT(reloc_region.size, kOffsetBound); + ConstBufferView reloc_data = image[reloc_region]; + reloc_block_offsets->clear(); + while (reloc_data.size() >= sizeof(pe::RelocHeader)) { + reloc_block_offsets->push_back( + base::checked_cast<offset_t>(reloc_data.begin() - image.begin())); + auto size = reloc_data.read<pe::RelocHeader>(0).size; + // |size| must be aligned to 4-bytes. + if (size < sizeof(pe::RelocHeader) || size % 4 || size > reloc_data.size()) + return false; + reloc_data.remove_prefix(size); + } + return reloc_data.empty(); // Fail if trailing data exist. 
+} + +RelocRvaReaderWin32::RelocRvaReaderWin32( + ConstBufferView image, + BufferRegion reloc_region, + const std::vector<offset_t>& reloc_block_offsets, + offset_t lo, + offset_t hi) + : image_(image) { + CHECK_LE(lo, hi); + lo = base::checked_cast<offset_t>(reloc_region.InclusiveClamp(lo)); + hi = base::checked_cast<offset_t>(reloc_region.InclusiveClamp(hi)); + end_it_ = image_.begin() + hi; + + // By default, get GetNext() to produce empty output. + cur_reloc_units_ = BufferSource(end_it_, 0); + if (reloc_block_offsets.empty()) + return; + + // Find the block that contains |lo|. + auto block_it = std::upper_bound(reloc_block_offsets.begin(), + reloc_block_offsets.end(), lo); + DCHECK(block_it != reloc_block_offsets.begin()); + --block_it; + + // Initialize |cur_reloc_units_| and |rva_hi_bits_|. + if (!LoadRelocBlock(image_.begin() + *block_it)) + return; // Nothing left. + + // Skip |cur_reloc_units_| to |lo|, truncating up. + offset_t cur_reloc_units_offset = + base::checked_cast<offset_t>(cur_reloc_units_.begin() - image_.begin()); + if (lo > cur_reloc_units_offset) { + offset_t delta = + AlignCeil<offset_t>(lo - cur_reloc_units_offset, kRelocUnitSize); + cur_reloc_units_.Skip(delta); + } +} + +RelocRvaReaderWin32::RelocRvaReaderWin32(RelocRvaReaderWin32&&) = default; + +RelocRvaReaderWin32::~RelocRvaReaderWin32() = default; + +// Unrolls a nested loop: outer = reloc blocks and inner = reloc entries. +absl::optional<RelocUnitWin32> RelocRvaReaderWin32::GetNext() { + // "Outer loop" to find non-empty reloc block. + while (cur_reloc_units_.Remaining() < kRelocUnitSize) { + if (!LoadRelocBlock(cur_reloc_units_.end())) + return absl::nullopt; + } + if (end_it_ - cur_reloc_units_.begin() < kRelocUnitSize) + return absl::nullopt; + // "Inner loop" to extract single reloc unit. 
+ offset_t location = + base::checked_cast<offset_t>(cur_reloc_units_.begin() - image_.begin()); + uint16_t entry = cur_reloc_units_.read<uint16_t>(0); + uint8_t type = static_cast<uint8_t>(entry >> 12); + rva_t rva = rva_hi_bits_ + (entry & 0xFFF); + cur_reloc_units_.Skip(kRelocUnitSize); + return RelocUnitWin32{type, location, rva}; +} + +bool RelocRvaReaderWin32::LoadRelocBlock( + ConstBufferView::const_iterator block_begin) { + ConstBufferView header_buf(block_begin, sizeof(pe::RelocHeader)); + if (header_buf.end() >= end_it_ || + end_it_ - header_buf.end() < kRelocUnitSize) { + return false; + } + const auto& header = header_buf.read<pe::RelocHeader>(0); + rva_hi_bits_ = header.rva_hi; + uint32_t block_size = header.size; + if (block_size < sizeof(pe::RelocHeader)) + return false; + if ((block_size - sizeof(pe::RelocHeader)) % kRelocUnitSize != 0) + return false; + cur_reloc_units_ = BufferSource(block_begin, block_size); + cur_reloc_units_.Skip(sizeof(pe::RelocHeader)); + return true; +} + +/******** RelocReaderWin32 ********/ + +RelocReaderWin32::RelocReaderWin32(RelocRvaReaderWin32&& reloc_rva_reader, + uint16_t reloc_type, + offset_t offset_bound, + const AddressTranslator& translator) + : reloc_rva_reader_(std::move(reloc_rva_reader)), + reloc_type_(reloc_type), + offset_bound_(offset_bound), + entry_rva_to_offset_(translator) {} + +RelocReaderWin32::~RelocReaderWin32() = default; + +// ReferenceReader: +absl::optional<Reference> RelocReaderWin32::GetNext() { + for (absl::optional<RelocUnitWin32> unit = reloc_rva_reader_.GetNext(); + unit.has_value(); unit = reloc_rva_reader_.GetNext()) { + if (unit->type != reloc_type_) + continue; + offset_t target = entry_rva_to_offset_.Convert(unit->target_rva); + if (target == kInvalidOffset) + continue; + // Ensure that |target| (abs32 reference) lies entirely within the image. 
+ if (target >= offset_bound_) + continue; + offset_t location = unit->location; + return Reference{location, target}; + } + return absl::nullopt; +} + +/******** RelocWriterWin32 ********/ + +RelocWriterWin32::RelocWriterWin32( + uint16_t reloc_type, + MutableBufferView image, + BufferRegion reloc_region, + const std::vector<offset_t>& reloc_block_offsets, + const AddressTranslator& translator) + : reloc_type_(reloc_type), + image_(image), + reloc_region_(reloc_region), + reloc_block_offsets_(reloc_block_offsets), + target_offset_to_rva_(translator) {} + +RelocWriterWin32::~RelocWriterWin32() = default; + +void RelocWriterWin32::PutNext(Reference ref) { + DCHECK_GE(ref.location, reloc_region_.lo()); + DCHECK_LT(ref.location, reloc_region_.hi()); + auto block_it = std::upper_bound(reloc_block_offsets_.begin(), + reloc_block_offsets_.end(), ref.location); + --block_it; + rva_t rva_hi_bits = image_.read<pe::RelocHeader>(*block_it).rva_hi; + rva_t target_rva = target_offset_to_rva_.Convert(ref.target); + rva_t rva_lo_bits = (target_rva - rva_hi_bits) & 0xFFF; + if (target_rva != rva_hi_bits + rva_lo_bits) { + LOG(ERROR) << "Invalid RVA at " << AsHex<8>(ref.location) << "."; + return; + } + image_.write<uint16_t>(ref.location, rva_lo_bits | (reloc_type_ << 12)); +} + +} // namespace zucchini diff --git a/reloc_win32.h b/reloc_win32.h new file mode 100644 index 0000000..6393702 --- /dev/null +++ b/reloc_win32.h @@ -0,0 +1,140 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_RELOC_WIN32_H_ +#define COMPONENTS_ZUCCHINI_RELOC_WIN32_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "components/zucchini/address_translator.h" +#include "components/zucchini/buffer_source.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +// Win32 PE relocation table stores a list of (type, RVA) pairs. The table is +// organized into "blocks" for RVAs with common high-order bits (12-31). Each +// block consists of a list (even length) of 2-byte "units". Each unit stores +// type (in bits 12-15) and low-order bits (0-11) of an RVA (in bits 0-11). In +// pseudo-struct: +// struct Block { +// uint32_t rva_hi; +// uint32_t block_size_in_bytes; // 8 + multiple of 4. +// struct { +// uint16_t rva_lo:12, type:4; // Little-endian. +// } units[(block_size_in_bytes - 8) / 2]; // Size must be even. +// } reloc_table[num_blocks]; // May have padding (type = 0). + +// Extracted Win32 reloc Unit data. +struct RelocUnitWin32 { + RelocUnitWin32(); + RelocUnitWin32(uint8_t type_in, offset_t location_in, rva_t target_rva_in); + friend bool operator==(const RelocUnitWin32& a, const RelocUnitWin32& b); + + uint8_t type; + offset_t location; + rva_t target_rva; +}; + +// A reader that parses Win32 PE relocation data and emits RelocUnitWin32 for +// each reloc unit that lies strictly inside |[lo, hi)|. +class RelocRvaReaderWin32 { + public: + enum : ptrdiff_t { kRelocUnitSize = sizeof(uint16_t) }; + + // Parses |image| at |reloc_region| to find beginning offsets of each reloc + // block. On success, writes the result to |reloc_block_offsets| and returns + // true. Otherwise leaves |reloc_block_offsets| in an undetermined state, and + // returns false. 
+ static bool FindRelocBlocks(ConstBufferView image, + BufferRegion reloc_region, + std::vector<offset_t>* reloc_block_offsets); + + // |reloc_block_offsets| should be precomputed from FindRelBlocks(). + RelocRvaReaderWin32(ConstBufferView image, + BufferRegion reloc_region, + const std::vector<offset_t>& reloc_block_offsets, + offset_t lo, + offset_t hi); + RelocRvaReaderWin32(RelocRvaReaderWin32&&); + ~RelocRvaReaderWin32(); + + // Successively visits and returns data for each reloc unit, or absl::nullopt + // when all reloc units are found. Encapsulates block transition details. + absl::optional<RelocUnitWin32> GetNext(); + + private: + // Assuming that |block_begin| points to the beginning of a reloc block, loads + // |rva_hi_bits_| and assigns |cur_reloc_units_| as the region containing the + // associated units, potentially truncated by |end_it_|. Returns true if reloc + // data are available for read, and false otherwise. + bool LoadRelocBlock(ConstBufferView::const_iterator block_begin); + + const ConstBufferView image_; + + // End iterator. + ConstBufferView::const_iterator end_it_; + + // Unit data of the current reloc block. + BufferSource cur_reloc_units_; + + // High-order bits (12-31) for all relocs of the current reloc block. + rva_t rva_hi_bits_; +}; + +// A reader for Win32 reloc References, implemented as a filtering and +// translation adaptor of RelocRvaReaderWin32. +class RelocReaderWin32 : public ReferenceReader { + public: + // Takes ownership of |reloc_rva_reader|. |offset_bound| specifies the + // exclusive upper bound of reloc target offsets, taking account of widths of + // targets (which are abs32 References). 
+ RelocReaderWin32(RelocRvaReaderWin32&& reloc_rva_reader, + uint16_t reloc_type, + offset_t offset_bound, + const AddressTranslator& translator); + ~RelocReaderWin32() override; + + // ReferenceReader: + absl::optional<Reference> GetNext() override; + + private: + RelocRvaReaderWin32 reloc_rva_reader_; + const uint16_t reloc_type_; // uint16_t to simplify shifting (<< 12). + const offset_t offset_bound_; + AddressTranslator::RvaToOffsetCache entry_rva_to_offset_; +}; + +// A writer for Win32 reloc References. This is simpler than the reader since: +// - No iteration is required. +// - High-order bits of reloc target RVAs are assumed to be handled elsewhere, +// so only low-order bits need to be written. +class RelocWriterWin32 : public ReferenceWriter { + public: + RelocWriterWin32(uint16_t reloc_type, + MutableBufferView image, + BufferRegion reloc_region, + const std::vector<offset_t>& reloc_block_offsets, + const AddressTranslator& translator); + ~RelocWriterWin32() override; + + // ReferenceWriter: + void PutNext(Reference ref) override; + + private: + const uint16_t reloc_type_; + MutableBufferView image_; + BufferRegion reloc_region_; + const std::vector<offset_t>& reloc_block_offsets_; + AddressTranslator::OffsetToRvaCache target_offset_to_rva_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_RELOC_WIN32_H_ diff --git a/reloc_win32_unittest.cc b/reloc_win32_unittest.cc new file mode 100644 index 0000000..e3d33ca --- /dev/null +++ b/reloc_win32_unittest.cc @@ -0,0 +1,251 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/reloc_win32.h" + +#include <stdint.h> + +#include <algorithm> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "base/numerics/safe_conversions.h" +#include "base/test/gtest_util.h" +#include "components/zucchini/address_translator.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +class RelocUtilsWin32Test : public testing::Test { + protected: + using Units = std::vector<RelocUnitWin32>; + + RelocUtilsWin32Test() {} + + // Resets all tester data, calls RelocRvaReaderWin32::FindRelocBlocks(), and + // returns its results. + bool Initialize(const std::vector<uint8_t>& image_raw, + BufferRegion reloc_region) { + image_ = BufferSource(image_raw.data(), image_raw.size()); + reloc_region_ = reloc_region; + return RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_, + &reloc_block_offsets_); + } + + // Uses RelocRvaReaderWin32 to get all relocs, returned as Units. + Units EmitAll(offset_t lo, offset_t hi) { + RelocRvaReaderWin32 reader(image_, reloc_region_, reloc_block_offsets_, lo, + hi); + Units units; + for (auto unit = reader.GetNext(); unit.has_value(); + unit = reader.GetNext()) { + units.push_back(unit.value()); + } + return units; + } + + ConstBufferView image_; + BufferRegion reloc_region_; + std::vector<uint32_t> reloc_block_offsets_; +}; + +TEST_F(RelocUtilsWin32Test, RvaReaderEmpty) { + { + std::vector<uint8_t> image_raw = ParseHexString(""); + EXPECT_TRUE(Initialize(image_raw, {0U, 0U})); + EXPECT_EQ(std::vector<uint32_t>(), reloc_block_offsets_); // Nothing. + EXPECT_EQ(Units(), EmitAll(0U, 0U)); + } + { + std::vector<uint8_t> image_raw = ParseHexString("AA BB CC DD EE FF"); + EXPECT_TRUE(Initialize(image_raw, {2U, 0U})); + EXPECT_EQ(std::vector<uint32_t>(), reloc_block_offsets_); // Nothing. 
+ EXPECT_EQ(Units(), EmitAll(2U, 2U)); + } + { + std::vector<uint8_t> image_raw = ParseHexString("00 C0 00 00 08 00 00 00"); + EXPECT_TRUE(Initialize(image_raw, {0U, image_raw.size()})); + EXPECT_EQ(std::vector<uint32_t>({0U}), + reloc_block_offsets_); // Empty block. + EXPECT_EQ(Units(), EmitAll(0U, 8U)); + } +} + +TEST_F(RelocUtilsWin32Test, RvaReaderBad) { + std::string test_cases[] = { + "00 C0 00 00 07 00 00", // Header too small. + "00 C0 00 00 08 00 00", // Header too small, lies about size. + "00 C0 00 00 0A 00 00 00 66 31", // Odd number of units. + "00 C0 00 00 0C 00 00 00 66 31 88 31 FF", // Trailing data. + }; + for (const std::string& test_case : test_cases) { + std::vector<uint8_t> image_raw = ParseHexString(test_case); + EXPECT_FALSE(Initialize(image_raw, {0U, image_raw.size()})); + } +} + +TEST_F(RelocUtilsWin32Test, RvaReaderSingle) { + // Block 0: All type 0x3: {0xC166, 0xC288, 0xC342, (padding) 0xCFFF}. + std::vector<uint8_t> image_raw = ParseHexString( + "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF " + "00 C0 00 00 10 00 00 00 66 31 88 32 42 33 FF 0F " + "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF"); + constexpr offset_t kBlock0 = 16U; + Units exp0 = {{3, kBlock0 + 8U, 0xC166U}, + {3, kBlock0 + 10U, 0xC288U}, + {3, kBlock0 + 12U, 0xC342U}, + {0, kBlock0 + 14U, 0xCFFFU}}; + + EXPECT_TRUE(Initialize(image_raw, {16U, 16U})); + EXPECT_EQ(exp0, EmitAll(kBlock0, kBlock0 + 16U)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 8U)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 9U)); + EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0, kBlock0 + 10U)); + EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0 + 8U, kBlock0 + 10U)); + EXPECT_EQ(Units(), EmitAll(kBlock0 + 9U, kBlock0 + 10U)); + EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 15U)); + EXPECT_EQ(Sub(exp0, 2, 3), EmitAll(kBlock0 + 11U, kBlock0 + 15U)); +} + +TEST_F(RelocUtilsWin32Test, RvaReaderMulti) { + // The sample image encodes 3 reloc 
blocks:
+  // Block 0: All type 0x3: {0xC166, 0xC288, 0xC342, (padding) 0xCFFF}.
+  // Block 1: All type 0x3: {0x12166, 0x12288}.
+  // Block 2: All type 0xA: {0x24000, 0x24010, 0x24020, 0x24028, 0x2403C,
+  //     0x24170}.
+  std::vector<uint8_t> image_raw = ParseHexString(
+      "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF "
+      "00 C0 00 00 10 00 00 00 66 31 88 32 42 33 FF 0F "
+      "00 20 01 00 0C 00 00 00 66 31 88 32 "
+      "00 40 02 00 14 00 00 00 00 A0 10 A0 20 A0 28 A0 3C A0 70 A1 "
+      "FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF");
+  offset_t image_size = base::checked_cast<offset_t>(image_raw.size());
+  constexpr offset_t kBlock0 = 16U;
+  constexpr offset_t kBlock1 = kBlock0 + 16U;
+  constexpr offset_t kBlock2 = kBlock1 + 12U;
+  constexpr offset_t kBlockEnd = kBlock2 + 20U;
+  Units exp0 = {{3, kBlock0 + 8U, 0xC166U},
+                {3, kBlock0 + 10U, 0xC288U},
+                {3, kBlock0 + 12U, 0xC342U},
+                {0, kBlock0 + 14U, 0xCFFFU}};
+  Units exp1 = {{3, kBlock0 + 24U, 0x12166U}, {3, kBlock0 + 26U, 0x12288U}};
+  Units exp2 = {{10, kBlock0 + 36U, 0x24000U}, {10, kBlock0 + 38U, 0x24010U},
+                {10, kBlock0 + 40U, 0x24020U}, {10, kBlock0 + 42U, 0x24028U},
+                {10, kBlock0 + 44U, 0x2403CU}, {10, kBlock0 + 46U, 0x24170U}};
+
+  EXPECT_TRUE(Initialize(image_raw, {kBlock0, kBlockEnd - kBlock0}));
+  EXPECT_EQ(std::vector<uint32_t>({kBlock0, kBlock1, kBlock2}),
+            reloc_block_offsets_);
+
+  // Everything.
+  EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0, kBlockEnd));
+  EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(0, image_size));
+  // Entire blocks.
+  EXPECT_EQ(exp0, EmitAll(kBlock0, kBlock1));
+  EXPECT_EQ(exp1, EmitAll(kBlock1, kBlock2));
+  EXPECT_EQ(exp2, EmitAll(kBlock2, kBlockEnd));
+  EXPECT_EQ(Units(), EmitAll(0, kBlock0));
+  EXPECT_EQ(Units(), EmitAll(kBlockEnd, image_size));
+  // Within blocks, clipped at boundaries.
+ EXPECT_EQ(exp0, EmitAll(kBlock0 + 5U, kBlock1)); + EXPECT_EQ(exp0, EmitAll(kBlock0 + 8U, kBlock1)); + EXPECT_EQ(Sub(exp0, 1, 4), EmitAll(kBlock0 + 9U, kBlock1)); + EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 15U)); + EXPECT_EQ(Sub(exp0, 0, 3), EmitAll(kBlock0, kBlock0 + 14U)); + EXPECT_EQ(Sub(exp0, 0, 1), EmitAll(kBlock0 + 8U, kBlock0 + 10U)); + EXPECT_EQ(Sub(exp1, 1, 2), EmitAll(kBlock1 + 10U, kBlock1 + 12U)); + EXPECT_EQ(Sub(exp2, 2, 4), EmitAll(kBlock2 + 12U, kBlock2 + 16U)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0)); + EXPECT_EQ(Units(), EmitAll(kBlock0, kBlock0 + 8U)); + EXPECT_EQ(Units(), EmitAll(kBlock2 + 10U, kBlock2 + 11U)); + EXPECT_EQ(Units(), EmitAll(kBlock2 + 11U, kBlock2 + 12U)); + // Across blocks. + EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0 - 5U, kBlockEnd)); + EXPECT_EQ(Cat(Cat(exp0, exp1), exp2), EmitAll(kBlock0 + 6U, kBlockEnd)); + EXPECT_EQ(Cat(Cat(exp0, exp1), Sub(exp2, 0, 5)), + EmitAll(kBlock0 + 6U, kBlock2 + 18U)); + EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)), + EmitAll(kBlock0 + 12U, kBlock1 + 10U)); + EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)), + EmitAll(kBlock0 + 11U, kBlock1 + 10U)); + EXPECT_EQ(Cat(Sub(exp0, 2, 4), Sub(exp1, 0, 1)), + EmitAll(kBlock0 + 12U, kBlock1 + 11U)); + EXPECT_EQ(Sub(exp1, 1, 2), EmitAll(kBlock1 + 10U, kBlock2 + 5U)); + EXPECT_EQ(Cat(Sub(exp1, 1, 2), exp2), EmitAll(kBlock1 + 10U, kBlockEnd + 5)); + EXPECT_EQ(Units(), EmitAll(kBlock0 + 15, kBlock1 + 9)); +} + +TEST_F(RelocUtilsWin32Test, ReadWrite) { + // Set up mock image: Size = 0x3000, .reloc at 0x600. RVA is 0x40000 + offset. + constexpr rva_t kBaseRva = 0x40000; + std::vector<uint8_t> image_data(0x3000, 0xFF); + // 4 x86 relocs (xx 3x), 3 x64 relocs (xx Ax), 1 padding (xx 0X). 
+ std::vector<uint8_t> reloc_data = ParseHexString( + "00 10 04 00 10 00 00 00 C0 32 18 A3 F8 A7 FF 0F " + "00 20 04 00 10 00 00 00 80 A0 65 31 F8 37 BC 3A"); + reloc_region_ = {0x600, reloc_data.size()}; + std::copy(reloc_data.begin(), reloc_data.end(), + image_data.begin() + reloc_region_.lo()); + image_ = {image_data.data(), image_data.size()}; + offset_t image_size = base::checked_cast<offset_t>(image_.size()); + + AddressTranslator translator; + translator.Initialize({{0, image_size, kBaseRva, image_size}}); + + // Precompute |reloc_block_offsets_|. + EXPECT_TRUE(RelocRvaReaderWin32::FindRelocBlocks(image_, reloc_region_, + &reloc_block_offsets_)); + EXPECT_EQ(std::vector<uint32_t>({0x600U, 0x610U}), reloc_block_offsets_); + + // Focus on x86. + constexpr uint16_t kRelocTypeX86 = 3; + constexpr offset_t kVAWidthX86 = 4; + + // Make RelocRvaReaderWin32. + RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_, + reloc_block_offsets_, 0, image_size); + offset_t offset_bound = image_size - kVAWidthX86 + 1; + + // Make RelocReaderWin32 that wraps |reloc_rva_reader|. + auto reader = std::make_unique<RelocReaderWin32>( + std::move(reloc_rva_reader), kRelocTypeX86, offset_bound, translator); + + // Read all references and check. + std::vector<Reference> refs; + for (absl::optional<Reference> ref = reader->GetNext(); ref.has_value(); + ref = reader->GetNext()) { + refs.push_back(ref.value()); + } + std::vector<Reference> exp_refs{ + {0x608, 0x12C0}, {0x61A, 0x2165}, {0x61C, 0x27F8}, {0x61E, 0x2ABC}}; + EXPECT_EQ(exp_refs, refs); + + // Write reference, extract bytes and check. 
+ MutableBufferView mutable_image(&image_data[0], image_data.size()); + auto writer = std::make_unique<RelocWriterWin32>( + kRelocTypeX86, mutable_image, reloc_region_, reloc_block_offsets_, + translator); + + writer->PutNext({0x608, 0x1F83}); + std::vector<uint8_t> exp_reloc_data1 = ParseHexString( + "00 10 04 00 10 00 00 00 83 3F 18 A3 F8 A7 FF 0F " + "00 20 04 00 10 00 00 00 80 A0 65 31 F8 37 BC 3A"); + EXPECT_EQ(exp_reloc_data1, + Sub(image_data, reloc_region_.lo(), reloc_region_.hi())); + + writer->PutNext({0x61C, 0x2950}); + std::vector<uint8_t> exp_reloc_data2 = ParseHexString( + "00 10 04 00 10 00 00 00 83 3F 18 A3 F8 A7 FF 0F " + "00 20 04 00 10 00 00 00 80 A0 65 31 50 39 BC 3A"); + EXPECT_EQ(exp_reloc_data2, + Sub(image_data, reloc_region_.lo(), reloc_region_.hi())); +} + +} // namespace zucchini diff --git a/suffix_array.h b/suffix_array.h new file mode 100644 index 0000000..75b3a38 --- /dev/null +++ b/suffix_array.h @@ -0,0 +1,475 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_ +#define COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_ + +#include <algorithm> +#include <iterator> +#include <numeric> +#include <vector> + +#include "base/check.h" + +namespace zucchini { + +// A functor class that implements the naive suffix sorting algorithm that uses +// std::sort with lexicographical compare. This is only meant as reference of +// the interface. +class NaiveSuffixSort { + public: + // Type requirements: + // |InputRng| is an input random access range. + // |KeyType| is an unsigned integer type. + // |SAIt| is a random access iterator with mutable references. + template <class InputRng, class KeyType, class SAIt> + // |str| is the input string on which suffix sort is applied. 
+ // Characters found in |str| must be in the range [0, |key_bound|) + // |suffix_array| is the beginning of the destination range, which is at least + // as large as |str|. + void operator()(const InputRng& str, + KeyType key_bound, + SAIt suffix_array) const { + using size_type = typename SAIt::value_type; + + size_type n = static_cast<size_type>(std::end(str) - std::begin(str)); + + // |suffix_array| is first filled with ordered indices of |str|. + // Those indices are then sorted with lexicographical comparisons in |str|. + std::iota(suffix_array, suffix_array + n, 0); + std::sort(suffix_array, suffix_array + n, [&str](size_type i, size_type j) { + return std::lexicographical_compare(std::begin(str) + i, std::end(str), + std::begin(str) + j, std::end(str)); + }); + } +}; + +// A functor class that implements suffix array induced sorting (SA-IS) +// algorithm with linear time and memory complexity, +// see http://ieeexplore.ieee.org/abstract/document/5582081/ +class InducedSuffixSort { + public: + // Type requirements: + // |InputRng| is an input random access range. + // |KeyType| is an unsigned integer type. + // |SAIt| is a random access iterator with mutable values. + template <class InputRng, class KeyType, class SAIt> + // |str| is the input string on which suffix sort is applied. + // Characters found in |str| must be in the range [0, |key_bound|) + // |suffix_array| is the beginning of the destination range, which is at least + // as large as |str|. 
+ void operator()(const InputRng& str, + KeyType key_bound, + SAIt suffix_array) const { + using value_type = typename InputRng::value_type; + using size_type = typename SAIt::value_type; + + static_assert(std::is_unsigned<value_type>::value, + "SA-IS only supports input string with unsigned values"); + static_assert(std::is_unsigned<KeyType>::value, "KeyType must be unsigned"); + + size_type n = static_cast<size_type>(std::end(str) - std::begin(str)); + + Implementation<size_type, KeyType>::SuffixSort(std::begin(str), n, + key_bound, suffix_array); + } + + // Given string S of length n. We assume S is terminated by a unique sentinel + // $, which is considered as the smallest character. This sentinel does not + // exist in memory and is only treated implicitly, hence |n| does not count + // the sentinel in this implementation. We denote suf(S,i) the suffix formed + // by S[i..n). + + // A suffix suf(S,i) is said to be S-type or L-type, if suf(S,i) < suf(S,i+1) + // or suf(S,i) > suf(S,i+1), respectively. + enum SLType : bool { SType, LType }; + + // A character S[i] is said to be S-type or L-type if the suffix suf(S,i) is + // S-type or L-type, respectively. + + // A character S[i] is called LMS (leftmost S-type), if S[i] is S-type and + // S[i-1] is L-type. A suffix suf(S,i) is called LMS, if S[i] is an LMS + // character. + + // A substring S[i..j) is an LMS-substring if + // (1) S[i] is LMS, S[j] is LMS or the sentinel $, and S[i..j) has no other + // LMS characters, or + // (2) S[i..j) is the sentinel $. 
+ + template <class SizeType, class KeyType> + struct Implementation { + static_assert(std::is_unsigned<SizeType>::value, + "SizeType must be unsigned"); + static_assert(std::is_unsigned<KeyType>::value, "KeyType must be unsigned"); + using size_type = SizeType; + using key_type = KeyType; + + using iterator = typename std::vector<size_type>::iterator; + using const_iterator = typename std::vector<size_type>::const_iterator; + + // Partition every suffix based on SL-type. Returns the number of LMS + // suffixes. + template <class StrIt> + static size_type BuildSLPartition( + StrIt str, + size_type length, + key_type key_bound, + std::vector<SLType>::reverse_iterator sl_partition_it) { + // We will count LMS suffixes (S to L-type or last S-type). + size_type lms_count = 0; + + // |previous_type| is initialized to L-type to avoid counting an extra + // LMS suffix at the end + SLType previous_type = LType; + + // Initialized to dummy, impossible key. + key_type previous_key = key_bound; + + // We're travelling backward to determine the partition, + // as if we prepend one character at a time to the string, ex: + // b$ is L-type because b > $. + // ab$ is S-type because a < b, implying ab$ < b$. + // bab$ is L-type because b > a, implying bab$ > ab$. + // bbab$ is L-type, because bab$ was also L-type, implying bbab$ > bab$. + for (auto str_it = std::reverse_iterator<StrIt>(str + length); + str_it != std::reverse_iterator<StrIt>(str); + ++str_it, ++sl_partition_it) { + key_type current_key = *str_it; + + if (current_key > previous_key || previous_key == key_bound) { + // S[i] > S[i + 1] or S[i] is last character. + if (previous_type == SType) + // suf(S,i) is L-type and suf(S,i + 1) is S-type, therefore, + // suf(S,i+1) was a LMS suffix. + ++lms_count; + + previous_type = LType; // For next round. + } else if (current_key < previous_key) { + // S[i] < S[i + 1] + previous_type = SType; // For next round. 
+ } + // Else, S[i] == S[i + 1]: + // The next character that differs determines the SL-type, + // so we reuse the last seen type. + + *sl_partition_it = previous_type; + previous_key = current_key; // For next round. + } + + return lms_count; + } + + // Find indices of LMS suffixes and write result to |lms_indices|. + static void FindLmsSuffixes(const std::vector<SLType>& sl_partition, + iterator lms_indices) { + // |previous_type| is initialized to S-type to avoid counting an extra + // LMS suffix at the beginning + SLType previous_type = SType; + for (size_type i = 0; i < sl_partition.size(); ++i) { + if (sl_partition[i] == SType && previous_type == LType) + *lms_indices++ = i; + previous_type = sl_partition[i]; + } + } + + template <class StrIt> + static std::vector<size_type> MakeBucketCount(StrIt str, + size_type length, + key_type key_bound) { + // Occurrence of every unique character is counted in |buckets| + std::vector<size_type> buckets(static_cast<size_type>(key_bound)); + + for (auto it = str; it != str + length; ++it) + ++buckets[*it]; + return buckets; + } + + // Apply induced sort from |lms_indices| to |suffix_array| associated with + // the string |str|. + template <class StrIt, class SAIt> + static void InducedSort(StrIt str, + size_type length, + const std::vector<SLType>& sl_partition, + const std::vector<size_type>& lms_indices, + const std::vector<size_type>& buckets, + SAIt suffix_array) { + // All indices are first marked as unset with the illegal value |length|. + std::fill(suffix_array, suffix_array + length, length); + + // Used to mark bucket boundaries (head or end) as indices in str. + DCHECK(!buckets.empty()); + std::vector<size_type> bucket_bounds(buckets.size()); + + // Step 1: Assign indices for LMS suffixes, populating the end of + // respective buckets but keeping relative order. + + // Find the end of each bucket and write it to |bucket_bounds|. 
+      std::partial_sum(buckets.begin(), buckets.end(), bucket_bounds.begin());
+
+      // Process each |lms_indices| backward, and assign them to the end of
+      // their respective buckets, so relative order is preserved.
+      for (auto it = lms_indices.crbegin(); it != lms_indices.crend(); ++it) {
+        key_type key = str[*it];
+        suffix_array[--bucket_bounds[key]] = *it;
+      }
+
+      // Step 2
+      // Scan forward |suffix_array|; for each modified suf(S,i) for which
+      // suf(S,SA(i) - 1) is L-type, place suf(S,SA(i) - 1) to the current
+      // head of the corresponding bucket and forward the bucket head to the
+      // right.
+
+      // Find the head of each bucket and write it to |bucket_bounds|. Since
+      // only LMS suffixes were inserted in |suffix_array| during Step 1,
+      // |bucket_bounds| does not contain the head of each bucket and needs to
+      // be updated.
+      bucket_bounds[0] = 0;
+      std::partial_sum(buckets.begin(), buckets.end() - 1,
+                       bucket_bounds.begin() + 1);
+
+      // From Step 1, the sentinel $, which we treat implicitly, would have
+      // been placed at the beginning of |suffix_array|, since $ is always
+      // considered as the smallest character. We then have to deal with the
+      // previous (last) suffix.
+      if (sl_partition[length - 1] == LType) {
+        key_type key = str[length - 1];
+        suffix_array[bucket_bounds[key]++] = length - 1;
+      }
+      for (auto it = suffix_array; it != suffix_array + length; ++it) {
+        size_type suffix_index = *it;
+
+        // While the original algorithm marks unset suffixes with -1,
+        // we found that marking them with |length| is also possible and more
+        // convenient because we are working with unsigned integers.
+        if (suffix_index != length && suffix_index > 0 &&
+            sl_partition[--suffix_index] == LType) {
+          key_type key = str[suffix_index];
+          suffix_array[bucket_bounds[key]++] = suffix_index;
+        }
+      }
+
+      // Step 3
+      // Scan backward |suffix_array|; for each modified suf(S, i) for which
+      // suf(S,SA(i) - 1) is S-type, place suf(S,SA(i) - 1) to the current
+      // end of the corresponding bucket and forward the bucket head to the
+      // left.
+
+      // Find the end of each bucket and write it to |bucket_bounds|. Since
+      // only L-type suffixes were inserted in |suffix_array| during Step 2,
+      // |bucket_bounds| does not contain the end of each bucket and needs to
+      // be updated.
+      std::partial_sum(buckets.begin(), buckets.end(), bucket_bounds.begin());
+
+      for (auto it = std::reverse_iterator<SAIt>(suffix_array + length);
+           it != std::reverse_iterator<SAIt>(suffix_array); ++it) {
+        size_type suffix_index = *it;
+        if (suffix_index != length && suffix_index > 0 &&
+            sl_partition[--suffix_index] == SType) {
+          key_type key = str[suffix_index];
+          suffix_array[--bucket_bounds[key]] = suffix_index;
+        }
+      }
+      // Deals with the last suffix, because of the sentinel.
+      if (sl_partition[length - 1] == SType) {
+        key_type key = str[length - 1];
+        suffix_array[--bucket_bounds[key]] = length - 1;
+      }
+    }
+
+    // Given a string S starting at |str| with length |length|, an array
+    // starting at |suffix_array| containing lexicographically ordered LMS
+    // terminated substring indices of S and an SL-Type partition |sl_partition|
+    // of S, assigns a unique label to every unique LMS substring. The sorted
+    // labels for all LMS substrings are written to |lms_str|, while the indices
+    // of LMS suffixes are written to |lms_indices|. In addition, returns the
+    // total number of unique labels.
+ template <class StrIt, class SAIt> + static size_type LabelLmsSubstrings(StrIt str, + size_type length, + const std::vector<SLType>& sl_partition, + SAIt suffix_array, + iterator lms_indices, + iterator lms_str) { + // Labelling starts at 0. + size_type label = 0; + + // |previous_lms| is initialized to 0 to indicate it is unset. + // Note that suf(S,0) is never a LMS suffix. Substrings will be visited in + // lexicographical order. + size_type previous_lms = 0; + for (auto it = suffix_array; it != suffix_array + length; ++it) { + if (*it > 0 && sl_partition[*it] == SType && + sl_partition[*it - 1] == LType) { + // suf(S, *it) is a LMS suffix. + + size_type current_lms = *it; + if (previous_lms != 0) { + // There was a previous LMS suffix. Check if the current LMS + // substring is equal to the previous one. + SLType current_lms_type = SType; + SLType previous_lms_type = SType; + for (size_type k = 0;; ++k) { + // |current_lms_end| and |previous_lms_end| denote whether we have + // reached the end of the current and previous LMS substring, + // respectively + bool current_lms_end = false; + bool previous_lms_end = false; + + // Check for both previous and current substring ends. + // Note that it is more convenient to check if + // suf(S,current_lms + k) is an LMS suffix than to retrieve it + // from lms_indices. + if (current_lms + k >= length || + (current_lms_type == LType && + sl_partition[current_lms + k] == SType)) { + current_lms_end = true; + } + if (previous_lms + k >= length || + (previous_lms_type == LType && + sl_partition[previous_lms + k] == SType)) { + previous_lms_end = true; + } + + if (current_lms_end && previous_lms_end) { + break; // Previous and current substrings are identical. + } else if (current_lms_end != previous_lms_end || + str[current_lms + k] != str[previous_lms + k]) { + // Previous and current substrings differ, a new label is used. 
+                ++label;
+                break;
+              }
+
+              current_lms_type = sl_partition[current_lms + k];
+              previous_lms_type = sl_partition[previous_lms + k];
+            }
+          }
+          *lms_indices++ = *it;
+          *lms_str++ = label;
+          previous_lms = current_lms;
+        }
+      }
+
+      return label + 1;
+    }
+
+    // Implementation of the SA-IS algorithm. |str| must be a random access
+    // iterator pointing at the beginning of S with length |length|. The result
+    // is written in |suffix_array|, a random access iterator.
+    template <class StrIt, class SAIt>
+    static void SuffixSort(StrIt str,
+                           size_type length,
+                           key_type key_bound,
+                           SAIt suffix_array) {
+      if (length == 1)
+        *suffix_array = 0;
+      if (length < 2)
+        return;
+
+      std::vector<SLType> sl_partition(length);
+      size_type lms_count =
+          BuildSLPartition(str, length, key_bound, sl_partition.rbegin());
+      std::vector<size_type> lms_indices(lms_count);
+      FindLmsSuffixes(sl_partition, lms_indices.begin());
+      std::vector<size_type> buckets = MakeBucketCount(str, length, key_bound);
+
+      if (lms_indices.size() > 1) {
+        // Given |lms_indices| in the same order they appear in |str|, induce
+        // LMS substrings relative order and write result to |suffix_array|.
+        InducedSort(str, length, sl_partition, lms_indices, buckets,
+                    suffix_array);
+        std::vector<size_type> lms_str(lms_indices.size());
+
+        // Given LMS substrings in relative order found in |suffix_array|,
+        // map LMS substrings to unique labels to form a new string, |lms_str|.
+        size_type label_count =
+            LabelLmsSubstrings(str, length, sl_partition, suffix_array,
+                               lms_indices.begin(), lms_str.begin());
+
+        if (label_count < lms_str.size()) {
+          // Reorder |lms_str| to have LMS suffixes in the same order they
+          // appear in |str|.
+          for (size_type i = 0; i < lms_indices.size(); ++i)
+            suffix_array[lms_indices[i]] = lms_str[i];
+
+          SLType previous_type = SType;
+          for (size_type i = 0, j = 0; i < sl_partition.size(); ++i) {
+            if (sl_partition[i] == SType && previous_type == LType) {
+              lms_str[j] = suffix_array[i];
+              lms_indices[j++] = i;
+            }
+            previous_type = sl_partition[i];
+          }
+
+          // Recursively apply SuffixSort on |lms_str|, which is formed from
+          // labeled LMS suffixes in the same order they appear in |str|.
+          // Note that |KeyType| will be size_type because |lms_str| contains
+          // indices. |lms_str| is at most half the length of |str|.
+          Implementation<size_type, size_type>::SuffixSort(
+              lms_str.begin(), static_cast<size_type>(lms_str.size()),
+              label_count, suffix_array);
+
+          // Map LMS labels back to indices in |str| and write result to
+          // |lms_indices|. We're using |suffix_array| as a temporary buffer.
+          for (size_type i = 0; i < lms_indices.size(); ++i)
+            suffix_array[i] = lms_indices[suffix_array[i]];
+          std::copy_n(suffix_array, lms_indices.size(), lms_indices.begin());
+
+          // At this point, |lms_indices| contains sorted LMS suffixes of |str|.
+        }
+      }
+      // Given |lms_indices| where LMS suffixes are sorted, induce the full
+      // order of suffixes in |str|.
+      InducedSort(str, length, sl_partition, lms_indices, buckets,
+                  suffix_array);
+    }
+
+    Implementation() = delete;
+    Implementation(const Implementation&) = delete;
+    const Implementation& operator=(const Implementation&) = delete;
+  };
+};
+
+// Generates a sorted suffix array for the input string |str| using the functor
+// |Algorithm| which provides an interface equivalent to NaiveSuffixSort.
+// Characters found in |str| are assumed to be in range [0, |key_bound|).
+// Returns the suffix array as a vector.
+// |StrRng| is an input random access range.
+// |KeyType| is an unsigned integer type.
+template <class Algorithm, class StrRng, class KeyType> +std::vector<typename StrRng::size_type> MakeSuffixArray(const StrRng& str, + KeyType key_bound) { + Algorithm sort; + std::vector<typename StrRng::size_type> suffix_array(str.end() - str.begin()); + sort(str, key_bound, suffix_array.begin()); + return suffix_array; +} + +// Type requirements: +// |SARng| is an input random access range. +// |StrIt1| is a random access iterator. +// |StrIt2| is a forward iterator. +template <class SARng, class StrIt1, class StrIt2> +// Lexicographical lower bound using binary search for +// [|str2_first|, |str2_last|) in the suffix array |suffix_array| of a string +// starting at |str1_first|. This does not necessarily return the index of +// the longest matching substring. +auto SuffixLowerBound(const SARng& suffix_array, + StrIt1 str1_first, + StrIt2 str2_first, + StrIt2 str2_last) -> decltype(std::begin(suffix_array)) { + using size_type = typename SARng::value_type; + + size_t n = std::end(suffix_array) - std::begin(suffix_array); + auto it = std::lower_bound( + std::begin(suffix_array), std::end(suffix_array), str2_first, + [str1_first, str2_last, n](size_type a, StrIt2 b) { + return std::lexicographical_compare(str1_first + a, str1_first + n, b, + str2_last); + }); + return it; +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_SUFFIX_ARRAY_H_ diff --git a/suffix_array_unittest.cc b/suffix_array_unittest.cc new file mode 100644 index 0000000..69fca94 --- /dev/null +++ b/suffix_array_unittest.cc @@ -0,0 +1,342 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/suffix_array.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <initializer_list> +#include <string> +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using SLType = InducedSuffixSort::SLType; + +} // namespace + +using ustring = std::basic_string<unsigned char>; + +constexpr uint16_t kNumChar = 256; + +ustring MakeUnsignedString(const std::string& str) { + return {str.begin(), str.end()}; +} + +template <class T> +std::vector<T> MakeVector(const std::initializer_list<T>& ilist) { + return {ilist.begin(), ilist.end()}; +} + +void TestSlPartition(std::initializer_list<SLType> expected_sl_partition, + std::initializer_list<size_t> expected_lms_indices, + std::string str) { + using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>; + + std::vector<SLType> sl_partition(str.size()); + EXPECT_EQ(expected_lms_indices.size(), + SaisImpl::BuildSLPartition(str.begin(), str.size(), kNumChar, + sl_partition.rbegin())); + EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition); + + std::vector<size_t> lms_indices(expected_lms_indices.size()); + SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin()); + EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices); +} + +TEST(InducedSuffixSortTest, BuildSLPartition) { + TestSlPartition({}, {}, ""); + TestSlPartition( + { + SLType::LType, + }, + {}, "a"); + TestSlPartition( + { + SLType::LType, + SLType::LType, + }, + {}, "ba"); + TestSlPartition( + { + SLType::SType, + SLType::LType, + }, + {}, "ab"); + TestSlPartition( + { + SLType::SType, + SLType::SType, + SLType::LType, + }, + {}, "aab"); + TestSlPartition( + { + SLType::LType, + SLType::LType, + SLType::LType, + }, + {}, "bba"); + TestSlPartition( + { + SLType::LType, + SLType::SType, + SLType::LType, + }, + {1}, "bab"); + TestSlPartition( + { + SLType::LType, + SLType::SType, + SLType::SType, + SLType::LType, + }, + {1}, 
"baab"); + + TestSlPartition( + { + SLType::LType, // zucchini + SLType::LType, // ucchini + SLType::SType, // cchini + SLType::SType, // chini + SLType::SType, // hini + SLType::SType, // ini + SLType::LType, // ni + SLType::LType, // i + }, + {2}, "zucchini"); +} + +std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str, + uint16_t max_key) { + using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>; + return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key); +} + +TEST(InducedSuffixSortTest, BucketCount) { + using vec = std::vector<size_t>; + + EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4)); + EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4)); + EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4)); +} + +std::vector<size_t> InducedSortSubstring(ustring str) { + using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>; + std::vector<SLType> sl_partition(str.size()); + size_t lms_count = SaisImpl::BuildSLPartition( + str.begin(), str.size(), kNumChar, sl_partition.rbegin()); + std::vector<size_t> lms_indices(lms_count); + SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin()); + auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), kNumChar); + + std::vector<size_t> suffix_array(str.size()); + SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets, + suffix_array.begin()); + + return suffix_array; +} + +TEST(InducedSuffixSortTest, InducedSortSubstring) { + using vec = std::vector<size_t>; + + auto us = MakeUnsignedString; + + // L; a$ + EXPECT_EQ(vec({0}), InducedSortSubstring(us("a"))); + + // SL; ab$, b$ + EXPECT_EQ(vec({0, 1}), InducedSortSubstring(us("ab"))); + + // LL; a$, ba$ + EXPECT_EQ(vec({1, 0}), InducedSortSubstring(us("ba"))); + + // SLL; a$, aba$, ba$ + EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring(us("aba"))); + + // LSL; ab$, b$, ba + EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring(us("bab"))); + + // SSL; aab$, ab$, b$ + EXPECT_EQ(vec({0, 1, 2}), 
InducedSortSubstring(us("aab"))); + + // LSSL; aab$, ab$, b$, ba + EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring(us("baab"))); +} + +template <class Algorithm> +void TestSuffixSort(ustring test_str) { + std::vector<size_t> suffix_array = + MakeSuffixArray<Algorithm>(test_str, kNumChar); + EXPECT_EQ(test_str.size(), suffix_array.size()); + + // Expect that I[] is a permutation of [0, len]. + std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end()); + std::sort(sorted_suffix.begin(), sorted_suffix.end()); + for (size_t i = 0; i < test_str.size(); ++i) + EXPECT_EQ(i, sorted_suffix[i]); + + // Expect that all suffixes are strictly ordered. + auto end = test_str.end(); + for (size_t i = 1; i < test_str.size(); ++i) { + auto suf1 = test_str.begin() + suffix_array[i - 1]; + auto suf2 = test_str.begin() + suffix_array[i]; + bool is_less = std::lexicographical_compare(suf1, end, suf2, end); + EXPECT_TRUE(is_less); + } +} + +constexpr const char* test_strs[] = { + "", + "a", + "aa", + "za", + "CACAO", + "aaaaa", + "banana", + "tobeornottobe", + "The quick brown fox jumps over the lazy dog.", + "elephantelephantelephantelephantelephant", + "walawalawashington", + "-------------------------", + "011010011001011010010110011010010", + "3141592653589793238462643383279502884197169399375105", + "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD", + "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba", + "0123456789876543210", + "9876543210123456789", + "aababcabcdabcdeabcdefabcdefg", + "asdhklgalksdjghalksdjghalksdjgh", +}; + +TEST(SuffixSortTest, NaiveSuffixSort) { + for (const std::string& test_str : test_strs) { + TestSuffixSort<NaiveSuffixSort>(MakeUnsignedString(test_str)); + } +} + +TEST(SuffixSortTest, InducedSuffixSortSort) { + for (const std::string& test_str : test_strs) { + TestSuffixSort<InducedSuffixSort>(MakeUnsignedString(test_str)); + } +} + +// Test with sequence that has every character. 
+TEST(SuffixSortTest, AllChar) {
+  std::vector<unsigned char> all_char(kNumChar);
+  std::iota(all_char.begin(), all_char.end(), 0);
+
+  {
+    std::vector<size_t> suffix_array =
+        MakeSuffixArray<InducedSuffixSort>(all_char, kNumChar);
+    for (size_t i = 0; i < kNumChar; ++i)
+      EXPECT_EQ(i, suffix_array[i]);
+  }
+
+  std::vector<unsigned char> all_char_reverse(all_char.rbegin(),
+                                              all_char.rend());
+  {
+    std::vector<size_t> suffix_array =
+        MakeSuffixArray<InducedSuffixSort>(all_char_reverse, kNumChar);
+    for (size_t i = 0; i < kNumChar; ++i)
+      EXPECT_EQ(kNumChar - i - 1, suffix_array[i]);
+  }
+}
+
+void TestSuffixLowerBound(ustring base_str, ustring search_str) {
+  std::vector<size_t> suffix_array =
+      MakeSuffixArray<NaiveSuffixSort>(base_str, kNumChar);
+
+  auto pos = SuffixLowerBound(suffix_array, base_str.begin(),
+                              search_str.begin(), search_str.end());
+
+  auto end = base_str.end();
+  if (pos != suffix_array.begin()) {
+    // Previous suffix is less than |search_str|.
+    auto suf = base_str.begin() + pos[-1];
+    bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
+                                                search_str.end());
+    EXPECT_TRUE(is_less);
+  }
+  if (pos != suffix_array.end()) {
+    // Current suffix is greater or equal to |search_str|.
+    auto suf = base_str.begin() + *pos;
+    bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
+                                                search_str.end());
+    EXPECT_FALSE(is_less);
+  }
+}
+
+TEST(SuffixArrayTest, LowerBound) {
+  auto us = MakeUnsignedString;
+
+  TestSuffixLowerBound(us(""), us(""));
+  TestSuffixLowerBound(us(""), us("a"));
+  TestSuffixLowerBound(us("b"), us(""));
+  TestSuffixLowerBound(us("b"), us("a"));
+  TestSuffixLowerBound(us("b"), us("c"));
+  TestSuffixLowerBound(us("b"), us("bc"));
+  TestSuffixLowerBound(us("aa"), us("a"));
+  TestSuffixLowerBound(us("aa"), us("aa"));
+
+  ustring sentence = us("the quick brown fox jumps over the lazy dog.");
+  // Entire string: exact and unique.
+ TestSuffixLowerBound(sentence, sentence); + // Empty string: exact and non-unique. + TestSuffixLowerBound(sentence, us("")); + // Exact and unique suffix matches. + TestSuffixLowerBound(sentence, us(".")); + TestSuffixLowerBound(sentence, us("the lazy dog.")); + // Exact and unique non-suffix matches. + TestSuffixLowerBound(sentence, us("quick")); + TestSuffixLowerBound(sentence, us("the quick")); + // Partial and unique matches. + TestSuffixLowerBound(sentence, us("fox jumps with the hosps")); + TestSuffixLowerBound(sentence, us("xyz")); + // Exact and non-unique match: take lexicographical first. + TestSuffixLowerBound(sentence, us("the")); + TestSuffixLowerBound(sentence, us(" ")); + // Partial and non-unique match. + // query < "the l"... < "the q"... + TestSuffixLowerBound(sentence, us("the apple")); + // "the l"... < query < "the q"... + TestSuffixLowerBound(sentence, us("the opera")); + // "the l"... < "the q"... < query + TestSuffixLowerBound(sentence, us("the zebra")); + // Prefix match dominates suffix match (unique). + TestSuffixLowerBound(sentence, us("over quick brown fox")); + // Empty matchs. 
+ TestSuffixLowerBound(sentence, us(",")); + TestSuffixLowerBound(sentence, us("1234")); + TestSuffixLowerBound(sentence, us("THE QUICK BROWN FOX")); + TestSuffixLowerBound(sentence, us("(the")); +} + +TEST(SuffixArrayTest, LowerBoundExact) { + for (const std::string& test_str : test_strs) { + ustring test_ustr = MakeUnsignedString(test_str); + + std::vector<size_t> suffix_array = + MakeSuffixArray<InducedSuffixSort>(test_ustr, kNumChar); + + for (size_t lo = 0; lo < test_str.size(); ++lo) { + for (size_t hi = lo + 1; hi <= test_str.size(); ++hi) { + ustring query(test_ustr.begin() + lo, test_ustr.begin() + hi); + ASSERT_EQ(query.size(), hi - lo); + auto pos = SuffixLowerBound(suffix_array, test_ustr.begin(), + query.begin(), query.end()); + EXPECT_TRUE( + std::equal(query.begin(), query.end(), test_ustr.begin() + *pos)); + } + } + } +} + +} // namespace zucchini diff --git a/target_pool.cc b/target_pool.cc new file mode 100644 index 0000000..23551fd --- /dev/null +++ b/target_pool.cc @@ -0,0 +1,84 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/target_pool.h" + +#include <algorithm> +#include <iterator> +#include <utility> + +#include "base/check.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/equivalence_map.h" + +namespace zucchini { + +TargetPool::TargetPool() = default; + +TargetPool::TargetPool(std::vector<offset_t>&& targets) { + DCHECK(targets_.empty()); + DCHECK(std::is_sorted(targets.begin(), targets.end())); + targets_ = std::move(targets); +} + +TargetPool::TargetPool(TargetPool&&) = default; +TargetPool::TargetPool(const TargetPool&) = default; +TargetPool::~TargetPool() = default; + +void TargetPool::InsertTargets(const std::vector<offset_t>& targets) { + std::copy(targets.begin(), targets.end(), std::back_inserter(targets_)); + SortAndUniquify(&targets_); +} + +void TargetPool::InsertTargets(TargetSource* targets) { + for (auto target = targets->GetNext(); target.has_value(); + target = targets->GetNext()) { + targets_.push_back(*target); + } + // InsertTargets() can be called many times (number of reference types for the + // pool) in succession. Calling SortAndUniquify() every time enables deduping + // to occur more often. This prioritizes peak memory reduction over running + // time. + SortAndUniquify(&targets_); +} + +void TargetPool::InsertTargets(const std::vector<Reference>& references) { + // This can be called many times, so it's better to let std::back_inserter() + // manage |targets_| resize, instead of manually reserving space. 
+ std::transform(references.begin(), references.end(), + std::back_inserter(targets_), + [](const Reference& ref) { return ref.target; }); + SortAndUniquify(&targets_); +} + +void TargetPool::InsertTargets(ReferenceReader&& references) { + for (auto ref = references.GetNext(); ref.has_value(); + ref = references.GetNext()) { + targets_.push_back(ref->target); + } + SortAndUniquify(&targets_); +} + +key_t TargetPool::KeyForOffset(offset_t offset) const { + auto pos = std::lower_bound(targets_.begin(), targets_.end(), offset); + DCHECK(pos != targets_.end() && *pos == offset); + return static_cast<offset_t>(pos - targets_.begin()); +} + +key_t TargetPool::KeyForNearestOffset(offset_t offset) const { + auto pos = std::lower_bound(targets_.begin(), targets_.end(), offset); + if (pos != targets_.begin()) { + // If distances are equal, prefer lower key. + if (pos == targets_.end() || *pos - offset >= offset - pos[-1]) + --pos; + } + return static_cast<offset_t>(pos - targets_.begin()); +} + +void TargetPool::FilterAndProject(const OffsetMapper& offset_mapper) { + offset_mapper.ForwardProjectAll(&targets_); + std::sort(targets_.begin(), targets_.end()); +} + +} // namespace zucchini diff --git a/target_pool.h b/target_pool.h new file mode 100644 index 0000000..27884d6 --- /dev/null +++ b/target_pool.h @@ -0,0 +1,80 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TARGET_POOL_H_ +#define COMPONENTS_ZUCCHINI_TARGET_POOL_H_ + +#include <stddef.h> + +#include <vector> + +#include "components/zucchini/image_utils.h" +#include "components/zucchini/patch_reader.h" + +namespace zucchini { + +class OffsetMapper; +class TargetSource; + +// Ordered container of distinct targets that have the same semantics, along +// with a list of associated reference types, only used during patch generation. 
+class TargetPool { + public: + using const_iterator = std::vector<offset_t>::const_iterator; + + TargetPool(); + // Initializes the object with given sorted and unique |targets|. + explicit TargetPool(std::vector<offset_t>&& targets); + TargetPool(TargetPool&&); + TargetPool(const TargetPool&); + ~TargetPool(); + + // Insert new targets from various sources. These invalidate all previous key + // lookups. + // - From a list of targets, useful for adding extra targets in Zucchini-gen: + void InsertTargets(const std::vector<offset_t>& targets); + // - From TargetSource, useful for adding extra targets in Zucchini-apply: + void InsertTargets(TargetSource* targets); + // - From list of References, useful for listing targets in Zucchini-gen: + void InsertTargets(const std::vector<Reference>& references); + // - From ReferenceReader, useful for listing targets in Zucchini-apply: + void InsertTargets(ReferenceReader&& references); + + // Adds |type| as a reference type associated with the pool of targets. + void AddType(TypeTag type) { types_.push_back(type); } + + // Returns a canonical key associated with a valid target at |offset|. + key_t KeyForOffset(offset_t offset) const; + + // Returns a canonical key associated with the target nearest to |offset|. + key_t KeyForNearestOffset(offset_t offset) const; + + // Returns the target for a |key|, which is assumed to be valid and held by + // this class. + offset_t OffsetForKey(key_t key) const { return targets_[key]; } + + // Returns whether a particular key is valid. + bool KeyIsValid(key_t key) const { return key < targets_.size(); } + + // Uses |offset_mapper| to transform "old" |targets_| to "new" |targets_|, + // resulting in sorted and unique targets. + void FilterAndProject(const OffsetMapper& offset_mapper); + + // Accessors for testing. + const std::vector<offset_t>& targets() const { return targets_; } + const std::vector<TypeTag>& types() const { return types_; } + + // Returns the number of targets. 
+ size_t size() const { return targets_.size(); } + const_iterator begin() const { return targets_.cbegin(); } + const_iterator end() const { return targets_.cend(); } + + private: + std::vector<TypeTag> types_; // Enumerates type_tag for this pool. + std::vector<offset_t> targets_; // Targets for pool in ascending order. +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TARGET_POOL_H_ diff --git a/target_pool_unittest.cc b/target_pool_unittest.cc new file mode 100644 index 0000000..4c3efec --- /dev/null +++ b/target_pool_unittest.cc @@ -0,0 +1,64 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/target_pool.h" + +#include <cmath> +#include <string> +#include <utility> +#include <vector> + +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using OffsetVector = std::vector<offset_t>; + +} // namespace + +TEST(TargetPoolTest, InsertTargetsFromReferences) { + auto test_insert = [](std::vector<Reference>&& references) -> OffsetVector { + TargetPool target_pool; + target_pool.InsertTargets(references); + // Return copy since |target_pool| goes out of scope. 
+ return target_pool.targets(); + }; + + EXPECT_EQ(OffsetVector(), test_insert({})); + EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 0}, {10, 1}})); + EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 1}, {10, 0}})); + EXPECT_EQ(OffsetVector({0, 1, 2}), test_insert({{0, 1}, {10, 0}, {20, 2}})); + EXPECT_EQ(OffsetVector({0}), test_insert({{0, 0}, {10, 0}})); + EXPECT_EQ(OffsetVector({0, 1}), test_insert({{0, 0}, {10, 0}, {20, 1}})); +} + +TEST(TargetPoolTest, KeyOffset) { + auto test_key_offset = [](const std::string& nearest_offsets_key, + OffsetVector&& targets) { + TargetPool target_pool(std::move(targets)); + for (offset_t offset : target_pool.targets()) { + offset_t key = target_pool.KeyForOffset(offset); + EXPECT_LT(key, target_pool.size()); + EXPECT_EQ(offset, target_pool.OffsetForKey(key)); + } + for (offset_t offset = 0; offset < nearest_offsets_key.size(); ++offset) { + key_t key = target_pool.KeyForNearestOffset(offset); + EXPECT_EQ(key, static_cast<key_t>(nearest_offsets_key[offset] - '0')); + } + }; + test_key_offset("0000000000000000", {}); + test_key_offset("0000000000000000", {0}); + test_key_offset("0000000000000000", {1}); + test_key_offset("0111111111111111", {0, 1}); + test_key_offset("0011111111111111", {0, 2}); + test_key_offset("0011111111111111", {1, 2}); + test_key_offset("0001111111111111", {1, 3}); + test_key_offset("0001112223334444", {1, 3, 7, 9, 13}); + test_key_offset("0000011112223333", {1, 7, 9, 13}); +} + +} // namespace zucchini diff --git a/targets_affinity.cc b/targets_affinity.cc new file mode 100644 index 0000000..d083787 --- /dev/null +++ b/targets_affinity.cc @@ -0,0 +1,108 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/targets_affinity.h" + +#include <algorithm> + +#include "base/check_op.h" +#include "components/zucchini/equivalence_map.h" + +namespace zucchini { + +namespace { + +constexpr uint32_t kNoLabel = 0; +} + +TargetsAffinity::TargetsAffinity() = default; +TargetsAffinity::~TargetsAffinity() = default; + +void TargetsAffinity::InferFromSimilarities( + const EquivalenceMap& equivalences, + const std::vector<offset_t>& old_targets, + const std::vector<offset_t>& new_targets) { + forward_association_.assign(old_targets.size(), {}); + backward_association_.assign(new_targets.size(), {}); + + if (old_targets.empty() || new_targets.empty()) + return; + + key_t new_key = 0; + for (auto candidate : equivalences) { // Sorted by |dst_offset|. + DCHECK_GT(candidate.similarity, 0.0); + while (new_key < new_targets.size() && + new_targets[new_key] < candidate.eq.dst_offset) { + ++new_key; + } + + // Visit each new target covered by |candidate.eq| and find / update its + // associated old target. + for (; new_key < new_targets.size() && + new_targets[new_key] < candidate.eq.dst_end(); + ++new_key) { + if (backward_association_[new_key].affinity >= candidate.similarity) + continue; + + DCHECK_GE(new_targets[new_key], candidate.eq.dst_offset); + offset_t old_target = new_targets[new_key] - candidate.eq.dst_offset + + candidate.eq.src_offset; + auto old_it = + std::lower_bound(old_targets.begin(), old_targets.end(), old_target); + // If new target can be mapped via |candidate.eq| to an old target, then + // attempt to associate them. Multiple new targets can compete for the + // same old target. The heuristic here makes selections to maximize + // |candidate.similarity|, and if a tie occurs, minimize new target offset + // (by first-come, first-served). 
+ if (old_it != old_targets.end() && *old_it == old_target) { + key_t old_key = static_cast<key_t>(old_it - old_targets.begin()); + if (candidate.similarity > forward_association_[old_key].affinity) { + // Reset other associations. + if (forward_association_[old_key].affinity > 0.0) + backward_association_[forward_association_[old_key].other] = {}; + if (backward_association_[new_key].affinity > 0.0) + forward_association_[backward_association_[new_key].other] = {}; + // Assign new association. + forward_association_[old_key] = {new_key, candidate.similarity}; + backward_association_[new_key] = {old_key, candidate.similarity}; + } + } + } + } +} + +uint32_t TargetsAffinity::AssignLabels(double min_affinity, + std::vector<uint32_t>* old_labels, + std::vector<uint32_t>* new_labels) { + old_labels->assign(forward_association_.size(), kNoLabel); + new_labels->assign(backward_association_.size(), kNoLabel); + + uint32_t label = kNoLabel + 1; + for (key_t old_key = 0; old_key < forward_association_.size(); ++old_key) { + Association association = forward_association_[old_key]; + if (association.affinity >= min_affinity) { + (*old_labels)[old_key] = label; + DCHECK_EQ(0U, (*new_labels)[association.other]); + (*new_labels)[association.other] = label; + ++label; + } + } + return label; +} + +double TargetsAffinity::AffinityBetween(key_t old_key, key_t new_key) const { + DCHECK_LT(old_key, forward_association_.size()); + DCHECK_LT(new_key, backward_association_.size()); + if (forward_association_[old_key].affinity > 0.0 && + forward_association_[old_key].other == new_key) { + DCHECK_EQ(backward_association_[new_key].other, old_key); + DCHECK_EQ(forward_association_[old_key].affinity, + backward_association_[new_key].affinity); + return forward_association_[old_key].affinity; + } + return -std::max(forward_association_[old_key].affinity, + backward_association_[new_key].affinity); +} + +} // namespace zucchini diff --git a/targets_affinity.h b/targets_affinity.h new file mode 
100644 index 0000000..dff1741 --- /dev/null +++ b/targets_affinity.h @@ -0,0 +1,73 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_ +#define COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +class EquivalenceMap; + +// Computes and stores affinity between old and new targets for a single target +// pool. This is only used during patch generation. +class TargetsAffinity { + public: + TargetsAffinity(); + TargetsAffinity(const TargetsAffinity&) = delete; + const TargetsAffinity& operator=(const TargetsAffinity&) = delete; + ~TargetsAffinity(); + + // Infers affinity between |old_targets| and |new_targets| using similarities + // described by |equivalence_map|, and updates internal state for retrieval of + // affinity scores. Both |old_targets| and |new_targets| are targets in the + // same pool and are sorted in ascending order. + void InferFromSimilarities(const EquivalenceMap& equivalence_map, + const std::vector<offset_t>& old_targets, + const std::vector<offset_t>& new_targets); + + // Assigns labels to targets based on associations previously inferred, using + // |min_affinity| to reject associations with weak |affinity|. Label 0 is + // assigned to unassociated targets. Labels for old targets are written to + // |old_labels| and labels for new targets are written to |new_labels|. + // Returns the upper bound on assigned labels (>= 1 since 0 is used). + uint32_t AssignLabels(double min_affinity, + std::vector<uint32_t>* old_labels, + std::vector<uint32_t>* new_labels); + + // Returns the affinity score between targets identified by |old_key| and + // |new_keys|. 
Affinity > 0 means an association is likely, < 0 means + // incompatible association, and 0 means neither targets have been associated. + double AffinityBetween(key_t old_key, key_t new_key) const; + + private: + struct Association { + key_t other = 0; + double affinity = 0.0; + }; + + // Forward and backward associations between old and new targets. For each + // Association element, if |affinity == 0.0| then no association is defined + // (and |other| is meaningless|. Otherwise |affinity > 0.0|, and the + // association between |old_labels[old_key]| and |new_labels[new_key]| is + // represented by: + // forward_association_[old_key].other == new_key; + // backward_association_[new_key].other == old_key; + // forward_association_[old_key].affinity == + // backward_association_[new_key].affinity; + // The two lists contain the same information, but having both enables quick + // lookup, given |old_key| or |new_key|. + std::vector<Association> forward_association_; + std::vector<Association> backward_association_; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TARGETS_AFFINITY_H_ diff --git a/targets_affinity_unittest.cc b/targets_affinity_unittest.cc new file mode 100644 index 0000000..86182f9 --- /dev/null +++ b/targets_affinity_unittest.cc @@ -0,0 +1,131 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/targets_affinity.h" + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "components/zucchini/equivalence_map.h" +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +TEST(TargetsAffinityTest, AffinityBetween) { + using AffinityVector = std::vector<std::vector<double>>; + + // A common TargetsAffinity is used across independent tests. 
This is to + // reflect actual usage, in which common TargetsAffinity is used so that + // internal buffers get reused. + TargetsAffinity targets_affinity; + + auto test_affinity = [&targets_affinity]( + const EquivalenceMap& equivalence_map, + const std::vector<offset_t>& old_targets, + const std::vector<offset_t>& new_targets) { + targets_affinity.InferFromSimilarities(equivalence_map, old_targets, + new_targets); + AffinityVector affinities(old_targets.size()); + for (key_t i = 0; i < old_targets.size(); ++i) { + for (key_t j = 0; j < new_targets.size(); ++j) { + affinities[i].push_back(targets_affinity.AffinityBetween(i, j)); + } + } + return affinities; + }; + + EXPECT_EQ(AffinityVector({}), test_affinity(EquivalenceMap(), {}, {})); + EXPECT_EQ(AffinityVector({}), + test_affinity(EquivalenceMap({{{0, 0, 8}, 1.0}}), {}, {})); + + EXPECT_EQ(AffinityVector({{0.0, 0.0}, {0.0, 0.0}}), + test_affinity(EquivalenceMap(), {0, 10}, {0, 5})); + + EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 0, 1}, 1.0}}), {0, 10}, {0, 5})); + + EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 0, 2}, 1.0}}), {1, 10}, {1, 5})); + + EXPECT_EQ(AffinityVector({{0.0, 0.0}, {0.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 1, 2}, 1.0}}), {1, 10}, {1, 5})); + + EXPECT_EQ(AffinityVector({{1.0, -1.0}, {-1.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 1, 2}, 1.0}}), {0, 10}, {1, 5})); + + EXPECT_EQ(AffinityVector({{2.0, -2.0}, {-2.0, 0.0}}), + test_affinity(EquivalenceMap({{{0, 0, 1}, 2.0}}), {0, 10}, {0, 5})); + + EXPECT_EQ( + AffinityVector({{1.0, -1.0}, {-1.0, 1.0}, {-1.0, -1.0}}), + test_affinity(EquivalenceMap({{{0, 0, 6}, 1.0}}), {0, 5, 10}, {0, 5})); + + EXPECT_EQ(AffinityVector({{-2.0, 2.0}, {1.0, -2.0}, {-1.0, -2.0}}), + test_affinity(EquivalenceMap({{{5, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}), + {0, 5, 10}, {0, 5})); + + EXPECT_EQ(AffinityVector({{-2.0, 2.0}, {0.0, -2.0}, {0.0, -2.0}}), + 
test_affinity(EquivalenceMap({{{0, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}), + {0, 5, 10}, {0, 5})); +} + +TEST(TargetsAffinityTest, AssignLabels) { + // A common TargetsAffinity is used across independent tests. This is to + // reflect actual usage, in which common TargetsAffinity is used so that + // internal buffers get reused. + TargetsAffinity targets_affinity; + + auto test_labels_assignment = + [&targets_affinity](const EquivalenceMap& equivalence_map, + const std::vector<offset_t>& old_targets, + const std::vector<offset_t>& new_targets, + double min_affinity, + const std::vector<uint32_t>& expected_old_labels, + const std::vector<uint32_t>& expected_new_labels) { + targets_affinity.InferFromSimilarities(equivalence_map, old_targets, + new_targets); + std::vector<uint32_t> old_labels; + std::vector<uint32_t> new_labels; + size_t bound = targets_affinity.AssignLabels(min_affinity, &old_labels, + &new_labels); + EXPECT_EQ(expected_old_labels, old_labels); + EXPECT_EQ(expected_new_labels, new_labels); + return bound; + }; + + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap(), {}, {}, 1.0, {}, {})); + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 8}, 1.0}}), {}, + {}, 1.0, {}, {})); + + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap(), {0, 10}, {0, 5}, 1.0, + {0, 0}, {0, 0})); + + EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}), + {0, 10}, {0, 5}, 1.0, {1, 0}, {1, 0})); + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 0.99}}), + {0, 10}, {0, 5}, 1.0, {0, 0}, {0, 0})); + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}), + {0, 10}, {0, 5}, 1.01, {0, 0}, {0, 0})); + EXPECT_EQ(1U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 1.0}}), + {0, 10}, {0, 5}, 15.0, {0, 0}, {0, 0})); + EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 0, 1}, 15.0}}), + {0, 10}, {0, 5}, 15.0, {1, 0}, {1, 0})); + + EXPECT_EQ(2U, test_labels_assignment(EquivalenceMap({{{0, 1, 2}, 1.0}}), + {0, 10}, 
{1, 5}, 1.0, {1, 0}, {1, 0})); + EXPECT_EQ( + 3U, test_labels_assignment(EquivalenceMap({{{0, 0, 6}, 1.0}}), {0, 5, 10}, + {0, 5}, 1.0, {1, 2, 0}, {1, 2})); + EXPECT_EQ(3U, test_labels_assignment( + EquivalenceMap({{{5, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}), + {0, 5, 10}, {0, 5}, 1.0, {1, 2, 0}, {2, 1})); + EXPECT_EQ(2U, test_labels_assignment( + EquivalenceMap({{{0, 0, 2}, 1.0}, {{0, 5, 2}, 2.0}}), + {0, 5, 10}, {0, 5}, 1.0, {1, 0, 0}, {0, 1})); +} + +} // namespace zucchini diff --git a/test_disassembler.cc b/test_disassembler.cc new file mode 100644 index 0000000..2d6727b --- /dev/null +++ b/test_disassembler.cc @@ -0,0 +1,61 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/test_disassembler.h" + +#include "components/zucchini/test_reference_reader.h" + +namespace zucchini { + +// |num_equivalence_iterations_| = 2 to cover common case for testing. 
+TestDisassembler::TestDisassembler(const ReferenceTypeTraits& traits1, + const std::vector<Reference>& refs1, + const ReferenceTypeTraits& traits2, + const std::vector<Reference>& refs2, + const ReferenceTypeTraits& traits3, + const std::vector<Reference>& refs3) + : Disassembler(2), + traits_{traits1, traits2, traits3}, + refs_{refs1, refs2, refs3} {} + +TestDisassembler::~TestDisassembler() = default; + +ExecutableType TestDisassembler::GetExeType() const { + return kExeTypeUnknown; +} + +std::string TestDisassembler::GetExeTypeString() const { + return "(Unknown)"; +} + +std::vector<ReferenceGroup> TestDisassembler::MakeReferenceGroups() const { + return { + {traits_[0], &TestDisassembler::MakeReadRefs1, + &TestDisassembler::MakeWriteRefs1}, + {traits_[1], &TestDisassembler::MakeReadRefs2, + &TestDisassembler::MakeWriteRefs2}, + {traits_[2], &TestDisassembler::MakeReadRefs3, + &TestDisassembler::MakeWriteRefs3}, + }; +} + +bool TestDisassembler::Parse(ConstBufferView image) { + return true; +} + +std::unique_ptr<ReferenceReader> TestDisassembler::MakeReadRefs(int type) { + return std::make_unique<TestReferenceReader>(refs_[type]); +} + +std::unique_ptr<ReferenceWriter> TestDisassembler::MakeWriteRefs( + MutableBufferView image) { + class NoOpWriter : public ReferenceWriter { + public: + // ReferenceWriter: + void PutNext(Reference) override {} + }; + return std::make_unique<NoOpWriter>(); +} + +} // namespace zucchini diff --git a/test_disassembler.h b/test_disassembler.h new file mode 100644 index 0000000..e434fc4 --- /dev/null +++ b/test_disassembler.h @@ -0,0 +1,77 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_ +#define COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +// A trivial Disassembler that reads injected references of 3 different types. +// This is only meant for testing and is not a full implementation of a +// disassembler. Reading reference ignores bounds, and writing references does +// nothing. +class TestDisassembler : public Disassembler { + public: + TestDisassembler(const ReferenceTypeTraits& traits1, + const std::vector<Reference>& refs1, + const ReferenceTypeTraits& traits2, + const std::vector<Reference>& refs2, + const ReferenceTypeTraits& traits3, + const std::vector<Reference>& refs3); + TestDisassembler(const TestDisassembler&) = delete; + const TestDisassembler& operator=(const TestDisassembler&) = delete; + ~TestDisassembler() override; + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector<ReferenceGroup> MakeReferenceGroups() const override; + + // Disassembler::ReaderFactory: + std::unique_ptr<ReferenceReader> MakeReadRefs1(offset_t /*lower*/, + offset_t /*upper*/) { + return MakeReadRefs(0); + } + std::unique_ptr<ReferenceReader> MakeReadRefs2(offset_t /*lower*/, + offset_t /*upper*/) { + return MakeReadRefs(1); + } + std::unique_ptr<ReferenceReader> MakeReadRefs3(offset_t /*lower*/, + offset_t /*upper*/) { + return MakeReadRefs(2); + } + + // Disassembler::WriterFactory: + std::unique_ptr<ReferenceWriter> MakeWriteRefs1(MutableBufferView image) { + return MakeWriteRefs(image); + } + std::unique_ptr<ReferenceWriter> MakeWriteRefs2(MutableBufferView image) { + return MakeWriteRefs(image); + } + std::unique_ptr<ReferenceWriter> MakeWriteRefs3(MutableBufferView image) { + return MakeWriteRefs(image); + } 
+ + private: + // Disassembler: + bool Parse(ConstBufferView image) override; + + std::unique_ptr<ReferenceReader> MakeReadRefs(int type); + std::unique_ptr<ReferenceWriter> MakeWriteRefs(MutableBufferView image); + + ReferenceTypeTraits traits_[3]; + std::vector<Reference> refs_[3]; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TEST_DISASSEMBLER_H_ diff --git a/test_reference_reader.cc b/test_reference_reader.cc new file mode 100644 index 0000000..b7f8ece --- /dev/null +++ b/test_reference_reader.cc @@ -0,0 +1,20 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/test_reference_reader.h" + +namespace zucchini { + +TestReferenceReader::TestReferenceReader(const std::vector<Reference>& refs) + : references_(refs) {} + +TestReferenceReader::~TestReferenceReader() = default; + +absl::optional<Reference> TestReferenceReader::GetNext() { + if (index_ == references_.size()) + return absl::nullopt; + return references_[index_++]; +} + +} // namespace zucchini diff --git a/test_reference_reader.h b/test_reference_reader.h new file mode 100644 index 0000000..cc8c0de --- /dev/null +++ b/test_reference_reader.h @@ -0,0 +1,32 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_ +#define COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_ + +#include <stddef.h> + +#include <vector> + +#include "components/zucchini/image_utils.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace zucchini { + +// A trivial ReferenceReader that reads injected references. 
+class TestReferenceReader : public ReferenceReader { + public: + explicit TestReferenceReader(const std::vector<Reference>& refs); + ~TestReferenceReader() override; + + absl::optional<Reference> GetNext() override; + + private: + std::vector<Reference> references_; + size_t index_ = 0; +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TEST_REFERENCE_READER_H_ diff --git a/test_utils.cc b/test_utils.cc new file mode 100644 index 0000000..bc912b4 --- /dev/null +++ b/test_utils.cc @@ -0,0 +1,26 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/test_utils.h" + +#include <ios> +#include <sstream> + +#include "base/check_op.h" + +namespace zucchini { + +std::vector<uint8_t> ParseHexString(const std::string& hex_string) { + std::vector<uint8_t> ret; + std::istringstream iss(hex_string); + iss >> std::hex; + uint32_t temp = 0; // Cannot be uint8_t: istringstream treats this as char! + while (iss >> temp) { + CHECK_LE(temp, 0xFFU); + ret.push_back(temp); + } + return ret; +} + +} // namespace zucchini diff --git a/test_utils.h b/test_utils.h new file mode 100644 index 0000000..e922343 --- /dev/null +++ b/test_utils.h @@ -0,0 +1,35 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TEST_UTILS_H_ +#define COMPONENTS_ZUCCHINI_TEST_UTILS_H_ + +#include <stdint.h> + +#include <string> +#include <vector> + +namespace zucchini { + +// Parses space-separated list of byte hex values into list. +std::vector<uint8_t> ParseHexString(const std::string& hex_string); + +// Returns a vector that's the contatenation of two vectors of the same type. +// Elements are copied by value. 
+template <class T> +std::vector<T> Cat(const std::vector<T>& a, const std::vector<T>& b) { + std::vector<T> ret(a); + ret.insert(ret.end(), b.begin(), b.end()); + return ret; +} + +// Returns a subvector of a vector. Elements are copied by value. +template <class T> +std::vector<T> Sub(const std::vector<T>& a, size_t lo, size_t hi) { + return std::vector<T>(a.begin() + lo, a.begin() + hi); +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TEST_UTILS_H_ diff --git a/testdata/chrome64_1.exe.sha1 b/testdata/chrome64_1.exe.sha1 new file mode 100644 index 0000000..9b4f113 --- /dev/null +++ b/testdata/chrome64_1.exe.sha1 @@ -0,0 +1 @@ +4970ef6f342f6a0da9ae7a4ed462f93ef68f142c
\ No newline at end of file diff --git a/testdata/chrome64_2.exe.sha1 b/testdata/chrome64_2.exe.sha1 new file mode 100644 index 0000000..e4a96a2 --- /dev/null +++ b/testdata/chrome64_2.exe.sha1 @@ -0,0 +1 @@ +c3a974589d50956a3c8c17572fee078b9276ad9b
\ No newline at end of file diff --git a/testdata/setup1.exe.sha1 b/testdata/setup1.exe.sha1 new file mode 100644 index 0000000..2304621 --- /dev/null +++ b/testdata/setup1.exe.sha1 @@ -0,0 +1 @@ +5d0e8fed8e9e091e184adb2e2e0e668def9cd2c5
\ No newline at end of file diff --git a/testdata/setup2.exe.sha1 b/testdata/setup2.exe.sha1 new file mode 100644 index 0000000..9fa4d0c --- /dev/null +++ b/testdata/setup2.exe.sha1 @@ -0,0 +1 @@ +12194273e8d509b6e81e4a6b63621081e1426028
\ No newline at end of file diff --git a/type_dex.h b/type_dex.h new file mode 100644 index 0000000..432a031 --- /dev/null +++ b/type_dex.h @@ -0,0 +1,291 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPE_DEX_H_ +#define COMPONENTS_ZUCCHINI_TYPE_DEX_H_ + +#include <stdint.h> + +namespace zucchini { +namespace dex { +// Contains types that models DEX executable format data structures. +// See https://source.android.com/devices/tech/dalvik/dex-format + +// The supported versions are 035 and 037. + +enum class FormatId : uint8_t { + b, // 22b. + c, // 21c, 22c, 31c, 35c, 3rc. + h, // 21h. + i, // 31i. + l, // 51l. + n, // 11n. + s, // 21s, 22s. + t, // 10t, 20t, 21t, 22t, 30t, 31t. + x, // 10x, 11x, 12x, 22x, 23x, 32x. +}; + +struct Instruction { + Instruction() = default; + constexpr Instruction(uint8_t opcode_in, + uint8_t layout_in, + FormatId format_in, + uint8_t variant_in = 1) + : opcode(opcode_in), + layout(layout_in), + format(format_in), + variant(variant_in) {} + + // The opcode that identifies the instruction. + uint8_t opcode; + // Number of uint16_t units for the instruction. + uint8_t layout; + // Identifier that groups similar instructions, as quick filter. + FormatId format; + // Number of successive opcodes that have the same format. 
+ uint8_t variant = 1; +}; + +constexpr Instruction kByteCode[] = { + {0x00, 1, FormatId::x}, + {0x01, 1, FormatId::x}, + {0x02, 2, FormatId::x}, + {0x03, 3, FormatId::x}, + {0x04, 1, FormatId::x}, + {0x05, 2, FormatId::x}, + {0x06, 3, FormatId::x}, + {0x07, 1, FormatId::x}, + {0x08, 2, FormatId::x}, + {0x09, 3, FormatId::x}, + {0x0A, 1, FormatId::x}, + {0x0B, 1, FormatId::x}, + {0x0C, 1, FormatId::x}, + {0x0D, 1, FormatId::x}, + {0x0E, 1, FormatId::x}, + {0x0F, 1, FormatId::x}, + {0x10, 1, FormatId::x}, + {0x11, 1, FormatId::x}, + {0x12, 1, FormatId::n}, + {0x13, 2, FormatId::s}, + {0x14, 3, FormatId::i}, + {0x15, 2, FormatId::h}, + {0x16, 2, FormatId::s}, + {0x17, 3, FormatId::i}, + {0x18, 5, FormatId::l}, + {0x19, 2, FormatId::h}, + {0x1A, 2, FormatId::c}, + {0x1B, 3, FormatId::c}, + {0x1C, 2, FormatId::c}, + {0x1D, 1, FormatId::x}, + {0x1E, 1, FormatId::x}, + {0x1F, 2, FormatId::c}, + {0x20, 2, FormatId::c}, + {0x21, 1, FormatId::x}, + {0x22, 2, FormatId::c}, + {0x23, 2, FormatId::c}, + {0x24, 3, FormatId::c}, + {0x25, 3, FormatId::c}, + {0x26, 3, FormatId::t}, + {0x27, 1, FormatId::x}, + {0x28, 1, FormatId::t}, + {0x29, 2, FormatId::t}, + {0x2A, 3, FormatId::t}, + {0x2B, 3, FormatId::t}, + {0x2C, 3, FormatId::t}, + {0x2D, 2, FormatId::x, 5}, + {0x32, 2, FormatId::t, 6}, + {0x38, 2, FormatId::t, 6}, + // {0x3E, 1, FormatId::x, 6}, unused + {0x44, 2, FormatId::x, 14}, + {0x52, 2, FormatId::c, 14}, + {0x60, 2, FormatId::c, 14}, + {0x6E, 3, FormatId::c, 5}, + // {0x73, 1, FormatId::x}, unused + {0x74, 3, FormatId::c, 5}, + // {0x79, 1, FormatId::x, 2}, unused + {0x7B, 1, FormatId::x, 21}, + {0x90, 2, FormatId::x, 32}, + {0xB0, 1, FormatId::x, 32}, + {0xD0, 2, FormatId::s, 8}, + {0xD8, 2, FormatId::b, 11}, + // {0xE3, 1, FormatId::x, 29}, unused +}; + +// Supported by MSVC, g++, and clang++. Ensures no gaps in packing. +#pragma pack(push, 1) + +// header_item: Appears in the header section. 
+struct HeaderItem { + uint8_t magic[8]; + uint32_t checksum; + uint8_t signature[20]; + uint32_t file_size; + uint32_t header_size; + uint32_t endian_tag; + uint32_t link_size; + uint32_t link_off; + uint32_t map_off; + uint32_t string_ids_size; + uint32_t string_ids_off; + uint32_t type_ids_size; + uint32_t type_ids_off; + uint32_t proto_ids_size; + uint32_t proto_ids_off; + uint32_t field_ids_size; + uint32_t field_ids_off; + uint32_t method_ids_size; + uint32_t method_ids_off; + uint32_t class_defs_size; + uint32_t class_defs_off; + uint32_t data_size; + uint32_t data_off; +}; + +// string_id_item: String identifiers list. +struct StringIdItem { + uint32_t string_data_off; +}; + +// type_id_item: Type identifiers list. +struct TypeIdItem { + uint32_t descriptor_idx; +}; + +// proto_id_item: Method prototype identifiers list. +struct ProtoIdItem { + uint32_t shorty_idx; + uint32_t return_type_idx; + uint32_t parameters_off; +}; + +// field_id_item: Field identifiers list. +struct FieldIdItem { + uint16_t class_idx; + uint16_t type_idx; + uint32_t name_idx; +}; + +// method_id_item: Method identifiers list. +struct MethodIdItem { + uint16_t class_idx; + uint16_t proto_idx; + uint32_t name_idx; +}; + +// class_def_item: Class definitions list. +struct ClassDefItem { + uint32_t class_idx; + uint32_t access_flags; + uint32_t superclass_idx; + uint32_t interfaces_off; + uint32_t source_file_idx; + uint32_t annotations_off; + uint32_t class_data_off; + uint32_t static_values_off; +}; + +// code_item: Header of a code item. +struct CodeItem { + uint16_t registers_size; + uint16_t ins_size; + uint16_t outs_size; + uint16_t tries_size; + uint32_t debug_info_off; + uint32_t insns_size; + // Variable length data follow for complete code item. 
+}; + +constexpr uint32_t kMaxItemListSize = 18; + +// map_item +struct MapItem { + uint16_t type; + uint16_t unused; + uint32_t size; + uint32_t offset; +}; + +// map_list +struct MapList { + uint32_t size; + MapItem list[kMaxItemListSize]; +}; + +// type_item +struct TypeItem { + uint16_t type_idx; +}; + +// annotation_set_ref_item +struct AnnotationSetRefItem { + uint32_t annotations_off; +}; + +// annotation_off_item +struct AnnotationOffItem { + uint32_t annotation_off; +}; + +// field_annotation +struct FieldAnnotation { + uint32_t field_idx; + uint32_t annotations_off; +}; + +// method_annotation +struct MethodAnnotation { + uint32_t method_idx; + uint32_t annotations_off; +}; + +// parameter_annotation +struct ParameterAnnotation { + uint32_t method_idx; + uint32_t annotations_off; +}; + +// annotations_directory_item +struct AnnotationsDirectoryItem { + uint32_t class_annotations_off; + uint32_t fields_size; + uint32_t annotated_methods_size; + uint32_t annotated_parameters_size; + // FieldAnnotation field_annotations[fields_size]; + // MethodAnnotation method_annotations[annotated_methods_size]; + // ParameterAnnotation parameter_annotations[annotated_parameters_size]; + // All *Annotation are 8 bytes each. 
+}; + +// try_item +struct TryItem { + uint32_t start_addr; + uint16_t insn_count; + uint16_t handler_off; +}; + +constexpr uint16_t kTypeHeaderItem = 0x0000; +constexpr uint16_t kTypeStringIdItem = 0x0001; +constexpr uint16_t kTypeTypeIdItem = 0x0002; +constexpr uint16_t kTypeProtoIdItem = 0x0003; +constexpr uint16_t kTypeFieldIdItem = 0x0004; +constexpr uint16_t kTypeMethodIdItem = 0x0005; +constexpr uint16_t kTypeClassDefItem = 0x0006; +constexpr uint16_t kTypeMapList = 0x1000; +constexpr uint16_t kTypeTypeList = 0x1001; +constexpr uint16_t kTypeAnnotationSetRefList = 0x1002; +constexpr uint16_t kTypeAnnotationSetItem = 0x1003; +constexpr uint16_t kTypeClassDataItem = 0x2000; +constexpr uint16_t kTypeCodeItem = 0x2001; +constexpr uint16_t kTypeStringDataItem = 0x2002; +constexpr uint16_t kTypeDebugInfoItem = 0x2003; +constexpr uint16_t kTypeAnnotationItem = 0x2004; +constexpr uint16_t kTypeEncodedArrayItem = 0x2005; +constexpr uint16_t kTypeAnnotationsDirectoryItem = 0x2006; + +#pragma pack(pop) + +} // namespace dex +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPE_DEX_H_ diff --git a/type_elf.h b/type_elf.h new file mode 100644 index 0000000..2a522b1 --- /dev/null +++ b/type_elf.h @@ -0,0 +1,283 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPE_ELF_H_ +#define COMPONENTS_ZUCCHINI_TYPE_ELF_H_ + +#include <stdint.h> + +namespace zucchini { + +// Structures and constants taken from linux/elf.h and following identical +// layout. This is used for parsing of Executable and Linkable Format (ELF). +namespace elf { +// Supported by MSVC, g++, and clang++. Ensures no gaps in packing. +#pragma pack(push, 1) + +// This header defines various types from the ELF file spec, but no code +// related to using them. + +typedef uint32_t Elf32_Addr; // Unsigned program address. 
+typedef uint16_t Elf32_Half; // Unsigned medium integer. +typedef uint32_t Elf32_Off; // Unsigned file offset. +typedef int32_t Elf32_Sword; // Signed large integer. +typedef uint32_t Elf32_Word; // Unsigned large integer. + +typedef uint64_t Elf64_Addr; // Unsigned program address. +typedef uint16_t Elf64_Half; // Unsigned medium integer. +typedef uint64_t Elf64_Off; // Unsigned file offset. +typedef int32_t Elf64_Sword; // Signed large integer. +typedef uint32_t Elf64_Word; // Unsigned large integer. +typedef int64_t Elf64_Sxword; // Signed extra large integer. +typedef uint64_t Elf64_Xword; // Unsigned extra large integer. + +// The header at the top of the file. +struct Elf32_Ehdr { + unsigned char e_ident[16]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +}; + +struct Elf64_Ehdr { + unsigned char e_ident[16]; + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; + Elf64_Off e_phoff; + Elf64_Off e_shoff; + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +}; + +// Identification Indexes in header->e_ident. +enum IdentificationIndex { + EI_MAG0 = 0, // File identification. + EI_MAG1 = 1, // File identification. + EI_MAG2 = 2, // File identification. + EI_MAG3 = 3, // File identification. + EI_CLASS = 4, // File class. + EI_DATA = 5, // Data encoding. + EI_VERSION = 6, // File version. + EI_OSABI = 7, // Operating system/ABI identification. + EI_ABIVERSION = 8, // ABI version. + EI_PAD = 9, // Start of padding bytes. + EI_NIDENT = 16 // Size of e_ident[]. +}; + +// Values for header->e_ident[EI_CLASS]. +enum FileClass { + ELFCLASSNONE = 0, // Invalid class. 
+ ELFCLASS32 = 1, // 32-bit objects. + ELFCLASS64 = 2 // 64-bit objects. +}; + +// Values for header->e_type. +enum FileType { + ET_NONE = 0, // No file type + ET_REL = 1, // Relocatable file + ET_EXEC = 2, // Executable file + ET_DYN = 3, // Shared object file + ET_CORE = 4, // Core file + ET_LOPROC = 0xFF00, // Processor-specific + ET_HIPROC = 0xFFFF // Processor-specific +}; + +// Values for header->e_machine. +enum MachineArchitecture { + EM_NONE = 0, // No machine. + EM_386 = 3, // Intel Architecture. + EM_ARM = 40, // ARM Architecture. + EM_X86_64 = 62, // Intel x86-64 Architecture. + EM_AARCH64 = 183, // ARM Architecture, 64-bit. + // Other values skipped. +}; + +// A section header in the section header table. +struct Elf32_Shdr { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +}; + +struct Elf64_Shdr { + Elf64_Word sh_name; + Elf64_Word sh_type; + Elf64_Xword sh_flags; + Elf64_Addr sh_addr; + Elf64_Off sh_offset; + Elf64_Xword sh_size; + Elf64_Word sh_link; + Elf64_Word sh_info; + Elf64_Xword sh_addralign; + Elf64_Xword sh_entsize; +}; + +// Values for the section type field in a section header. +enum sh_type_values { + SHT_NULL = 0, + SHT_PROGBITS = 1, + SHT_SYMTAB = 2, + SHT_STRTAB = 3, + SHT_RELA = 4, + SHT_HASH = 5, + SHT_DYNAMIC = 6, + SHT_NOTE = 7, + SHT_NOBITS = 8, + SHT_REL = 9, + SHT_SHLIB = 10, + SHT_DYNSYM = 11, + SHT_INIT_ARRAY = 14, + SHT_FINI_ARRAY = 15, + SHT_LOPROC = 0x70000000, + SHT_HIPROC = 0x7FFFFFFF, + SHT_LOUSER = 0x80000000, + SHT_HIUSER = 0xFFFFFFFF +}; + +enum sh_flag_masks { + SHF_WRITE = 1 << 0, + SHF_ALLOC = 1 << 1, + SHF_EXECINSTR = 1 << 2, + // 1 << 3 is reserved. 
+ SHF_MERGE = 1 << 4, + SHF_STRINGS = 1 << 5, + SHF_INFO_LINK = 1 << 6, + SHF_LINK_ORDER = 1 << 7, + SHF_OS_NONCONFORMING = 1 << 8, + SHF_GROUP = 1 << 9, + SHF_TLS = 1 << 10, + SHF_COMPRESSED = 1 << 11, +}; + +struct Elf32_Phdr { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +}; + +struct Elf64_Phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; + Elf64_Addr p_vaddr; + Elf64_Addr p_paddr; + Elf64_Xword p_filesz; + Elf64_Xword p_memsz; + Elf64_Xword p_align; +}; + +// Values for the segment type field in a program segment header. +enum ph_type_values { + PT_NULL = 0, + PT_LOAD = 1, + PT_DYNAMIC = 2, + PT_INTERP = 3, + PT_NOTE = 4, + PT_SHLIB = 5, + PT_PHDR = 6, + PT_LOPROC = 0x70000000, + PT_HIPROC = 0x7FFFFFFF +}; + +struct Elf32_Rel { + Elf32_Addr r_offset; + Elf32_Word r_info; +}; + +struct Elf64_Rel { + Elf64_Addr r_offset; + Elf64_Xword r_info; +}; + +struct Elf32_Rela { + Elf32_Addr r_offset; + Elf32_Word r_info; + Elf32_Sword r_addend; +}; + +struct Elf64_Rela { + Elf64_Addr r_offset; + Elf64_Xword r_info; + Elf64_Sxword r_addend; +}; + +enum elf32_rel_386_type_values { + R_386_NONE = 0, + R_386_32 = 1, + R_386_PC32 = 2, + R_386_GOT32 = 3, + R_386_PLT32 = 4, + R_386_COPY = 5, + R_386_GLOB_DAT = 6, + R_386_JMP_SLOT = 7, + R_386_RELATIVE = 8, + R_386_GOTOFF = 9, + R_386_GOTPC = 10, + R_386_TLS_TPOFF = 14, +}; + +enum elf32_rel_x86_64_type_values { + R_X86_64_NONE = 0, + R_X86_64_64 = 1, + R_X86_64_PC32 = 2, + R_X86_64_GOT32 = 3, + R_X86_64_PLT32 = 4, + R_X86_64_COPY = 5, + R_X86_64_GLOB_DAT = 6, + R_X86_64_JUMP_SLOT = 7, + R_X86_64_RELATIVE = 8, + R_X86_64_GOTPCREL = 9, + R_X86_64_32 = 10, + R_X86_64_32S = 11, + R_X86_64_16 = 12, + R_X86_64_PC16 = 13, + R_X86_64_8 = 14, + R_X86_64_PC8 = 15, +}; + +enum elf32_rel_arm_type_values { + R_ARM_RELATIVE = 23, +}; + +enum elf64_rel_aarch64_type_values { + 
R_AARCH64_GLOB_DAT = 0x401, + R_AARCH64_JUMP_SLOT = 0x402, + R_AARCH64_RELATIVE = 0x403, +}; + +#pragma pack(pop) + +} // namespace elf +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPE_ELF_H_ diff --git a/type_win_pe.h b/type_win_pe.h new file mode 100644 index 0000000..56996fe --- /dev/null +++ b/type_win_pe.h @@ -0,0 +1,191 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_ +#define COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace zucchini { + +// Structures and constants taken from WINNT.h and following identical layout. +// This is used for parsing of Portable Executable (PE) file format. +namespace pe { +// Supported by MSVC, g++, and clang++. Ensures no gaps in packing. +#pragma pack(push, 1) + +// IMAGE_NUMBEROF_DIRECTORY_ENTRIES +constexpr size_t kImageNumberOfDirectoryEntries = 16; + +// IMAGE_FILE_BASE_RELOCATION_TABLE +constexpr size_t kIndexOfBaseRelocationTable = 5; + +constexpr uint32_t kImageScnMemExecute = 0x20000000; // IMAGE_SCN_MEM_EXECUTE +constexpr uint32_t kImageScnMemRead = 0x40000000; // IMAGE_SCN_MEM_READ + +// IMAGE_DOS_HEADER +struct ImageDOSHeader { + uint16_t e_magic; // 0x00 + uint16_t e_cblp; + uint16_t e_cp; + uint16_t e_crlc; + uint16_t e_cparhdr; + uint16_t e_minalloc; + uint16_t e_maxalloc; + uint16_t e_ss; + uint16_t e_sp; // 0x10 + uint16_t e_csum; + uint16_t e_ip; + uint16_t e_cs; + uint16_t e_lfarlc; + uint16_t e_ovno; + uint16_t e_res[4]; + uint16_t e_oemid; // 0x24 + uint16_t e_oeminfo; + uint16_t e_res2[10]; + uint32_t e_lfanew; // 0x3C +}; +static_assert(sizeof(ImageDOSHeader) == 0x40, + "DOS header size should be 0x40 bytes"); + +// IMAGE_SECTION_HEADER +struct ImageSectionHeader { + char name[8]; + uint32_t virtual_size; + uint32_t virtual_address; + uint32_t size_of_raw_data; + uint32_t 
file_offset_of_raw_data; + uint32_t pointer_to_relocations; // Always zero in an image. + uint32_t pointer_to_line_numbers; // Always zero in an image. + uint16_t number_of_relocations; // Always zero in an image. + uint16_t number_of_line_numbers; // Always zero in an image. + uint32_t characteristics; +}; +static_assert(sizeof(ImageSectionHeader) == 0x28, + "Section header size should be 0x28 bytes"); + +// IMAGE_DATA_DIRECTORY +struct ImageDataDirectory { + uint32_t virtual_address; + uint32_t size; +}; +static_assert(sizeof(ImageDataDirectory) == 0x08, + "Data directory size should be 0x08 bytes"); + +// IMAGE_FILE_HEADER +struct ImageFileHeader { + uint16_t machine; + uint16_t number_of_sections; + uint32_t time_date_stamp; + uint32_t pointer_to_symbol_table; + uint32_t number_of_symbols; + uint16_t size_of_optional_header; + uint16_t characteristics; +}; +static_assert(sizeof(ImageFileHeader) == 0x14, + "File header size should be 0x14 bytes"); + +// IMAGE_OPTIONAL_HEADER +struct ImageOptionalHeader { + uint16_t magic; // 0x00: 0x10B + uint8_t major_linker_version; + uint8_t minor_linker_version; + uint32_t size_of_code; + uint32_t size_of_initialized_data; + uint32_t size_of_uninitialized_data; + uint32_t address_of_entry_point; // 0x10 + uint32_t base_of_code; + uint32_t base_of_data; + + uint32_t image_base; + uint32_t section_alignment; // 0x20 + uint32_t file_alignment; + uint16_t major_operating_system_version; + uint16_t minor_operating_system_version; + uint16_t major_image_version; + uint16_t minor_image_version; + uint16_t major_subsystem_version; // 0x30 + uint16_t minor_subsystem_version; + uint32_t win32_version_value; + uint32_t size_of_image; + uint32_t size_of_headers; + uint32_t check_sum; // 0x40 + uint16_t subsystem; + uint16_t dll_characteristics; + uint32_t size_of_stack_reserve; + uint32_t size_of_stack_commit; + uint32_t size_of_heap_reserve; // 0x50 + uint32_t size_of_heap_commit; + uint32_t loader_flags; + uint32_t 
number_of_rva_and_sizes; + + // The number of elements is actually |number_of_rva_and_sizes|, so accesses + // to |data_directory| should be checked against the bound. + ImageDataDirectory data_directory[kImageNumberOfDirectoryEntries]; // 0x60 + /* 0xE0 */ +}; +static_assert(sizeof(ImageOptionalHeader) == 0xE0, + "Optional header (32) size should be 0xE0 bytes"); + +// IMAGE_OPTIONAL_HEADER64 +struct ImageOptionalHeader64 { + uint16_t magic; // 0x00: 0x20B + uint8_t major_linker_version; + uint8_t minor_linker_version; + uint32_t size_of_code; + uint32_t size_of_initialized_data; + uint32_t size_of_uninitialized_data; + uint32_t address_of_entry_point; // 0x10 + uint32_t base_of_code; + + uint64_t image_base; + uint32_t section_alignment; // 0x20 + uint32_t file_alignment; + uint16_t major_operating_system_version; + uint16_t minor_operating_system_version; + uint16_t major_image_version; + uint16_t minor_image_version; + uint16_t major_subsystem_version; // 0x30 + uint16_t minor_subsystem_version; + uint32_t win32_version_value; + uint32_t size_of_image; + uint32_t size_of_headers; + uint32_t check_sum; // 0x40 + uint16_t subsystem; + uint16_t dll_characteristics; + uint64_t size_of_stack_reserve; + uint64_t size_of_stack_commit; // 0x50 + uint64_t size_of_heap_reserve; + uint64_t size_of_heap_commit; // 0x60 + uint32_t loader_flags; + uint32_t number_of_rva_and_sizes; + ImageDataDirectory data_directory[kImageNumberOfDirectoryEntries]; // 0x70 + /* 0xF0 */ +}; +static_assert(sizeof(ImageOptionalHeader64) == 0xF0, + "Optional header (64) size should be 0xF0 bytes"); + +struct RelocHeader { + uint32_t rva_hi; + uint32_t size; +}; +static_assert(sizeof(RelocHeader) == 8, "RelocHeader size should be 8 bytes"); + +#pragma pack(pop) + +} // namespace pe + +// Constants and offsets gleaned from WINNT.h and various articles on the +// format of Windows PE executables. 
+ +constexpr char const* kTextSectionName = ".text"; + +// Bitfield with characteristics usually associated with code sections. +const uint32_t kCodeCharacteristics = + pe::kImageScnMemExecute | pe::kImageScnMemRead; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPE_WIN_PE_H_ diff --git a/type_ztf.h b/type_ztf.h new file mode 100644 index 0000000..8ecc9ca --- /dev/null +++ b/type_ztf.h @@ -0,0 +1,54 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ +#define COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace zucchini { + +namespace ztf { + +typedef int16_t dim_t; + +// A exclusive upper bound on number of lines and/or columns. Throughout the ZTF +// code a dimension (dim) refers to a block of 1-3 digits which contain a line +// or column number. +enum : size_t { kMaxDimValue = 1000 }; + +enum SignChar : uint8_t { + kMinus = '-', + kPlus = '+', +}; + +// Lines and columns are 1-based to follow the convention of most modern text +// editing software. |line| and |col| should be positive, but int16_t is used to +// limit ranges such that it matches DeltaLineCol. 
+struct LineCol { + dim_t line; + dim_t col; +}; + +struct DeltaLineCol { + dim_t line; + dim_t col; +}; + +constexpr DeltaLineCol operator-(const LineCol& lhs, const LineCol& rhs) { + return DeltaLineCol{static_cast<dim_t>(lhs.line - rhs.line), + static_cast<dim_t>(lhs.col - rhs.col)}; +} + +constexpr LineCol operator+(const LineCol& lhs, const DeltaLineCol& rhs) { + return LineCol{static_cast<dim_t>(lhs.line + rhs.line), + static_cast<dim_t>(lhs.col + rhs.col)}; +} + +} // namespace ztf + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ diff --git a/typed_value.h b/typed_value.h new file mode 100644 index 0000000..868397c --- /dev/null +++ b/typed_value.h @@ -0,0 +1,57 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPED_VALUE_H_ +#define COMPONENTS_ZUCCHINI_TYPED_VALUE_H_ + +#include <ostream> + +namespace zucchini { + +// Strong typed values, with compare and convert functions for underlying data. +// Typically one would use strongly typed enums for this. However, for Zucchini, +// the number of bytes is not fixed, and must be represented as an integer for +// iteration. +// |Tag| is a type tag used to uniquely identify TypedValue. +// |T| is an integral type used to hold values. +// Example: +// struct Foo : TypedValue<Foo, int> { +// using Foo::TypedValue::TypedValue; // inheriting constructor. +// }; +// Foo will be used to hold values of type |int|, but with a distinct type from +// any other TypedValue. 
+template <class Tag, class T> +class TypedValue { + public: + constexpr TypedValue() = default; + explicit constexpr TypedValue(const T& value) : value_(value) {} + + explicit operator T() const { return value_; } + const T value() const { return value_; } + + friend bool operator==(const TypedValue& a, const TypedValue& b) { + return a.value_ == b.value_; + } + friend bool operator!=(const TypedValue& a, const TypedValue& b) { + return !(a == b); + } + friend bool operator<(const TypedValue& a, const TypedValue& b) { + return a.value_ < b.value_; + } + friend bool operator>(const TypedValue& a, const TypedValue& b) { + return b < a; + } + + private: + T value_ = {}; +}; + +template <class Tag, class T> +std::ostream& operator<<(std::ostream& os, const TypedValue<Tag, T>& tag) { + return os << tag.value(); +} + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPED_VALUE_H_ diff --git a/typed_value_unittest.cc b/typed_value_unittest.cc new file mode 100644 index 0000000..bc0d4f1 --- /dev/null +++ b/typed_value_unittest.cc @@ -0,0 +1,40 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/typed_value.h" + +#include <type_traits> + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +struct ValueA : TypedValue<ValueA, int> { + using ValueA::TypedValue::TypedValue; +}; + +struct ValueB : TypedValue<ValueB, int> { + using ValueB::TypedValue::TypedValue; +}; + +TEST(TypedIdTest, Value) { + EXPECT_EQ(42, ValueA(42).value()); + EXPECT_EQ(42, static_cast<int>(ValueA(42))); // explicit cast +} + +TEST(TypedIdTest, Comparison) { + EXPECT_TRUE(ValueA(0) == ValueA(0)); + EXPECT_FALSE(ValueA(0) == ValueA(42)); + EXPECT_FALSE(ValueA(0) != ValueA(0)); + EXPECT_TRUE(ValueA(0) != ValueA(42)); +} + +TEST(TypedIdTest, StrongType) { + static_assert(!std::is_convertible<ValueA, ValueB>::value, + "ValueA should not be convertible to ValueB"); + static_assert(!std::is_convertible<ValueB, ValueA>::value, + "ValueB should not be convertible to ValueA"); +} + +} // namespace zucchini diff --git a/zucchini.h b/zucchini.h new file mode 100644 index 0000000..9847440 --- /dev/null +++ b/zucchini.h @@ -0,0 +1,72 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_H_ + +#include <string> + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/patch_writer.h" + +// Core Zucchini library, consisting of: +// - Global constants. +// - Patch gen and apply functions, where "old" and "new" data are represented +// as buffers, and patch data represented as EnsemblePatchWriter or +// EnsemblePatchReader. + +namespace zucchini { + +namespace status { + +// Zucchini status code, which can also be used as process exit code. Therefore +// success is explicitly 0. 
+enum Code { + kStatusSuccess = 0, + kStatusInvalidParam = 1, + kStatusFileReadError = 2, + kStatusFileWriteError = 3, + kStatusPatchReadError = 4, + kStatusPatchWriteError = 5, + kStatusInvalidOldImage = 6, + kStatusInvalidNewImage = 7, + kStatusFatal = 8, +}; + +} // namespace status + +// Generates ensemble patch from |old_image| to |new_image| using the default +// element detection and matching heuristics, writes the results to +// |patch_writer|, and returns a status::Code. +status::Code GenerateBuffer(ConstBufferView old_image, + ConstBufferView new_image, + EnsemblePatchWriter* patch_writer); + +// Same as GenerateEnsemble(), but if |imposed_matches| is non-empty, then +// overrides default element detection and matching heuristics with custom +// element matching encoded in |imposed_matches|, which should be formatted as: +// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"), +// where "#+#=#+#" encodes a match as 4 unsigned integers: +// [offset in "old", size in "old", offset in "new", size in "new"]. +status::Code GenerateBufferImposed(ConstBufferView old_image, + ConstBufferView new_image, + std::string imposed_matches, + EnsemblePatchWriter* patch_writer); + +// Generates raw patch from |old_image| to |new_image|, and writes it to +// |patch_writer|. +status::Code GenerateBufferRaw(ConstBufferView old_image, + ConstBufferView new_image, + EnsemblePatchWriter* patch_writer); + +// Applies |patch_reader| to |old_image| to build |new_image|, which refers to +// preallocated memory of sufficient size. +status::Code ApplyBuffer(ConstBufferView old_image, + const EnsemblePatchReader& patch_reader, + MutableBufferView new_image); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_H_ diff --git a/zucchini_apply.cc b/zucchini_apply.cc new file mode 100644 index 0000000..10c5638 --- /dev/null +++ b/zucchini_apply.cc @@ -0,0 +1,217 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/zucchini_apply.h" + +#include <algorithm> +#include <map> +#include <memory> +#include <utility> + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/equivalence_map.h" +#include "components/zucchini/image_index.h" + +namespace zucchini { + +bool ApplyEquivalenceAndExtraData(ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image) { + EquivalenceSource equiv_source = patch_reader.GetEquivalenceSource(); + ExtraDataSource extra_data_source = patch_reader.GetExtraDataSource(); + MutableBufferView::iterator dst_it = new_image.begin(); + + for (auto equivalence = equiv_source.GetNext(); equivalence.has_value(); + equivalence = equiv_source.GetNext()) { + MutableBufferView::iterator next_dst_it = + new_image.begin() + equivalence->dst_offset; + CHECK(next_dst_it >= dst_it); + + offset_t gap = static_cast<offset_t>(next_dst_it - dst_it); + absl::optional<ConstBufferView> extra_data = extra_data_source.GetNext(gap); + if (!extra_data) { + LOG(ERROR) << "Error reading extra_data"; + return false; + } + // |extra_data| length is based on what was parsed from the patch so this + // copy should be valid. 
+ dst_it = std::copy(extra_data->begin(), extra_data->end(), dst_it); + CHECK_EQ(dst_it, next_dst_it); + dst_it = std::copy_n(old_image.begin() + equivalence->src_offset, + equivalence->length, dst_it); + CHECK_EQ(dst_it, next_dst_it + equivalence->length); + } + offset_t gap = static_cast<offset_t>(new_image.end() - dst_it); + absl::optional<ConstBufferView> extra_data = extra_data_source.GetNext(gap); + if (!extra_data) { + LOG(ERROR) << "Error reading extra_data"; + return false; + } + std::copy(extra_data->begin(), extra_data->end(), dst_it); + if (!equiv_source.Done() || !extra_data_source.Done()) { + LOG(ERROR) << "Found trailing equivalence and extra_data"; + return false; + } + return true; +} + +bool ApplyRawDelta(const PatchElementReader& patch_reader, + MutableBufferView new_image) { + EquivalenceSource equiv_source = patch_reader.GetEquivalenceSource(); + RawDeltaSource raw_delta_source = patch_reader.GetRawDeltaSource(); + // Traverse |equiv_source| and |raw_delta_source| in lockstep. + auto equivalence = equiv_source.GetNext(); + offset_t base_copy_offset = 0; + for (auto delta = raw_delta_source.GetNext(); delta.has_value(); + delta = raw_delta_source.GetNext()) { + while (equivalence.has_value() && + base_copy_offset + equivalence->length <= delta->copy_offset) { + base_copy_offset += equivalence->length; + equivalence = equiv_source.GetNext(); + } + if (!equivalence.has_value()) { + LOG(ERROR) << "Error reading equivalences"; + return false; + } + CHECK_GE(delta->copy_offset, base_copy_offset); + CHECK_LT(delta->copy_offset, base_copy_offset + equivalence->length); + + // Invert byte diff. 
+ new_image[equivalence->dst_offset - base_copy_offset + + delta->copy_offset] += delta->diff; + } + if (!raw_delta_source.Done()) { + LOG(ERROR) << "Found trailing raw_delta"; + return false; + } + return true; +} + +bool ApplyReferencesCorrection(ExecutableType exe_type, + ConstBufferView old_image, + const PatchElementReader& patch, + MutableBufferView new_image) { + auto old_disasm = MakeDisassemblerOfType(old_image, exe_type); + auto new_disasm = + MakeDisassemblerOfType(ConstBufferView(new_image), exe_type); + if (!old_disasm || !new_disasm) { + LOG(ERROR) << "Failed to create Disassembler"; + return false; + } + if (old_disasm->size() != old_image.size() || + new_disasm->size() != new_image.size()) { + LOG(ERROR) << "Disassembler and element size mismatch"; + return false; + } + + ReferenceDeltaSource ref_delta_source = patch.GetReferenceDeltaSource(); + std::map<PoolTag, std::vector<ReferenceGroup>> pool_groups; + for (const auto& ref_group : old_disasm->MakeReferenceGroups()) + pool_groups[ref_group.pool_tag()].push_back(ref_group); + + OffsetMapper offset_mapper(patch.GetEquivalenceSource(), + base::checked_cast<offset_t>(old_image.size()), + base::checked_cast<offset_t>(new_image.size())); + + std::vector<ReferenceGroup> new_groups = new_disasm->MakeReferenceGroups(); + for (const auto& pool_and_sub_groups : pool_groups) { + PoolTag pool_tag = pool_and_sub_groups.first; + const std::vector<ReferenceGroup>& sub_groups = pool_and_sub_groups.second; + + TargetPool targets; + // Load "old" targets, then filter and map them to "new" targets. + for (ReferenceGroup group : sub_groups) + targets.InsertTargets(std::move(*group.GetReader(old_disasm.get()))); + targets.FilterAndProject(offset_mapper); + + // Load extra targets from patch. 
+ TargetSource target_source = patch.GetExtraTargetSource(pool_tag); + targets.InsertTargets(&target_source); + if (!target_source.Done()) { + LOG(ERROR) << "Found trailing extra_targets"; + return false; + } + + // Correct all new references, and write results to |new_disasm|. + for (ReferenceGroup group : sub_groups) { + std::unique_ptr<ReferenceWriter> ref_writer = + new_groups[group.type_tag().value()].GetWriter(new_image, + new_disasm.get()); + + EquivalenceSource equiv_source = patch.GetEquivalenceSource(); + for (auto equivalence = equiv_source.GetNext(); equivalence.has_value(); + equivalence = equiv_source.GetNext()) { + std::unique_ptr<ReferenceReader> ref_gen = group.GetReader( + equivalence->src_offset, equivalence->src_end(), old_disasm.get()); + for (auto ref = ref_gen->GetNext(); ref.has_value(); + ref = ref_gen->GetNext()) { + DCHECK_GE(ref->location, equivalence->src_offset); + DCHECK_LT(ref->location, equivalence->src_end()); + + offset_t projected_target = + offset_mapper.ExtendedForwardProject(ref->target); + offset_t expected_key = targets.KeyForNearestOffset(projected_target); + auto delta = ref_delta_source.GetNext(); + if (!delta.has_value()) { + LOG(ERROR) << "Error reading reference_delta"; + return false; + } + const key_t key = expected_key + delta.value(); + if (!targets.KeyIsValid(key)) { + LOG(ERROR) << "Invalid reference_delta"; + return false; + } + ref->target = targets.OffsetForKey(expected_key + delta.value()); + ref->location = + ref->location - equivalence->src_offset + equivalence->dst_offset; + ref_writer->PutNext(*ref); + } + } + } + } + if (!ref_delta_source.Done()) { + LOG(ERROR) << "Found trailing ref_delta_source"; + return false; + } + return true; +} + +bool ApplyElement(ExecutableType exe_type, + ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image) { + return ApplyEquivalenceAndExtraData(old_image, patch_reader, new_image) && + ApplyRawDelta(patch_reader, new_image) && + 
ApplyReferencesCorrection(exe_type, old_image, patch_reader, + new_image); +} + +/******** Exported Functions ********/ + +status::Code ApplyBuffer(ConstBufferView old_image, + const EnsemblePatchReader& patch_reader, + MutableBufferView new_image) { + if (!patch_reader.CheckOldFile(old_image)) { + LOG(ERROR) << "Invalid old_image."; + return status::kStatusInvalidOldImage; + } + + for (const auto& element_patch : patch_reader.elements()) { + ElementMatch match = element_patch.element_match(); + if (!ApplyElement(match.exe_type(), old_image[match.old_element.region()], + element_patch, new_image[match.new_element.region()])) + return status::kStatusFatal; + } + + if (!patch_reader.CheckNewFile(ConstBufferView(new_image))) { + LOG(ERROR) << "Invalid new_image."; + return status::kStatusInvalidNewImage; + } + return status::kStatusSuccess; +} + +} // namespace zucchini diff --git a/zucchini_apply.h b/zucchini_apply.h new file mode 100644 index 0000000..abab384 --- /dev/null +++ b/zucchini_apply.h @@ -0,0 +1,41 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_ + +#include "components/zucchini/image_utils.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/zucchini.h" + +namespace zucchini { + +// Reads equivalences from |patch_reader| to form preliminary |new_image|, +// copying regions from |old_image| and writing extra data from |patch_reader|. +bool ApplyEquivalenceAndExtraData(ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image); + +// Reads raw delta from |patch_reader| and applies corrections to |new_image|. 
+bool ApplyRawDelta(const PatchElementReader& patch_reader, + MutableBufferView new_image); + +// Corrects references in |new_image| by projecting references from |old_image| +// and applying corrections from |patch_reader|. Both |old_image| and +// |new_image| are matching elements associated with |exe_type|. +bool ApplyReferencesCorrection(ExecutableType exe_type, + ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image); + +// Applies patch element with type |exe_type| from |patch_reader| on |old_image| +// to produce |new_image|. +bool ApplyElement(ExecutableType exe_type, + ConstBufferView old_image, + const PatchElementReader& patch_reader, + MutableBufferView new_image); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_APPLY_H_ diff --git a/zucchini_apply_unittest.cc b/zucchini_apply_unittest.cc new file mode 100644 index 0000000..f1cb853 --- /dev/null +++ b/zucchini_apply_unittest.cc @@ -0,0 +1,14 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/zucchini_apply.h" + +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +// TODO(huangs): Add tests. + +} // namespace zucchini diff --git a/zucchini_commands.cc b/zucchini_commands.cc new file mode 100644 index 0000000..93929bd --- /dev/null +++ b/zucchini_commands.cc @@ -0,0 +1,141 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/zucchini_commands.h" + +#include <stddef.h> +#include <stdint.h> + +#include <ostream> +#include <string> +#include <utility> + +#include "base/command_line.h" +#include "base/files/file.h" +#include "base/files/file_path.h" +#include "base/files/memory_mapped_file.h" +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/crc32.h" +#include "components/zucchini/io_utils.h" +#include "components/zucchini/mapped_file.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/zucchini_integration.h" +#include "components/zucchini/zucchini_tools.h" + +namespace { + +/******** Command-line Switches ********/ + +constexpr char kSwitchDump[] = "dump"; +constexpr char kSwitchImpose[] = "impose"; +constexpr char kSwitchKeep[] = "keep"; +constexpr char kSwitchRaw[] = "raw"; + +} // namespace + +zucchini::status::Code MainGen(MainParams params) { + CHECK_EQ(3U, params.file_paths.size()); + return zucchini::Generate( + params.file_paths[0], params.file_paths[1], params.file_paths[2], + params.command_line.HasSwitch(kSwitchKeep), + params.command_line.HasSwitch(kSwitchRaw), + params.command_line.GetSwitchValueASCII(kSwitchImpose)); +} + +zucchini::status::Code MainApply(MainParams params) { + CHECK_EQ(3U, params.file_paths.size()); + return zucchini::Apply(params.file_paths[0], params.file_paths[1], + params.file_paths[2], + params.command_line.HasSwitch(kSwitchKeep)); +} + +zucchini::status::Code MainRead(MainParams params) { + CHECK_EQ(1U, params.file_paths.size()); + base::File input_file(params.file_paths[0], + base::File::FLAG_OPEN | base::File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + zucchini::MappedFileReader input(std::move(input_file)); + if (input.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << input.error(); + return zucchini::status::kStatusFileReadError; + } + + bool do_dump = 
params.command_line.HasSwitch(kSwitchDump); + zucchini::status::Code status = zucchini::ReadReferences( + {input.data(), input.length()}, do_dump, params.out); + if (status != zucchini::status::kStatusSuccess) + params.err << "Fatal error found when dumping references." << std::endl; + return status; +} + +zucchini::status::Code MainDetect(MainParams params) { + CHECK_EQ(1U, params.file_paths.size()); + base::File input_file(params.file_paths[0], + base::File::FLAG_OPEN | base::File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + zucchini::MappedFileReader input(std::move(input_file)); + if (input.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << input.error(); + return zucchini::status::kStatusFileReadError; + } + + std::vector<zucchini::ConstBufferView> sub_image_list; + zucchini::status::Code result = zucchini::DetectAll( + {input.data(), input.length()}, params.out, &sub_image_list); + if (result != zucchini::status::kStatusSuccess) + params.err << "Fatal error found when detecting executables." 
<< std::endl; + return result; +} + +zucchini::status::Code MainMatch(MainParams params) { + CHECK_EQ(2U, params.file_paths.size()); + using base::File; + File old_file(params.file_paths[0], File::FLAG_OPEN | File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + zucchini::MappedFileReader old_image(std::move(old_file)); + if (old_image.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << old_image.error(); + return zucchini::status::kStatusFileReadError; + } + File new_file(params.file_paths[1], File::FLAG_OPEN | File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + zucchini::MappedFileReader new_image(std::move(new_file)); + if (new_image.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[1].value() << ": " + << new_image.error(); + return zucchini::status::kStatusFileReadError; + } + + std::string imposed_matches = + params.command_line.GetSwitchValueASCII(kSwitchImpose); + zucchini::status::Code status = + zucchini::MatchAll({old_image.data(), old_image.length()}, + {new_image.data(), new_image.length()}, + std::move(imposed_matches), params.out); + if (status != zucchini::status::kStatusSuccess) + params.err << "Fatal error found when matching executables." 
<< std::endl; + return status; +} + +zucchini::status::Code MainCrc32(MainParams params) { + CHECK_EQ(1U, params.file_paths.size()); + base::File image_file(params.file_paths[0], + base::File::FLAG_OPEN | base::File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + zucchini::MappedFileReader image(std::move(image_file)); + if (image.HasError()) { + LOG(ERROR) << "Error with file " << params.file_paths[0].value() << ": " + << image.error(); + return zucchini::status::kStatusFileReadError; + } + + uint32_t crc = + zucchini::CalculateCrc32(image.data(), image.data() + image.length()); + params.out << "CRC32: " << zucchini::AsHex<8>(crc) << std::endl; + return zucchini::status::kStatusSuccess; +} diff --git a/zucchini_commands.h b/zucchini_commands.h new file mode 100644 index 0000000..cef18dc --- /dev/null +++ b/zucchini_commands.h @@ -0,0 +1,51 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_ + +#include <iosfwd> +#include <vector> + +#include "base/files/file_path.h" +#include "components/zucchini/zucchini.h" + +// Zucchini commands and tools that can be invoked from command-line. + +namespace base { + +class CommandLine; + +} // namespace base + +// Aggregated parameter for Main*() functions, to simplify interface. +struct MainParams { + const base::CommandLine& command_line; + const std::vector<base::FilePath>& file_paths; + std::ostream& out; + std::ostream& err; +}; + +// Signature of a Zucchini Command Function. +using CommandFunction = zucchini::status::Code (*)(MainParams); + +// Command Function: Patch generation. +zucchini::status::Code MainGen(MainParams params); + +// Command Function: Patch application. +zucchini::status::Code MainApply(MainParams params); + +// Command Function: Read and dump references from an executable. 
zucchini::status::Code MainRead(MainParams params);

// Command Function: Scan an archive file and detect executables.
zucchini::status::Code MainDetect(MainParams params);

// Command Function: Scan two archive files and match detected executables.
zucchini::status::Code MainMatch(MainParams params);

// Command Function: Compute CRC-32 of a file.
zucchini::status::Code MainCrc32(MainParams params);

#endif  // COMPONENTS_ZUCCHINI_ZUCCHINI_COMMANDS_H_
diff --git a/zucchini_exe_version.rc.version b/zucchini_exe_version.rc.version
new file mode 100644
index 0000000..9d46a4b
--- /dev/null
+++ b/zucchini_exe_version.rc.version
@@ -0,0 +1,46 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Windows VERSIONINFO resource template; @TOKEN@ placeholders are substituted
// by the build (process_version_rc_template.gni).
#include <verrsrc.h>

/////////////////////////////////////////////////////////////////////////////
//
// Version
//

VS_VERSION_INFO VERSIONINFO
 FILEVERSION @MAJOR@,@MINOR@,@BUILD@,@PATCH@
 PRODUCTVERSION @MAJOR@,@MINOR@,@BUILD@,@PATCH@
 FILEFLAGSMASK 0x17L
#ifdef _DEBUG
 FILEFLAGS 0x1L
#else
 FILEFLAGS 0x0L
#endif
 FILEOS 0x4L
 FILETYPE 0x1L
 FILESUBTYPE 0x0L
BEGIN
    BLOCK "StringFileInfo"
    BEGIN
        BLOCK "040904b0"
        BEGIN
            VALUE "CompanyName", "@COMPANY_FULLNAME@"
            VALUE "FileDescription", "Zucchini"
            VALUE "FileVersion", "@MAJOR@.@MINOR@.@BUILD@.@PATCH@"
            VALUE "InternalName", "zucchini"
            VALUE "LegalCopyright", "@COPYRIGHT@"
            VALUE "ProductName", "Zucchini"
            VALUE "ProductVersion", "@MAJOR@.@MINOR@.@BUILD@.@PATCH@"
            VALUE "CompanyShortName", "@COMPANY_SHORTNAME@"
            VALUE "ProductShortName", "Zucchini"
            VALUE "LastChange", "@LASTCHANGE@"
            VALUE "Official Build", "@OFFICIAL_BUILD@"
        END
    END
    BLOCK "VarFileInfo"
    BEGIN
        VALUE "Translation", 0x409, 1200
    END
END
diff --git a/zucchini_gen.cc b/zucchini_gen.cc
new file mode 100644
index 0000000..3735d0f
--- /dev/null
+++ b/zucchini_gen.cc
@@ -0,0 +1,461 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/zucchini_gen.h"

#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <utility>

#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "components/zucchini/disassembler.h"
#include "components/zucchini/element_detection.h"
#include "components/zucchini/encoded_view.h"
#include "components/zucchini/ensemble_matcher.h"
#include "components/zucchini/equivalence_map.h"
#include "components/zucchini/heuristic_ensemble_matcher.h"
#include "components/zucchini/image_index.h"
#include "components/zucchini/imposed_ensemble_matcher.h"
#include "components/zucchini/patch_writer.h"
#include "components/zucchini/reference_bytes_mixer.h"
#include "components/zucchini/suffix_array.h"
#include "components/zucchini/targets_affinity.h"

namespace zucchini {

namespace {

// Parameters for patch generation.
constexpr double kMinEquivalenceSimilarity = 12.0;
constexpr double kMinLabelAffinity = 64.0;

}  // namespace

// Set difference of targets: |new_targets| minus |projected_old_targets|.
// NOTE(review): std::set_difference requires both input ranges to be sorted;
// presumably TargetPool iterates targets in sorted order — confirm.
std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets,
                                       const TargetPool& new_targets) {
  std::vector<offset_t> extra_targets;
  std::set_difference(
      new_targets.begin(), new_targets.end(), projected_old_targets.begin(),
      projected_old_targets.end(), std::back_inserter(extra_targets));
  return extra_targets;
}

// Label matching (between "old" and "new") can guide EquivalenceMap
// construction; but EquivalenceMap induces Label matching. This apparent
// "chicken and egg" problem is solved by alternating 2 steps |num_iterations|
// times:
// - Associate targets based on previous EquivalenceMap. Note on the first
//   iteration, EquivalenceMap is empty, resulting in a no-op.
+// - Construct refined EquivalenceMap based on new targets associations. +EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index, + const ImageIndex& new_image_index, + int num_iterations) { + size_t pool_count = old_image_index.PoolCount(); + // |target_affinities| is outside the loop to reduce allocation. + std::vector<TargetsAffinity> target_affinities(pool_count); + + EquivalenceMap equivalence_map; + for (int i = 0; i < num_iterations; ++i) { + EncodedView old_view(old_image_index); + EncodedView new_view(new_image_index); + + // Associate targets from "old" to "new" image based on |equivalence_map| + // for each reference pool. + for (const auto& old_pool_tag_and_targets : + old_image_index.target_pools()) { + PoolTag pool_tag = old_pool_tag_and_targets.first; + target_affinities[pool_tag.value()].InferFromSimilarities( + equivalence_map, old_pool_tag_and_targets.second.targets(), + new_image_index.pool(pool_tag).targets()); + + // Creates labels for strongly associated targets. + std::vector<uint32_t> old_labels; + std::vector<uint32_t> new_labels; + size_t label_bound = target_affinities[pool_tag.value()].AssignLabels( + kMinLabelAffinity, &old_labels, &new_labels); + old_view.SetLabels(pool_tag, std::move(old_labels), label_bound); + new_view.SetLabels(pool_tag, std::move(new_labels), label_bound); + } + // Build equivalence map, where references in "old" and "new" that share + // common semantics (i.e., their respective targets were associated earlier + // on) are considered equivalent. + equivalence_map.Build( + MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()), + old_view, new_view, target_affinities, kMinEquivalenceSimilarity); + } + + return equivalence_map; +} + +bool GenerateEquivalencesAndExtraData(ConstBufferView new_image, + const EquivalenceMap& equivalence_map, + PatchElementWriter* patch_writer) { + // Make 2 passes through |equivalence_map| to reduce write churn. + // Pass 1: Write all equivalences. 
+ EquivalenceSink equivalences_sink; + for (const EquivalenceCandidate& candidate : equivalence_map) + equivalences_sink.PutNext(candidate.eq); + patch_writer->SetEquivalenceSink(std::move(equivalences_sink)); + + // Pass 2: Write data in gaps in |new_image| before / between after + // |equivalence_map| as "extra data". + ExtraDataSink extra_data_sink; + offset_t dst_offset = 0; + for (const EquivalenceCandidate& candidate : equivalence_map) { + extra_data_sink.PutNext( + new_image[{dst_offset, candidate.eq.dst_offset - dst_offset}]); + dst_offset = candidate.eq.dst_end(); + DCHECK_LE(dst_offset, new_image.size()); + } + extra_data_sink.PutNext( + new_image[{dst_offset, new_image.size() - dst_offset}]); + patch_writer->SetExtraDataSink(std::move(extra_data_sink)); + return true; +} + +bool GenerateRawDelta(ConstBufferView old_image, + ConstBufferView new_image, + const EquivalenceMap& equivalence_map, + const ImageIndex& new_image_index, + ReferenceBytesMixer* reference_bytes_mixer, + PatchElementWriter* patch_writer) { + RawDeltaSink raw_delta_sink; + + // Visit |equivalence_map| blocks in |new_image| order. Find and emit all + // bytewise differences. + offset_t base_copy_offset = 0; + for (const EquivalenceCandidate& candidate : equivalence_map) { + Equivalence equivalence = candidate.eq; + // For each bytewise delta from |old_image| to |new_image|, compute "copy + // offset" and pass it along with delta to the sink. + for (offset_t i = 0; i < equivalence.length;) { + if (new_image_index.IsReference(equivalence.dst_offset + i)) { + DCHECK(new_image_index.IsToken(equivalence.dst_offset + i)); + TypeTag type_tag = + new_image_index.LookupType(equivalence.dst_offset + i); + + // Reference delta has its own flow. On some architectures (e.g., x86) + // this does not involve raw delta, so we skip. On other architectures + // (e.g., ARM) references are mixed with other bits that may change, so + // we need to "mix" data and store some changed bits into raw delta. 
+ int num_bytes = reference_bytes_mixer->NumBytes(type_tag.value()); + if (num_bytes) { + ConstBufferView mixed_ref_bytes = reference_bytes_mixer->Mix( + type_tag.value(), old_image, equivalence.src_offset + i, + new_image, equivalence.dst_offset + i); + for (int j = 0; j < num_bytes; ++j) { + int8_t diff = + mixed_ref_bytes[j] - old_image[equivalence.src_offset + i + j]; + if (diff) + raw_delta_sink.PutNext({base_copy_offset + i + j, diff}); + } + } + i += new_image_index.refs(type_tag).width(); + DCHECK_LE(i, equivalence.length); + } else { + int8_t diff = new_image[equivalence.dst_offset + i] - + old_image[equivalence.src_offset + i]; + if (diff) + raw_delta_sink.PutNext({base_copy_offset + i, diff}); + ++i; + } + } + base_copy_offset += equivalence.length; + } + patch_writer->SetRawDeltaSink(std::move(raw_delta_sink)); + return true; +} + +bool GenerateReferencesDelta(const ReferenceSet& src_refs, + const ReferenceSet& dst_refs, + const TargetPool& projected_target_pool, + const OffsetMapper& offset_mapper, + const EquivalenceMap& equivalence_map, + ReferenceDeltaSink* reference_delta_sink) { + size_t ref_width = src_refs.width(); + auto dst_ref = dst_refs.begin(); + + // For each equivalence, for each covered |dst_ref| and the matching + // |src_ref|, emit the delta between the respective target labels. Note: By + // construction, each reference location (with |ref_width|) lies either + // completely inside an equivalence or completely outside. We perform + // "straddle checks" throughout to verify this assertion. + for (const auto& candidate : equivalence_map) { + const Equivalence equiv = candidate.eq; + // Increment |dst_ref| until it catches up to |equiv|. + while (dst_ref != dst_refs.end() && dst_ref->location < equiv.dst_offset) + ++dst_ref; + if (dst_ref == dst_refs.end()) + break; + if (dst_ref->location >= equiv.dst_end()) + continue; + // Straddle check. 
+ DCHECK_LE(dst_ref->location + ref_width, equiv.dst_end()); + + offset_t src_loc = + equiv.src_offset + (dst_ref->location - equiv.dst_offset); + auto src_ref = std::lower_bound( + src_refs.begin(), src_refs.end(), src_loc, + [](const Reference& a, offset_t b) { return a.location < b; }); + for (; dst_ref != dst_refs.end() && + dst_ref->location + ref_width <= equiv.dst_end(); + ++dst_ref, ++src_ref) { + // Local offset of |src_ref| should match that of |dst_ref|. + DCHECK_EQ(src_ref->location - equiv.src_offset, + dst_ref->location - equiv.dst_offset); + offset_t old_offset = src_ref->target; + offset_t new_estimated_offset = + offset_mapper.ExtendedForwardProject(old_offset); + offset_t new_estimated_key = + projected_target_pool.KeyForNearestOffset(new_estimated_offset); + offset_t new_offset = dst_ref->target; + offset_t new_key = projected_target_pool.KeyForOffset(new_offset); + + reference_delta_sink->PutNext( + static_cast<int32_t>(new_key - new_estimated_key)); + } + if (dst_ref == dst_refs.end()) + break; // Done. + // Straddle check. 
+ DCHECK_GE(dst_ref->location, equiv.dst_end()); + } + return true; +} + +bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets, + PoolTag pool_tag, + PatchElementWriter* patch_writer) { + TargetSink target_sink; + for (offset_t target : extra_targets) + target_sink.PutNext(target); + patch_writer->SetTargetSink(pool_tag, std::move(target_sink)); + return true; +} + +bool GenerateRawElement(const std::vector<offset_t>& old_sa, + ConstBufferView old_image, + ConstBufferView new_image, + PatchElementWriter* patch_writer) { + ImageIndex old_image_index(old_image); + ImageIndex new_image_index(new_image); + + EquivalenceMap equivalences; + equivalences.Build(old_sa, EncodedView(old_image_index), + EncodedView(new_image_index), {}, + kMinEquivalenceSimilarity); + + patch_writer->SetReferenceDeltaSink({}); + + ReferenceBytesMixer no_op_bytes_mixer; + return GenerateEquivalencesAndExtraData(new_image, equivalences, + patch_writer) && + GenerateRawDelta(old_image, new_image, equivalences, new_image_index, + &no_op_bytes_mixer, patch_writer); +} + +bool GenerateExecutableElement(ExecutableType exe_type, + ConstBufferView old_image, + ConstBufferView new_image, + PatchElementWriter* patch_writer) { + // Initialize Disassemblers. + std::unique_ptr<Disassembler> old_disasm = + MakeDisassemblerOfType(old_image, exe_type); + std::unique_ptr<Disassembler> new_disasm = + MakeDisassemblerOfType(new_image, exe_type); + if (!old_disasm || !new_disasm) { + LOG(ERROR) << "Failed to create Disassembler."; + return false; + } + DCHECK_EQ(old_disasm->GetExeType(), new_disasm->GetExeType()); + + // Initialize ImageIndexes. 
+ ImageIndex old_image_index(old_image); + ImageIndex new_image_index(new_image); + if (!old_image_index.Initialize(old_disasm.get()) || + !new_image_index.Initialize(new_disasm.get())) { + LOG(ERROR) << "Failed to create ImageIndex: Overlapping references found?"; + return false; + } + DCHECK_EQ(old_image_index.PoolCount(), new_image_index.PoolCount()); + + EquivalenceMap equivalences = + CreateEquivalenceMap(old_image_index, new_image_index, + new_disasm->num_equivalence_iterations()); + OffsetMapper offset_mapper(equivalences, + base::checked_cast<offset_t>(old_image.size()), + base::checked_cast<offset_t>(new_image.size())); + + ReferenceDeltaSink reference_delta_sink; + for (const auto& old_targets : old_image_index.target_pools()) { + PoolTag pool_tag = old_targets.first; + TargetPool projected_old_targets = old_targets.second; + projected_old_targets.FilterAndProject(offset_mapper); + std::vector<offset_t> extra_target = + FindExtraTargets(projected_old_targets, new_image_index.pool(pool_tag)); + projected_old_targets.InsertTargets(extra_target); + + if (!GenerateExtraTargets(extra_target, pool_tag, patch_writer)) + return false; + for (TypeTag type_tag : old_targets.second.types()) { + if (!GenerateReferencesDelta(old_image_index.refs(type_tag), + new_image_index.refs(type_tag), + projected_old_targets, offset_mapper, + equivalences, &reference_delta_sink)) { + return false; + } + } + } + patch_writer->SetReferenceDeltaSink(std::move(reference_delta_sink)); + std::unique_ptr<ReferenceBytesMixer> reference_bytes_mixer = + ReferenceBytesMixer::Create(*old_disasm, *new_disasm); + return GenerateEquivalencesAndExtraData(new_image, equivalences, + patch_writer) && + GenerateRawDelta(old_image, new_image, equivalences, new_image_index, + reference_bytes_mixer.get(), patch_writer); +} + +status::Code GenerateBufferCommon(ConstBufferView old_image, + ConstBufferView new_image, + std::unique_ptr<EnsembleMatcher> matcher, + EnsemblePatchWriter* patch_writer) { + if 
(!matcher->RunMatch(old_image, new_image)) { + LOG(INFO) << "RunMatch() failed, generating raw patch."; + return GenerateBufferRaw(old_image, new_image, patch_writer); + } + + const std::vector<ElementMatch>& matches = matcher->matches(); + LOG(INFO) << "Matching: Found " << matches.size() + << " nontrivial matches and " << matcher->num_identical() + << " identical matches."; + size_t num_elements = matches.size(); + if (num_elements == 0) { + LOG(INFO) << "No nontrival matches, generating raw patch."; + return GenerateBufferRaw(old_image, new_image, patch_writer); + } + + // "Gaps" are |new_image| bytes not covered by new_elements in |matches|. + // These are treated as raw data, and patched against the entire |old_image|. + + // |patch_element_map| (keyed by "new" offsets) stores PatchElementWriter + // results so elements and "gap" results can be computed separately (to reduce + // peak memory usage), and later, properly serialized to |patch_writer| + // ordered by "new" offset. + std::map<offset_t, PatchElementWriter> patch_element_map; + + // Variables to track element patching successes. + std::vector<BufferRegion> covered_new_regions; + size_t covered_new_bytes = 0; + + // Process elements first, since non-fatal failures may turn some into gaps. 
+ for (const ElementMatch& match : matches) { + BufferRegion new_region = match.new_element.region(); + LOG(INFO) << "--- Match [" << new_region.lo() << "," << new_region.hi() + << ")"; + + auto it_and_success = patch_element_map.emplace( + base::checked_cast<offset_t>(new_region.lo()), match); + DCHECK(it_and_success.second); + PatchElementWriter& patch_element = it_and_success.first->second; + + ConstBufferView old_sub_image = old_image[match.old_element.region()]; + ConstBufferView new_sub_image = new_image[new_region]; + if (GenerateExecutableElement(match.exe_type(), old_sub_image, + new_sub_image, &patch_element)) { + covered_new_regions.push_back(new_region); + covered_new_bytes += new_region.size; + } else { + LOG(INFO) << "Fall back to raw patching."; + patch_element_map.erase(it_and_success.first); + } + } + + if (covered_new_bytes < new_image.size()) { + // Process all "gaps", which are patched against the entire "old" image. To + // compute equivalence maps, "gaps" share a common suffix array + // |old_sa_raw|, whose lifetime is kept separated from elements' suffix + // arrays to reduce peak memory. + Element entire_old_element(old_image.local_region(), kExeTypeNoOp); + ImageIndex old_image_index(old_image); + EncodedView old_view_raw(old_image_index); + std::vector<offset_t> old_sa_raw = + MakeSuffixArray<InducedSuffixSort>(old_view_raw, size_t(256)); + + offset_t gap_lo = 0; + // Add sentinel that points to end of "new" file, to simplify gap iteration. 
+ covered_new_regions.emplace_back(BufferRegion{new_image.size(), 0}); + + for (const BufferRegion& covered : covered_new_regions) { + offset_t gap_hi = base::checked_cast<offset_t>(covered.lo()); + DCHECK_GE(gap_hi, gap_lo); + offset_t gap_size = gap_hi - gap_lo; + if (gap_size > 0) { + LOG(INFO) << "--- Gap [" << gap_lo << "," << gap_hi << ")"; + + ElementMatch gap_match{{entire_old_element, kExeTypeNoOp}, + {{gap_lo, gap_size}, kExeTypeNoOp}}; + auto it_and_success = patch_element_map.emplace(gap_lo, gap_match); + DCHECK(it_and_success.second); + PatchElementWriter& patch_element = it_and_success.first->second; + + ConstBufferView new_sub_image = new_image[{gap_lo, gap_size}]; + if (!GenerateRawElement(old_sa_raw, old_image, new_sub_image, + &patch_element)) { + return status::kStatusFatal; + } + } + gap_lo = base::checked_cast<offset_t>(covered.hi()); + } + } + + // Write all PatchElementWriter sorted by "new" offset. + for (auto& new_lo_and_patch_element : patch_element_map) + patch_writer->AddElement(std::move(new_lo_and_patch_element.second)); + + return status::kStatusSuccess; +} + +/******** Exported Functions ********/ + +status::Code GenerateBuffer(ConstBufferView old_image, + ConstBufferView new_image, + EnsemblePatchWriter* patch_writer) { + return GenerateBufferCommon( + old_image, new_image, std::make_unique<HeuristicEnsembleMatcher>(nullptr), + patch_writer); +} + +status::Code GenerateBufferImposed(ConstBufferView old_image, + ConstBufferView new_image, + std::string imposed_matches, + EnsemblePatchWriter* patch_writer) { + if (imposed_matches.empty()) + return GenerateBuffer(old_image, new_image, patch_writer); + + return GenerateBufferCommon( + old_image, new_image, + std::make_unique<ImposedEnsembleMatcher>(imposed_matches), patch_writer); +} + +status::Code GenerateBufferRaw(ConstBufferView old_image, + ConstBufferView new_image, + EnsemblePatchWriter* patch_writer) { + ImageIndex old_image_index(old_image); + EncodedView 
old_view(old_image_index); + std::vector<offset_t> old_sa = + MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()); + + PatchElementWriter patch_element( + {Element(old_image.local_region()), Element(new_image.local_region())}); + if (!GenerateRawElement(old_sa, old_image, new_image, &patch_element)) + return status::kStatusFatal; + patch_writer->AddElement(std::move(patch_element)); + return status::kStatusSuccess; +} + +} // namespace zucchini diff --git a/zucchini_gen.h b/zucchini_gen.h new file mode 100644 index 0000000..ac28263 --- /dev/null +++ b/zucchini_gen.h @@ -0,0 +1,85 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_ + +#include <vector> + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/zucchini.h" + +namespace zucchini { + +class EquivalenceMap; +class OffsetMapper; +class ImageIndex; +class PatchElementWriter; +class ReferenceBytesMixer; +class ReferenceDeltaSink; +class ReferenceSet; +class TargetPool; + +// Extract all targets in |new_targets| with no associated target in +// |projected_old_targets| and returns these targets in a new vector. +std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets, + const TargetPool& new_targets); + +// Creates an EquivalenceMap from "old" image to "new" image and returns the +// result. The params |*_image_index|: +// - Provide "old" and "new" raw image data and references. +// - Mediate Label matching, which links references between "old" and "new", and +// guides EquivalenceMap construction. 
+EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index, + const ImageIndex& new_image_index); + +// Writes equivalences from |equivalence_map|, and extra data from |new_image| +// found in gaps between equivalences to |patch_writer|. +bool GenerateEquivalencesAndExtraData(ConstBufferView new_image, + const EquivalenceMap& equivalence_map, + PatchElementWriter* patch_writer); + +// Writes raw delta between |old_image| and |new_image| matched by +// |equivalence_map| to |patch_writer|, using |new_image_index| to ignore +// reference bytes. +bool GenerateRawDelta(ConstBufferView old_image, + ConstBufferView new_image, + const EquivalenceMap& equivalence_map, + const ImageIndex& new_image_index, + ReferenceBytesMixer* reference_bytes_mixer, + PatchElementWriter* patch_writer); + +// Writes reference delta between references from |old_refs| and from +// |new_refs| to |patch_writer|. |projected_target_pool| contains projected +// targets from old to new image for references pool associated with |new_refs|. +bool GenerateReferencesDelta(const ReferenceSet& src_refs, + const ReferenceSet& dst_refs, + const TargetPool& projected_target_pool, + const OffsetMapper& offset_mapper, + const EquivalenceMap& equivalence_map, + ReferenceDeltaSink* reference_delta_sink); + +// Writes |extra_targets| associated with |pool_tag| to |patch_writer|. +bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets, + PoolTag pool_tag, + PatchElementWriter* patch_writer); + +// Generates raw patch element data between |old_image| and |new_image|, and +// writes them to |patch_writer|. |old_sa| is the suffix array for |old_image|. +bool GenerateRawElement(const std::vector<offset_t>& old_sa, + ConstBufferView old_image, + ConstBufferView new_image, + PatchElementWriter* patch_writer); + +// Generates patch element of type |exe_type| from |old_image| to |new_image|, +// and writes it to |patch_writer|. 
+bool GenerateExecutableElement(ExecutableType exe_type, + ConstBufferView old_image, + ConstBufferView new_image, + PatchElementWriter* patch_writer); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_GEN_H_ diff --git a/zucchini_gen_unittest.cc b/zucchini_gen_unittest.cc new file mode 100644 index 0000000..3a6d2cb --- /dev/null +++ b/zucchini_gen_unittest.cc @@ -0,0 +1,180 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/zucchini_gen.h" + +#include <stdint.h> + +#include <utility> +#include <vector> + +#include "components/zucchini/equivalence_map.h" +#include "components/zucchini/image_index.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/test_disassembler.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using OffsetVector = std::vector<offset_t>; + +// In normal usage, 0.0 is an unrealistic similarity value for an +// EquivalenceCandiate. Since similarity doesn't affect results for various unit +// tests in this file, we use this dummy value for simplicity. +constexpr double kDummySim = 0.0; + +// Helper function wrapping GenerateReferencesDelta(). +std::vector<int32_t> GenerateReferencesDeltaTest( + std::vector<Reference>&& old_references, + std::vector<Reference>&& new_references, + std::vector<offset_t>&& exp_old_targets, + std::vector<offset_t>&& exp_projected_old_targets, + EquivalenceMap&& equivalence_map) { + // OffsetMapper needs image sizes for forward-projection overflow check. These + // are tested elsewhere, so just use arbitrary large value. 
+ constexpr offset_t kOldImageSize = 1000000; + constexpr offset_t kNewImageSize = 1001000; + + ReferenceDeltaSink reference_delta_sink; + + TargetPool old_targets; + old_targets.InsertTargets(old_references); + ReferenceSet old_refs({1, TypeTag(0), PoolTag(0)}, old_targets); + old_refs.InitReferences(old_references); + EXPECT_EQ(exp_old_targets, old_targets.targets()); + + TargetPool new_targets; + new_targets.InsertTargets(new_references); + ReferenceSet new_refs({1, TypeTag(0), PoolTag(0)}, new_targets); + new_refs.InitReferences(new_references); + + OffsetMapper offset_mapper(equivalence_map, kOldImageSize, kNewImageSize); + TargetPool projected_old_targets = old_targets; + projected_old_targets.FilterAndProject(offset_mapper); + + std::vector<offset_t> extra_target = + FindExtraTargets(projected_old_targets, new_targets); + projected_old_targets.InsertTargets(extra_target); + EXPECT_EQ(exp_projected_old_targets, projected_old_targets.targets()); + + GenerateReferencesDelta(old_refs, new_refs, projected_old_targets, + offset_mapper, equivalence_map, + &reference_delta_sink); + + // Serialize |reference_delta_sink| to patch format, and read it back as + // std::vector<int32_t>. 
+ std::vector<uint8_t> buffer(reference_delta_sink.SerializedSize()); + BufferSink sink(buffer.data(), buffer.size()); + reference_delta_sink.SerializeInto(&sink); + + BufferSource source(buffer.data(), buffer.size()); + ReferenceDeltaSource reference_delta_source; + EXPECT_TRUE(reference_delta_source.Initialize(&source)); + std::vector<int32_t> delta_vec; + for (auto delta = reference_delta_source.GetNext(); delta.has_value(); + delta = reference_delta_source.GetNext()) { + delta_vec.push_back(*delta); + } + EXPECT_TRUE(reference_delta_source.Done()); + return delta_vec; +} + +} // namespace + +TEST(ZucchiniGenTest, FindExtraTargets) { + EXPECT_EQ(OffsetVector(), FindExtraTargets({}, {})); + EXPECT_EQ(OffsetVector(), FindExtraTargets(TargetPool({3}), {})); + EXPECT_EQ(OffsetVector(), FindExtraTargets(TargetPool({3}), TargetPool({3}))); + EXPECT_EQ(OffsetVector({4}), + FindExtraTargets(TargetPool({3}), TargetPool({4}))); + EXPECT_EQ(OffsetVector({4}), + FindExtraTargets(TargetPool({3}), TargetPool({3, 4}))); + EXPECT_EQ(OffsetVector({4}), + FindExtraTargets(TargetPool({2, 3}), TargetPool({3, 4}))); + EXPECT_EQ(OffsetVector({3, 5}), + FindExtraTargets(TargetPool({2, 4}), TargetPool({3, 5}))); +} + +TEST(ZucchiniGenTest, GenerateReferencesDelta) { + // No equivalences. + EXPECT_EQ(std::vector<int32_t>(), + GenerateReferencesDeltaTest({}, {}, {}, {}, EquivalenceMap())); + EXPECT_EQ(std::vector<int32_t>(), + GenerateReferencesDeltaTest({{10, 0}}, {{20, 0}}, {0}, {0}, + EquivalenceMap())); + + // Simple cases with one equivalence. + EXPECT_EQ( + std::vector<int32_t>({0}), // {0 - 0}. + GenerateReferencesDeltaTest( + {{10, 3}}, {{20, 3}}, {3}, {3}, + EquivalenceMap({{{3, 3, 1}, kDummySim}, {{10, 20, 4}, kDummySim}}))); + EXPECT_EQ( + std::vector<int32_t>({-1}), // {0 - 1}. + GenerateReferencesDeltaTest( + {{10, 3}}, {{20, 3}}, {3}, {3, 4}, + EquivalenceMap({{{3, 4, 1}, kDummySim}, {{10, 20, 4}, kDummySim}}))); + EXPECT_EQ( + std::vector<int32_t>({1}), // {1 - 0}. 
+ GenerateReferencesDeltaTest( + {{10, 3}}, {{20, 3}}, {3}, {2, 3}, + EquivalenceMap({{{3, 2, 1}, kDummySim}, {{10, 20, 4}, kDummySim}}))); + EXPECT_EQ(std::vector<int32_t>({1, -1}), // {1 - 0, 0 - 1}. + GenerateReferencesDeltaTest( + {{10, 3}, {11, 4}}, {{20, 3}, {21, 4}}, {3, 4}, {2, 3, 4, 5}, + EquivalenceMap({{{3, 2, 1}, kDummySim}, + {{4, 5, 1}, kDummySim}, + {{10, 20, 4}, kDummySim}}))); + + EXPECT_EQ( + std::vector<int32_t>({0, 0}), // {1 - 1, 2 - 2}. + GenerateReferencesDeltaTest( + {{10, 3}, {11, 4}, {12, 5}, {13, 6}}, + {{20, 3}, {21, 4}, {22, 5}, {23, 6}}, {3, 4, 5, 6}, {3, 4, 5, 6}, + EquivalenceMap({{{3, 3, 4}, kDummySim}, {{11, 21, 2}, kDummySim}}))); + + // Multiple equivalences. + EXPECT_EQ(std::vector<int32_t>({-1, 1}), // {0 - 1, 1 - 0}. + GenerateReferencesDeltaTest( + {{10, 0}, {12, 1}}, {{10, 0}, {12, 1}}, {0, 1}, {0, 1}, + EquivalenceMap({{{0, 0, 2}, kDummySim}, + {{12, 10, 2}, kDummySim}, + {{10, 12, 2}, kDummySim}}))); + EXPECT_EQ( + std::vector<int32_t>({0, 0}), // {0 - 0, 1 - 1}. + GenerateReferencesDeltaTest( + {{0, 0}, {2, 2}}, {{0, 0}, {2, 2}}, {0, 2}, {0, 2}, + EquivalenceMap({{{2, 0, 2}, kDummySim}, {{0, 2, 2}, kDummySim}}))); + + EXPECT_EQ(std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}. + GenerateReferencesDeltaTest( + {{10, 0}, {12, 1}, {14, 2}}, {{10, 0}, {12, 1}, {14, 2}}, + {0, 1, 2}, {0, 1, 2}, + EquivalenceMap({{{0, 0, 3}, kDummySim}, + {{14, 10, 2}, kDummySim}, + {{10, 14, 2}, kDummySim}}))); + + EXPECT_EQ(std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}. + GenerateReferencesDeltaTest( + {{11, 0}, {14, 1}, {17, 2}}, {{11, 0}, {14, 1}, {17, 2}}, + {0, 1, 2}, {0, 1, 2}, + EquivalenceMap({{{0, 0, 3}, kDummySim}, + {{16, 10, 3}, kDummySim}, + {{10, 16, 3}, kDummySim}}))); + + EXPECT_EQ( + std::vector<int32_t>({-2, 2}), // {0 - 2, 2 - 0}. 
+ GenerateReferencesDeltaTest({{10, 0}, {14, 2}, {16, 1}}, + {{10, 0}, {14, 2}}, {0, 1, 2}, {0, 1, 2}, + EquivalenceMap({{{0, 0, 3}, kDummySim}, + {{14, 10, 2}, kDummySim}, + {{12, 12, 2}, kDummySim}, + {{10, 14, 2}, kDummySim}}))); +} + +// TODO(huangs): Add more tests. + +} // namespace zucchini diff --git a/zucchini_integration.cc b/zucchini_integration.cc new file mode 100644 index 0000000..ff7e792 --- /dev/null +++ b/zucchini_integration.cc @@ -0,0 +1,209 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/zucchini_integration.h" + +#include <utility> + +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/mapped_file.h" +#include "components/zucchini/patch_reader.h" + +namespace zucchini { + +namespace { + +struct FileNames { + FileNames() : is_dummy(true) { + // Use fake names. + old_name = old_name.AppendASCII("old_name"); + new_name = new_name.AppendASCII("new_name"); + patch_name = patch_name.AppendASCII("patch_name"); + } + + FileNames(const base::FilePath& old_name, + const base::FilePath& new_name, + const base::FilePath& patch_name) + : old_name(old_name), + new_name(new_name), + patch_name(patch_name), + is_dummy(false) {} + + base::FilePath old_name; + base::FilePath new_name; + base::FilePath patch_name; + + // A flag to decide whether the filenames are only for error output. 
+ const bool is_dummy; +}; + +status::Code GenerateCommon(base::File old_file, + base::File new_file, + base::File patch_file, + const FileNames& names, + bool force_keep, + bool is_raw, + std::string imposed_matches) { + MappedFileReader mapped_old(std::move(old_file)); + if (mapped_old.HasError()) { + LOG(ERROR) << "Error with file " << names.old_name.value() << ": " + << mapped_old.error(); + return status::kStatusFileReadError; + } + + MappedFileReader mapped_new(std::move(new_file)); + if (mapped_new.HasError()) { + LOG(ERROR) << "Error with file " << names.new_name.value() << ": " + << mapped_new.error(); + return status::kStatusFileReadError; + } + + status::Code result = status::kStatusSuccess; + EnsemblePatchWriter patch_writer(mapped_old.region(), mapped_new.region()); + if (is_raw) { + result = GenerateBufferRaw(mapped_old.region(), mapped_new.region(), + &patch_writer); + } else { + result = GenerateBufferImposed(mapped_old.region(), mapped_new.region(), + std::move(imposed_matches), &patch_writer); + } + if (result != status::kStatusSuccess) { + LOG(ERROR) << "Fatal error encountered when generating patch."; + return result; + } + + // By default, delete patch on destruction, to avoid having lingering files in + // case of a failure. On Windows deletion can be done by the OS. + MappedFileWriter mapped_patch(names.patch_name, std::move(patch_file), + patch_writer.SerializedSize()); + if (mapped_patch.HasError()) { + LOG(ERROR) << "Error with file " << names.patch_name.value() << ": " + << mapped_patch.error(); + return status::kStatusFileWriteError; + } + if (force_keep) + mapped_patch.Keep(); + + if (!patch_writer.SerializeInto(mapped_patch.region())) + return status::kStatusPatchWriteError; + + // Successfully created patch. Explicitly request file to be kept. 
+ if (!mapped_patch.Keep()) + return status::kStatusFileWriteError; + return status::kStatusSuccess; +} + +status::Code ApplyCommon(base::File old_file, + base::File patch_file, + base::File new_file, + const FileNames& names, + bool force_keep) { + MappedFileReader mapped_patch(std::move(patch_file)); + if (mapped_patch.HasError()) { + LOG(ERROR) << "Error with file " << names.patch_name.value() << ": " + << mapped_patch.error(); + return status::kStatusFileReadError; + } + + auto patch_reader = EnsemblePatchReader::Create(mapped_patch.region()); + if (!patch_reader.has_value()) { + LOG(ERROR) << "Error reading patch header."; + return status::kStatusPatchReadError; + } + + MappedFileReader mapped_old(std::move(old_file)); + if (mapped_old.HasError()) { + LOG(ERROR) << "Error with file " << names.old_name.value() << ": " + << mapped_old.error(); + return status::kStatusFileReadError; + } + + PatchHeader header = patch_reader->header(); + // By default, delete output on destruction, to avoid having lingering files + // in case of a failure. On Windows deletion can be done by the OS. + MappedFileWriter mapped_new(names.new_name, std::move(new_file), + header.new_size); + if (mapped_new.HasError()) { + LOG(ERROR) << "Error with file " << names.new_name.value() << ": " + << mapped_new.error(); + return status::kStatusFileWriteError; + } + if (force_keep) + mapped_new.Keep(); + + status::Code result = + ApplyBuffer(mapped_old.region(), *patch_reader, mapped_new.region()); + if (result != status::kStatusSuccess) { + LOG(ERROR) << "Fatal error encountered while applying patch."; + return result; + } + + // Successfully patch |mapped_new|. Explicitly request file to be kept. 
+ if (!mapped_new.Keep()) + return status::kStatusFileWriteError; + return status::kStatusSuccess; +} + +} // namespace + +status::Code Generate(base::File old_file, + base::File new_file, + base::File patch_file, + bool force_keep, + bool is_raw, + std::string imposed_matches) { + const FileNames file_names; + return GenerateCommon(std::move(old_file), std::move(new_file), + std::move(patch_file), file_names, force_keep, is_raw, + std::move(imposed_matches)); +} + +status::Code Generate(const base::FilePath& old_path, + const base::FilePath& new_path, + const base::FilePath& patch_path, + bool force_keep, + bool is_raw, + std::string imposed_matches) { + using base::File; + File old_file(old_path, File::FLAG_OPEN | File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + File new_file(new_path, File::FLAG_OPEN | File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + File patch_file(patch_path, File::FLAG_CREATE_ALWAYS | File::FLAG_READ | + File::FLAG_WRITE | File::FLAG_SHARE_DELETE | + File::FLAG_CAN_DELETE_ON_CLOSE); + const FileNames file_names(old_path, new_path, patch_path); + return GenerateCommon(std::move(old_file), std::move(new_file), + std::move(patch_file), file_names, force_keep, is_raw, + std::move(imposed_matches)); +} + +status::Code Apply(base::File old_file, + base::File patch_file, + base::File new_file, + bool force_keep) { + const FileNames file_names; + return ApplyCommon(std::move(old_file), std::move(patch_file), + std::move(new_file), file_names, force_keep); +} + +status::Code Apply(const base::FilePath& old_path, + const base::FilePath& patch_path, + const base::FilePath& new_path, + bool force_keep) { + using base::File; + File old_file(old_path, File::FLAG_OPEN | File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + File patch_file(patch_path, File::FLAG_OPEN | File::FLAG_READ | + base::File::FLAG_SHARE_DELETE); + File new_file(new_path, File::FLAG_CREATE_ALWAYS | File::FLAG_READ | + File::FLAG_WRITE | File::FLAG_SHARE_DELETE | + 
File::FLAG_CAN_DELETE_ON_CLOSE); + const FileNames file_names(old_path, new_path, patch_path); + return ApplyCommon(std::move(old_file), std::move(patch_file), + std::move(new_file), file_names, force_keep); +} + +} // namespace zucchini diff --git a/zucchini_integration.h b/zucchini_integration.h new file mode 100644 index 0000000..2ae6091 --- /dev/null +++ b/zucchini_integration.h @@ -0,0 +1,68 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_ + +#include <string> + +#include "base/files/file.h" +#include "base/files/file_path.h" +#include "components/zucchini/zucchini.h" + +// Zucchini integration interface to wrap core Zucchini library with file I/O. + +namespace zucchini { + +// Generates a patch to transform |old_file| to |new_file|, and writes the +// result to |patch_file|. Since this uses memory mapped files, crashes are +// expected in case of I/O errors. On Windows, |patch_file| is kept iff returned +// code is kStatusSuccess or if |force_keep == true|, and is deleted otherwise. +// For UNIX systems the caller needs to do cleanup since it has ownership of the +// base::File params, and Zucchini has no knowledge of which base::FilePath to +// delete. If |is_raw == true| then uses Raw Zucchini. If |imposed_matches| is +// non-empty, then overrides default element detection and matching heuristics +// with custom element matching encoded in |imposed_matches|, which should be +// formatted as: +// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"), +// where "#+#=#+#" encodes a match as 4 unsigned integers: +// [offset in "old", size in "old", offset in "new", size in "new"]. 
+status::Code Generate(base::File old_file, + base::File new_file, + base::File patch_file, + bool force_keep = false, + bool is_raw = false, + std::string imposed_matches = ""); + +// Alternative Generate() interface that takes base::FilePath as arguments. +// Performs proper cleanup in Windows and UNIX if failure occurs. +status::Code Generate(const base::FilePath& old_path, + const base::FilePath& new_path, + const base::FilePath& patch_path, + bool force_keep = false, + bool is_raw = false, + std::string imposed_matches = ""); + +// Applies the patch in |patch_file| to |old_file|, and writes the result to +// |new_file|. Since this uses memory mapped files, crashes are expected in case +// of I/O errors. On Windows, |new_file| is kept iff returned code is +// kStatusSuccess or if |force_keep == true|, and is deleted otherwise. For UNIX +// systems the caller needs to do cleanup since it has ownership of the +// base::File params, and Zucchini has no knowledge of which base::FilePath to +// delete. +status::Code Apply(base::File old_file, + base::File patch_file, + base::File new_file, + bool force_keep = false); + +// Alternative Apply() interface that takes base::FilePath as arguments. +// Performs proper cleanup in Windows and UNIX if failure occurs. +status::Code Apply(const base::FilePath& old_path, + const base::FilePath& patch_path, + const base::FilePath& new_path, + bool force_keep = false); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_INTEGRATION_H_ diff --git a/zucchini_main.cc b/zucchini_main.cc new file mode 100644 index 0000000..9b5e505 --- /dev/null +++ b/zucchini_main.cc @@ -0,0 +1,55 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <iostream> + +#include "base/command_line.h" +#include "base/logging.h" +#include "base/process/memory.h" +#include "build/build_config.h" +#include "components/zucchini/main_utils.h" + +#if defined(OS_WIN) +#include "base/win/process_startup_helper.h" +#endif // defined(OS_WIN) + +namespace { + +void InitLogging() { + logging::LoggingSettings settings; + settings.logging_dest = + logging::LOG_TO_SYSTEM_DEBUG_LOG | logging::LOG_TO_STDERR; + settings.log_file_path = nullptr; + settings.lock_log = logging::DONT_LOCK_LOG_FILE; + settings.delete_old = logging::APPEND_TO_OLD_LOG_FILE; + bool logging_res = logging::InitLogging(settings); + CHECK(logging_res); +} + +void InitErrorHandling(const base::CommandLine& command_line) { + base::EnableTerminationOnHeapCorruption(); + base::EnableTerminationOnOutOfMemory(); +#if defined(OS_WIN) + base::win::RegisterInvalidParamHandler(); + base::win::SetupCRT(command_line); +#endif // defined(OS_WIN) +} + +} // namespace + +int main(int argc, const char* argv[]) { + // Initialize infrastructure from base. + base::CommandLine::Init(argc, argv); + const base::CommandLine& command_line = + *base::CommandLine::ForCurrentProcess(); + InitLogging(); + InitErrorHandling(command_line); + zucchini::status::Code status = + RunZucchiniCommand(command_line, std::cout, std::cerr); + if (!(status == zucchini::status::kStatusSuccess || + status == zucchini::status::kStatusInvalidParam)) { + std::cerr << "Failed with code " << static_cast<int>(status) << std::endl; + } + return static_cast<int>(status); +} diff --git a/zucchini_tools.cc b/zucchini_tools.cc new file mode 100644 index 0000000..e8bf734 --- /dev/null +++ b/zucchini_tools.cc @@ -0,0 +1,140 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/zucchini_tools.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <memory> +#include <ostream> +#include <utility> + +#include "base/bind.h" +#include "base/check_op.h" +#include "base/strings/stringprintf.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/ensemble_matcher.h" +#include "components/zucchini/heuristic_ensemble_matcher.h" +#include "components/zucchini/imposed_ensemble_matcher.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +status::Code ReadReferences(ConstBufferView image, + bool do_dump, + std::ostream& out) { + std::unique_ptr<Disassembler> disasm = MakeDisassemblerWithoutFallback(image); + if (!disasm) { + out << "Input file not recognized as executable." << std::endl; + return status::kStatusInvalidOldImage; + } + + std::vector<offset_t> targets; + for (const auto& group : disasm->MakeReferenceGroups()) { + targets.clear(); + auto refs = group.GetReader(disasm.get()); + for (auto ref = refs->GetNext(); ref.has_value(); ref = refs->GetNext()) + targets.push_back(ref->target); + + size_t num_locations = targets.size(); + std::sort(targets.begin(), targets.end()); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + size_t num_targets = targets.size(); + + out << "Type " << int(group.type_tag().value()) + << ": Pool=" << static_cast<uint32_t>(group.pool_tag().value()) + << ", width=" << group.width() << ", #locations=" << num_locations + << ", #targets=" << num_targets; + if (num_targets > 0) { + double ratio = static_cast<double>(num_locations) / num_targets; + out << " (ratio=" << base::StringPrintf("%.4f", ratio) << ")"; + } + out << std::endl; + + if (do_dump) { + refs = group.GetReader(disasm.get()); + + for (auto ref = refs->GetNext(); ref; ref = refs->GetNext()) { + out << " " << AsHex<8>(ref->location) << " " << AsHex<8>(ref->target) + << std::endl; 
+ } + } + } + + return status::kStatusSuccess; +} + +status::Code DetectAll(ConstBufferView image, + std::ostream& out, + std::vector<ConstBufferView>* sub_image_list) { + DCHECK_NE(sub_image_list, nullptr); + sub_image_list->clear(); + + const size_t size = image.size(); + size_t last_out_pos = 0; + size_t total_bytes_found = 0; + + auto print_range = [&out](size_t pos, size_t size, const std::string& msg) { + out << "-- " << AsHex<8, size_t>(pos) << " +" << AsHex<8, size_t>(size) + << ": " << msg << std::endl; + }; + + ElementFinder finder(image, + base::BindRepeating(DetectElementFromDisassembler)); + for (auto element = finder.GetNext(); element.has_value(); + element = finder.GetNext()) { + ConstBufferView sub_image = image[element->region()]; + sub_image_list->push_back(sub_image); + size_t pos = sub_image.begin() - image.begin(); + size_t prog_size = sub_image.size(); + if (last_out_pos < pos) + print_range(last_out_pos, pos - last_out_pos, "?"); + auto disasm = MakeDisassemblerOfType(sub_image, element->exe_type); + print_range(pos, prog_size, disasm->GetExeTypeString()); + total_bytes_found += prog_size; + last_out_pos = pos + prog_size; + } + if (last_out_pos < size) + print_range(last_out_pos, size - last_out_pos, "?"); + out << std::endl; + + // Print summary, using decimal instead of hexadecimal. + out << "Detected " << total_bytes_found << "/" << size << " bytes => "; + double percent = total_bytes_found * 100.0 / size; + out << base::StringPrintf("%.2f", percent) << "%." 
<< std::endl; + + return status::kStatusSuccess; +} + +status::Code MatchAll(ConstBufferView old_image, + ConstBufferView new_image, + std::string imposed_matches, + std::ostream& out) { + std::unique_ptr<EnsembleMatcher> matcher; + if (imposed_matches.empty()) { + matcher = std::make_unique<HeuristicEnsembleMatcher>(&out); + } else { + matcher = + std::make_unique<ImposedEnsembleMatcher>(std::move(imposed_matches)); + } + if (!matcher->RunMatch(old_image, new_image)) { + out << "RunMatch() failed."; + return status::kStatusFatal; + } + out << "Found " << matcher->matches().size() << " nontrivial matches and " + << matcher->num_identical() << " identical matches." << std::endl + << "To impose the same matches by command line, use: " << std::endl + << " -impose="; + PrefixSep sep(","); + for (const ElementMatch& match : matcher->matches()) + out << sep << match.ToString(); + out << std::endl; + + return status::kStatusSuccess; +} + +} // namespace zucchini diff --git a/zucchini_tools.h b/zucchini_tools.h new file mode 100644 index 0000000..bf9a95c --- /dev/null +++ b/zucchini_tools.h @@ -0,0 +1,45 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_ +#define COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_ + +#include <iosfwd> +#include <string> +#include <vector> + +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/zucchini.h" + +namespace zucchini { + +// The functions below are called to print diagnosis information, so outputs are +// printed using std::ostream instead of LOG(). + +// Prints stats on references found in |image|. If |do_dump| is true, then +// prints all references (locations and targets). +status::Code ReadReferences(ConstBufferView image, + bool do_dump, + std::ostream& out); + +// Prints regions and types of all detected executables in |image|. 
Appends +// detected subregions to |sub_image_list|. +status::Code DetectAll(ConstBufferView image, + std::ostream& out, + std::vector<ConstBufferView>* sub_image_list); + +// Prints all matched regions from |old_image| to |new_image|. +// |imposed_matches|, if non-empty, encodes custom element matching to override +// the default element detection and matching heuristics, and is formatted as: +// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"), +// where "#+#=#+#" encodes a match as 4 unsigned integers: +// [offset in "old", size in "old", offset in "new", size in "new"]. +status::Code MatchAll(ConstBufferView old_image, + ConstBufferView new_image, + std::string imposed_matches, + std::ostream& out); + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_ |