diff options
author | Samuel Huang <huangs@chromium.org> | 2018-04-09 15:18:17 +0000 |
---|---|---|
committer | Edward Lesmes <ehmaldonado@google.com> | 2021-07-23 22:05:55 +0000 |
commit | a8a2a9446ab207b7b6e401c16205adc68433fe08 (patch) | |
tree | f42117d07b343493d308bc1541f2361cdc1b4111 /disassembler_dex.cc | |
parent | 60ba82943a032470727baca4c1b47f60f1f1391f (diff) | |
download | zucchini-a8a2a9446ab207b7b6e401c16205adc68433fe08.tar.gz |
[Zucchini] Add DEX Reference read / write for basic types.
(Committing on behalf of etiennep@).
This CL adds DEX References read / write for 11 basic types. Details:
- Add InstructionParser to visit DEX instructions in the insns member
of a CodeItem, taking care to skip non-instruction "payloads".
- Add InstructionReferenceReader to visit CodeItem References found in
|[lo, hi)|, using provided callbacks to apply type filters and
extract targets.
- Add ItemReferenceReader to visit fixed-sized items to extract
References from a "member variable of interest", using a callback
to extract targets.
- DisassemblerDex: Add MakeRead*() and MakeWrite*() to return a visitor
to read / write supported DEX References.
- Add DEX unit test to audit the error-prone look-up table from
DisassemblerDex::MakeReferenceGroups().
Change-Id: Ice12a867aab4fdcb4a152bb1946ea7515ad426ef
Reviewed-on: https://chromium-review.googlesource.com/994066
Reviewed-by: Samuel Huang <huangs@chromium.org>
Reviewed-by: Yaron Friedman <yfriedman@chromium.org>
Commit-Queue: Samuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#549178}
NOKEYCHECK=True
GitOrigin-RevId: 7b249ec48aa0f1058496a1b7f095b0f126011bf6
Diffstat (limited to 'disassembler_dex.cc')
-rw-r--r-- | disassembler_dex.cc | 726 |
1 files changed, 680 insertions, 46 deletions
diff --git a/disassembler_dex.cc b/disassembler_dex.cc index 23198cc..ac3a693 100644 --- a/disassembler_dex.cc +++ b/disassembler_dex.cc @@ -4,15 +4,25 @@ #include "components/zucchini/disassembler_dex.h" +#include <stddef.h> +#include <stdlib.h> + +#include <algorithm> +#include <cctype> #include <cmath> +#include <iterator> #include <set> #include <utility> +#include "base/bind.h" +#include "base/callback.h" #include "base/logging.h" #include "base/numerics/safe_conversions.h" +#include "base/optional.h" #include "base/strings/stringprintf.h" #include "components/zucchini/buffer_source.h" #include "components/zucchini/buffer_view.h" +#include "components/zucchini/io_utils.h" namespace zucchini { @@ -23,51 +33,6 @@ namespace { // wrecks havoc for base::checked_cast<int16_t>(). constexpr int kInstrUnitSize = static_cast<int>(sizeof(uint16_t)); -// Buffer for ReadDexHeader() to optionally return results. -struct ReadDexHeaderResults { - BufferSource source; - const dex::HeaderItem* header; - int dex_version; -}; - -// Returns whether |image| points to a DEX file. If this is a possibility and -// |opt_results| is not null, then uses it to pass extracted data to enable -// further parsing. -bool ReadDexHeader(ConstBufferView image, ReadDexHeaderResults* opt_results) { - // This part needs to be fairly efficient since it may be called many times. - BufferSource source(image); - const dex::HeaderItem* header = source.GetPointer<dex::HeaderItem>(); - if (!header) - return false; - if (header->magic[0] != 'd' || header->magic[1] != 'e' || - header->magic[2] != 'x' || header->magic[3] != '\n' || - header->magic[7] != '\0') { - return false; - } - - // Magic matches: More detailed tests can be conducted. 
- int dex_version = 0; - for (int i = 4; i < 7; ++i) { - if (!isdigit(header->magic[i])) - return false; - dex_version = dex_version * 10 + (header->magic[i] - '0'); - } - if (dex_version != 35 && dex_version != 37) - return false; - - if (header->file_size > image.size() || - header->file_size < sizeof(dex::HeaderItem) || - header->map_off < sizeof(dex::HeaderItem)) { - return false; - } - - if (opt_results) - *opt_results = {source, header, dex_version}; - return true; -} - -} // namespace - /******** CodeItemParser ********/ // A parser to extract successive code items from a DEX image whose header has @@ -210,6 +175,360 @@ class CodeItemParser { BufferSource source_; }; +/******** InstructionParser ********/ + +// A class that successively reads |code_item| for Dalvik instructions, which +// are found at |insns|, spanning |insns_size| uint16_t "units". These units +// store instructions followed by optional non-instruction "payload". Finding +// payload boundary requires parsing: On finding an instruction that uses (and +// points to) payload, the boundary is updated. +class InstructionParser { + public: + struct Value { + offset_t instr_offset; + const dex::Instruction* instr = nullptr; // null for unknown instructions. + }; + + // Returns pointer to DEX Instruction data for |opcode|, or null if |opcode| + // is unknown. An internal initialize-on-first-use table is used for fast + // lookup. 
+ const dex::Instruction* FindDalvikInstruction(uint8_t opcode) { + static bool is_init = false; + static const dex::Instruction* instruction_table[256]; + if (!is_init) { + is_init = true; + std::fill(std::begin(instruction_table), std::end(instruction_table), + nullptr); + for (const dex::Instruction& instr : dex::kByteCode) { + std::fill(instruction_table + instr.opcode, + instruction_table + instr.opcode + instr.variant, &instr); + } + } + return instruction_table[opcode]; + } + + InstructionParser() = default; + + InstructionParser(ConstBufferView image, offset_t base_offset) + : image_begin_(image.begin()), + insns_(CodeItemParser::GetCodeItemInsns(image, base_offset)), + payload_boundary_(insns_.end()) {} + + // Reads the next instruction. On success, makes the data read available via + // value() and returns true. Otherwise (done or found error) returns false. + bool ReadNext() { + // Do not scan past payload boundary. + if (insns_.begin() >= payload_boundary_) + return false; + + const offset_t instr_offset = + base::checked_cast<offset_t>(insns_.begin() - image_begin_); + const uint8_t op = insns_.read<uint8_t>(0); + const dex::Instruction* instr = FindDalvikInstruction(op); + + // Stop on finding unknown instructions. ODEX files might trigger this. + if (!instr) { + LOG(WARNING) << "Unknown Dalvik instruction detected at " + << AsHex<8>(instr_offset) << "."; + return false; + } + + const int instr_length_units = instr->layout; + const size_t instr_length_bytes = instr_length_units * kInstrUnitSize; + if (insns_.size() < instr_length_bytes) + return false; + + // Handle instructions with variable-length data payload (31t). + if (instr->opcode == 0x26 || // fill-array-data + instr->opcode == 0x2B || // packed-switch + instr->opcode == 0x2C) { // sparse-switch + const int32_t unsafe_payload_rel_units = insns_.read<int32_t>(2); + // Payload must be in current code item, after current instruction. 
+ if (unsafe_payload_rel_units < instr_length_units || + static_cast<uint32_t>(unsafe_payload_rel_units) >= + insns_.size() / kInstrUnitSize) { + LOG(WARNING) << "Invalid payload found."; + return false; + } + // Update boundary between instructions and payload. + const ConstBufferView::const_iterator payload_it = + insns_.begin() + unsafe_payload_rel_units * kInstrUnitSize; + payload_boundary_ = std::min(payload_boundary_, payload_it); + } + + insns_.remove_prefix(instr_length_bytes); + value_ = {instr_offset, instr}; + return true; + } + + const Value& value() const { return value_; } + + private: + ConstBufferView::const_iterator image_begin_; + ConstBufferView insns_; + ConstBufferView::const_iterator payload_boundary_; + Value value_; +}; + +/******** InstructionReferenceReader ********/ + +// A class to visit |code_items|, parse instructions, and emit embedded +// References of a type determined by |filter_| and |mapper_|. Only References +// located in |[lo, hi)| are emitted. |lo| and |hi| are assumed to never +// straddle the body of a Reference. +class InstructionReferenceReader : public ReferenceReader { + public: + // A function that takes a parsed Dalvik instruction and decides whether it + // contains a specific type of Reference. If true, then returns the Reference + // location. Otherwise returns kInvalidOffset. + using Filter = + base::RepeatingCallback<offset_t(const InstructionParser::Value&)>; + // A function that takes Reference location from |filter_| to extract the + // stored target. If valid, returns it. Otherwise returns kInvalidOffset. 
+ using Mapper = base::RepeatingCallback<offset_t(offset_t)>; + + InstructionReferenceReader(ConstBufferView image, + offset_t lo, + offset_t hi, + const std::vector<offset_t>& code_item_offsets, + Filter&& filter, + Mapper&& mapper) + : image_(image), + lo_(lo), + hi_(hi), + end_it_(code_item_offsets.end()), + filter_(std::move(filter)), + mapper_(std::move(mapper)) { + const auto begin_it = code_item_offsets.begin(); + // Use binary search to find the code item that contains |lo_|. + auto comp = [](offset_t test_offset, offset_t code_item_offset) { + return test_offset < code_item_offset; + }; + cur_it_ = std::upper_bound(begin_it, end_it_, lo_, comp); + if (cur_it_ != begin_it) + --cur_it_; + parser_ = InstructionParser(image_, *cur_it_); + } + + // ReferenceReader: + base::Optional<Reference> GetNext() override { + for (;;) { + while (parser_.ReadNext()) { + const auto& v = parser_.value(); + DCHECK_NE(v.instr, nullptr); + if (v.instr_offset >= hi_) + return base::nullopt; + const offset_t location = filter_.Run(v); + if (location == kInvalidOffset || location < lo_) + continue; + // The general check is |location + reference_width > hi_|. However, by + // assumption |hi_| and |lo_| do not straddle the body of a Reference. + // So |reference_width| is unneeded. 
+ if (location >= hi_) + return base::nullopt; + offset_t target = mapper_.Run(location); + if (target != kInvalidOffset) + return Reference{location, target}; + else + LOG(WARNING) << "Invalid target at " << AsHex<8>(location) << "."; + } + ++cur_it_; + if (cur_it_ == end_it_) + return base::nullopt; + parser_ = InstructionParser(image_, *cur_it_); + } + } + + private: + const ConstBufferView image_; + const offset_t lo_; + const offset_t hi_; + const std::vector<offset_t>::const_iterator end_it_; + const Filter filter_; + const Mapper mapper_; + std::vector<offset_t>::const_iterator cur_it_; + InstructionParser parser_; +}; + +/******** ItemReferenceReader ********/ + +// A class to visit fixed-size item elements (determined by |item_size|) and +// emit a "member variable of interest" (MVI, determined by |rel_location| and +// |mapper|) as Reference. Only MVIs lying in |[lo, hi)| are emitted. |lo| and +// |hi| are assumed to never straddle the body of a Reference. +class ItemReferenceReader : public ReferenceReader { + public: + // A function that takes an MVI's location and emit its target offset. + using Mapper = base::RepeatingCallback<offset_t(offset_t)>; + + // |item_size| is the size of a fixed-size item. |rel_location| is the + // relative location of MVI from the start of the item containing it. 
+ ItemReferenceReader(offset_t lo, + offset_t hi, + const dex::MapItem& map_item, + size_t item_size, + size_t rel_location, + Mapper&& mapper) + : hi_(hi), + item_base_offset_(base::checked_cast<offset_t>(map_item.offset)), + num_items_(base::checked_cast<uint32_t>(map_item.size)), + item_size_(base::checked_cast<uint32_t>(item_size)), + rel_location_(base::checked_cast<uint32_t>(rel_location)), + mapper_(std::move(mapper)) { + static_assert(sizeof(decltype(map_item.offset)) <= sizeof(offset_t), + "map_item.offset too large."); + static_assert(sizeof(decltype(map_item.size)) <= sizeof(offset_t), + "map_item.size too large."); + if (lo < item_base_offset_) { + cur_idx_ = 0; + } else if (lo < OffsetOfIndex(num_items_)) { + cur_idx_ = (lo - item_base_offset_) / item_size_; + // Fine-tune: Advance if |lo| lies beyond the MVI. + if (lo > OffsetOfIndex(cur_idx_) + rel_location_) + ++cur_idx_; + } else { + cur_idx_ = num_items_; + } + } + + // ReferenceReader: + base::Optional<Reference> GetNext() override { + if (cur_idx_ >= num_items_) + return base::nullopt; + + const offset_t item_offset = OffsetOfIndex(cur_idx_); + const offset_t location = item_offset + rel_location_; + // The general check is |location + reference_width > hi_|. However, by + // assumption |hi_| and |lo_| do not straddle the body of a Reference. So + // |reference_width| is unneeded. 
+ if (location >= hi_) + return base::nullopt; + const offset_t target = mapper_.Run(location); + if (target == kInvalidOffset) { + LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << "."; + return base::nullopt; + } + ++cur_idx_; + return Reference{location, target}; + } + + private: + offset_t OffsetOfIndex(uint32_t idx) { + return base::checked_cast<uint32_t>(item_base_offset_ + idx * item_size_); + } + + const offset_t hi_; + const offset_t item_base_offset_; + const uint32_t num_items_; + const uint32_t item_size_; + const uint32_t rel_location_; + const Mapper mapper_; + offset_t cur_idx_ = 0; +}; + +// Reads an INT index at |location| in |image| and translates the index to the +// offset of a fixed-size item specified by |target_map_item| and +// |target_item_size|. Returns the target offset if valid, or kInvalidOffset +// otherwise. This is compatible with InstructionReferenceReader::Mapper and +// ItemReferenceReader::Mapper. +template <typename INT> +static offset_t ReadTargetIndex(ConstBufferView image, + const dex::MapItem& target_map_item, + size_t target_item_size, + offset_t location) { + static_assert(sizeof(INT) <= sizeof(offset_t), + "INT may not fit into offset_t."); + const offset_t unsafe_idx = image.read<INT>(location); + if (unsafe_idx >= target_map_item.size) + return kInvalidOffset; + return target_map_item.offset + + base::checked_cast<offset_t>(unsafe_idx * target_item_size); +} + +/******** ReferenceWriterAdaptor ********/ + +// A ReferenceWriter that adapts a callback that performs type-specific +// Reference writes. 
+class ReferenceWriterAdaptor : public ReferenceWriter { + public: + using Writer = base::RepeatingCallback<void(Reference, MutableBufferView)>; + + ReferenceWriterAdaptor(MutableBufferView image, Writer&& writer) + : image_(image), writer_(std::move(writer)) {} + + // ReferenceWriter: + void PutNext(Reference ref) override { writer_.Run(ref, image_); } + + private: + MutableBufferView image_; + Writer writer_; +}; + +// Helper that's compatible with ReferenceWriterAdaptor::Writer. +// Given that |ref.target| points to the start of a fixed size DEX item (e.g., +// FieldIdItem), translates |ref.target| to item index, and writes the result to +// |ref.location| as |INT|. +template <typename INT> +static void WriteTargetIndex(const dex::MapItem& target_map_item, + size_t target_item_size, + Reference ref, + MutableBufferView image) { + const size_t idx = (ref.target - target_map_item.offset) / target_item_size; + // Verify that index is within bound. + DCHECK_LT(idx, target_map_item.size); + // Verify that |ref.target| points to start of item. + DCHECK_EQ(ref.target, target_map_item.offset + idx * target_item_size); + image.write<INT>(ref.location, base::checked_cast<INT>(idx)); +} + +// Buffer for ReadDexHeader() to optionally return results. +struct ReadDexHeaderResults { + BufferSource source; + const dex::HeaderItem* header; + int dex_version; +}; + +// Returns whether |image| points to a DEX file. If this is a possibility and +// |opt_results| is not null, then uses it to pass extracted data to enable +// further parsing. +bool ReadDexHeader(ConstBufferView image, ReadDexHeaderResults* opt_results) { + // This part needs to be fairly efficient since it may be called many times. 
+ BufferSource source(image); + const dex::HeaderItem* header = source.GetPointer<dex::HeaderItem>(); + if (!header) + return false; + if (header->magic[0] != 'd' || header->magic[1] != 'e' || + header->magic[2] != 'x' || header->magic[3] != '\n' || + header->magic[7] != '\0') { + return false; + } + + // Magic matches: More detailed tests can be conducted. + int dex_version = 0; + for (int i = 4; i < 7; ++i) { + if (!isdigit(header->magic[i])) + return false; + dex_version = dex_version * 10 + (header->magic[i] - '0'); + } + + // Only support DEX versions 35 and 37. + // TODO(huangs): Handle version 38. + if (dex_version != 35 && dex_version != 37) + return false; + + if (header->file_size > image.size() || + header->file_size < sizeof(dex::HeaderItem) || + header->map_off < sizeof(dex::HeaderItem)) { + return false; + } + + if (opt_results) + *opt_results = {source, header, dex_version}; + return true; +} + +} // namespace + /******** DisassemblerDex ********/ DisassemblerDex::DisassemblerDex() : Disassembler(4) {} @@ -230,7 +549,322 @@ std::string DisassemblerDex::GetExeTypeString() const { } std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const { - return {}; + // Must follow DisassemblerDex::ReferenceType order. Initialized on first use. 
+ return { + {{4, TypeTag(kFieldIdToNameStringId), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadFieldToNameStringId32, + &DisassemblerDex::MakeWriteStringId32}, + {{2, TypeTag(kCodeToStringId16), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadCodeToStringId16, + &DisassemblerDex::MakeWriteStringId16}, + {{4, TypeTag(kCodeToStringId32), PoolTag(kStringId)}, + &DisassemblerDex::MakeReadCodeToStringId32, + &DisassemblerDex::MakeWriteStringId32}, + {{2, TypeTag(kFieldIdToClassTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadFieldToClassTypeId16, + &DisassemblerDex::MakeWriteTypeId16}, + {{2, TypeTag(kFieldIdToTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadFieldToTypeId16, + &DisassemblerDex::MakeWriteTypeId16}, + {{2, TypeTag(kCodeToTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadCodeToTypeId16, + &DisassemblerDex::MakeWriteTypeId16}, + {{2, TypeTag(kCodeToFieldId), PoolTag(kFieldId)}, + &DisassemblerDex::MakeReadCodeToFieldId16, + &DisassemblerDex::MakeWriteFieldId16}, + {{2, TypeTag(kCodeToMethodId), PoolTag(kMethodId)}, + &DisassemblerDex::MakeReadCodeToMethodId16, + &DisassemblerDex::MakeWriteMethodId16}, + {{2, TypeTag(kCodeToRelCode16), PoolTag(kCode)}, + &DisassemblerDex::MakeReadCodeToRelCode16, + &DisassemblerDex::MakeWriteRelCode16}, + {{4, TypeTag(kCodeToRelCode32), PoolTag(kCode)}, + &DisassemblerDex::MakeReadCodeToRelCode32, + &DisassemblerDex::MakeWriteRelCode32}, + {{4, TypeTag(kStringIdToStringData), PoolTag(kStringData)}, + &DisassemblerDex::MakeReadStringIdToStringData, + &DisassemblerDex::MakeWriteAbs32}, + }; +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadStringIdToStringData( + offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + [](ConstBufferView image, offset_t location) -> offset_t { + const offset_t unsafe_target = + image.read<decltype(dex::StringIdItem::string_data_off)>(location); + // TODO(huangs): Check that |unsafe_target| lies in string data item. 
+ if (unsafe_target >= image.size()) + return kInvalidOffset; + return unsafe_target; + }, + image_); + return std::make_unique<ItemReferenceReader>( + lo, hi, string_map_item_, sizeof(dex::StringIdItem), + offsetof(dex::StringIdItem, string_data_off), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToClassTypeId16( + offset_t lo, + offset_t hi) { + auto mapper = base::BindRepeating( + ReadTargetIndex<decltype(dex::FieldIdItem::class_idx)>, image_, + type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, field_map_item_, sizeof(dex::FieldIdItem), + offsetof(dex::FieldIdItem, class_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToTypeId16( + offset_t lo, + offset_t hi) { + auto mapper = + base::BindRepeating(ReadTargetIndex<decltype(dex::FieldIdItem::type_idx)>, + image_, type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, field_map_item_, sizeof(dex::FieldIdItem), + offsetof(dex::FieldIdItem, type_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToNameStringId32( + offset_t lo, + offset_t hi) { + auto mapper = + base::BindRepeating(ReadTargetIndex<decltype(dex::FieldIdItem::name_idx)>, + image_, string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ItemReferenceReader>( + lo, hi, field_map_item_, sizeof(dex::FieldIdItem), + offsetof(dex::FieldIdItem, name_idx), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x1A)) { // const-string + // BBBB from e.g., const-string vAA, string@BBBB. 
+ return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = + base::BindRepeating(ReadTargetIndex<uint16_t>, image_, string_map_item_, + sizeof(dex::StringIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId32( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x1B)) { // const-string/jumbo + // BBBBBBBB from e.g., const-string/jumbo vAA, string@BBBBBBBB. + return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = + base::BindRepeating(ReadTargetIndex<uint32_t>, image_, string_map_item_, + sizeof(dex::StringIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToTypeId16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x1C || // const-class + value.instr->opcode == 0x1F || // check-cast + value.instr->opcode == 0x20 || // instance-of + value.instr->opcode == 0x22 || // new-instance + value.instr->opcode == 0x23 || // new-array + value.instr->opcode == 0x24 || // filled-new-array + value.instr->opcode == 0x25)) { // filled-new-array/range + // BBBB from e.g., const-class vAA, type@BBBB. 
+ return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating(ReadTargetIndex<uint16_t>, image_, + type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToFieldId16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x52 || // iinstanceop (iget-*, iput-*) + value.instr->opcode == 0x60)) { // sstaticop (sget-*, sput-*) + // CCCC from e.g., iget vA, vB, field@CCCC. + return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating(ReadTargetIndex<uint16_t>, image_, + field_map_item_, sizeof(dex::FieldIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToMethodId16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::c && + (value.instr->opcode == 0x6E || // invoke-kind + value.instr->opcode == 0x74)) { // invoke-kind/range + // BBBB from e.g., invoke-virtual {vC, vD, vE, vF, vG}, meth@BBBB. 
+ return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = + base::BindRepeating(ReadTargetIndex<uint16_t>, image_, method_map_item_, + sizeof(dex::MethodIdItem)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode16( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::t && + (value.instr->opcode == 0x29 || // goto/16 + value.instr->opcode == 0x32 || // if-test + value.instr->opcode == 0x38)) { // if-testz + // +AAAA from e.g., goto/16 +AAAA. + return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating( + [](DisassemblerDex* dis, offset_t location) { + // Address is relative to the current instruction, which begins 1 unit + // before |location|. This needs to be subtracted out. Also, store as + // int32_t so |unsafe_delta - 1| won't underflow! + int32_t unsafe_delta = dis->image_.read<int16_t>(location); + offset_t unsafe_target = static_cast<offset_t>( + location + (unsafe_delta - 1) * kInstrUnitSize); + // TODO(huangs): Check that |unsafe_target| stays within code item. 
+ return unsafe_target; + }, + base::Unretained(this)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode32( + offset_t lo, + offset_t hi) { + auto filter = base::BindRepeating( + [](const InstructionParser::Value& value) -> offset_t { + if (value.instr->format == dex::FormatId::t && + (value.instr->opcode == 0x26 || // fill-array-data + value.instr->opcode == 0x2A || // goto/32 + value.instr->opcode == 0x2B || // packed-switch + value.instr->opcode == 0x2C)) { // sparse-switch + // +BBBBBBBB from e.g., fill-array-data vAA, +BBBBBBBB. + // +AAAAAAAA from e.g., goto/32 +AAAAAAAA. + return value.instr_offset + 2; + } + return kInvalidOffset; + }); + auto mapper = base::BindRepeating( + [](DisassemblerDex* dis, offset_t location) { + // Address is relative to the current instruction, which begins 1 unit + // before |location|. This needs to be subtracted out. + int32_t unsafe_delta = dis->image_.read<int32_t>(location); + offset_t unsafe_target = static_cast<offset_t>( + location + (unsafe_delta - 1) * kInstrUnitSize); + // TODO(huangs): Check that |unsafe_target| stays within code item. 
+ return unsafe_target; + }, + base::Unretained(this)); + return std::make_unique<InstructionReferenceReader>( + image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId16( + MutableBufferView image) { + auto writer = base::BindRepeating( + WriteTargetIndex<uint16_t>, string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId32( + MutableBufferView image) { + auto writer = base::BindRepeating( + WriteTargetIndex<uint32_t>, string_map_item_, sizeof(dex::StringIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId16( + MutableBufferView image) { + auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, type_map_item_, + sizeof(dex::TypeIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId16( + MutableBufferView image) { + auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, field_map_item_, + sizeof(dex::FieldIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId16( + MutableBufferView image) { + auto writer = base::BindRepeating( + WriteTargetIndex<uint16_t>, method_map_item_, sizeof(dex::MethodIdItem)); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode16( + MutableBufferView image) { + auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) { + ptrdiff_t byte_diff = static_cast<ptrdiff_t>(ref.target) - ref.location; + DCHECK_EQ(0, byte_diff % kInstrUnitSize); + // |delta| is relative to start of 
instruction, which is 1 unit before + // |ref.location|. The subtraction above removed too much, so +1 to fix. + ptrdiff_t delta = (byte_diff / kInstrUnitSize) + 1; + image.write<int16_t>(ref.location, base::checked_cast<int16_t>(delta)); + }); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode32( + MutableBufferView image) { + auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) { + ptrdiff_t byte_diff = static_cast<ptrdiff_t>(ref.target) - ref.location; + DCHECK_EQ(0, byte_diff % kInstrUnitSize); + ptrdiff_t delta = (byte_diff / kInstrUnitSize) + 1; + image.write<int32_t>(ref.location, base::checked_cast<int32_t>(delta)); + }); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); +} + +std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteAbs32( + MutableBufferView image) { + auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) { + image.write<uint32_t>(ref.location, ref.target); + }); + return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer)); } bool DisassemblerDex::Parse(ConstBufferView image) { |