diff options
author | Calder Kitagawa <ckitagawa@chromium.org> | 2018-06-11 13:54:24 +0000 |
---|---|---|
committer | Edward Lesmes <ehmaldonado@google.com> | 2021-07-23 22:50:32 +0000 |
commit | 6b80ac7f4fbd9558e55a335340c23d5d43bbec41 (patch) | |
tree | 56fee58946d1525648432cc29df3636d7844d8fe /disassembler_dex.cc | |
parent | 18379914c9b6ce649f1ce2fbfc76c7bf71d1df9a (diff) | |
download | zucchini-6b80ac7f4fbd9558e55a335340c23d5d43bbec41.tar.gz |
[Zucchini]: Support reference lists in DEX
Adds support for types which contain variable length lists of references
in DEX. These lists take the form: |NTTTTT|NTT|N|NTT|... where N is the
header containing the length and T is a reference body. There are three
types which utilize this format. AnnotationsDirectoryItem also uses a
variant of this format with multiple lists per item (to be added in a
separate CL).
Method:
We pre-cache the offset of each T within the lists using the parser and
iterate over these offsets in the ReferenceReader. This is faster than implicitly
parsing each list and avoids having to handle all the accounting for
the number of lists, items per list, map size, etc. in the
ReferenceReader. The tradeoff is memory usage, which varies with the number of types
and annotations and could exceed 1 MB in a very large DEX file. This is
an acceptable cost for the time and simplicity gained. Explicitly
parsing beforehand is also safer as it delegates the validation of DEX
structure to the parser early before any references are read. This is
also in keeping with the style of the other ReferenceReaders in the file.
Bug: 847571
Change-Id: I853905b10ab7003e87895cc50c5ebf6b9fb4a424
Reviewed-on: https://chromium-review.googlesource.com/1087409
Commit-Queue: Calder Kitagawa <ckitagawa@chromium.org>
Reviewed-by: Samuel Huang <huangs@chromium.org>
Reviewed-by: agrieve <agrieve@chromium.org>
Reviewed-by: Greg Thompson <grt@chromium.org>
Cr-Commit-Position: refs/heads/master@{#565989}
NOKEYCHECK=True
GitOrigin-RevId: 55a60dd9ab731cb569e49ae3600bc42c716d4756
Diffstat (limited to 'disassembler_dex.cc')
-rw-r--r-- | disassembler_dex.cc | 188 |
1 files changed, 176 insertions, 12 deletions
diff --git a/disassembler_dex.cc b/disassembler_dex.cc index a909b29..2fa11f8 100644 --- a/disassembler_dex.cc +++ b/disassembler_dex.cc @@ -44,6 +44,11 @@ static_assert(kDexSentinelIndexAsOffset != kInvalidOffset, // wrecks havoc for base::checked_cast<int16_t>(). constexpr int kInstrUnitSize = static_cast<int>(sizeof(uint16_t)); +// Checks if |offset| is byte aligned to 32 bits or 4 bytes. +bool Is32BitAligned(offset_t offset) { + return offset % 4 == 0; +} + /******** CodeItemParser ********/ // A parser to extract successive code items from a DEX image whose header has @@ -120,7 +125,7 @@ class CodeItemParser { const auto* code_item = source_.GetPointer<const dex::CodeItem>(); if (!code_item) return kInvalidOffset; - DCHECK_EQ(0U, code_item_offset % 4U); + DCHECK(Is32BitAligned(code_item_offset)); // Skip instruction bytes. if (!source_.GetArray<uint16_t>(code_item->insns_size)) @@ -407,17 +412,14 @@ class ItemReferenceReader : public ReferenceReader { // ReferenceReader: base::Optional<Reference> GetNext() override { - while (true) { - if (cur_idx_ >= num_items_) - return base::nullopt; - + while (cur_idx_ < num_items_) { const offset_t item_offset = OffsetOfIndex(cur_idx_); const offset_t location = item_offset + rel_location_; // The general check is |location + reference_width > hi_|. However, by // assumption |hi_| and |lo_| do not straddle the body of a Reference. So // |reference_width| is unneeded. 
if (location >= hi_) - return base::nullopt; + break; const offset_t target = mapper_.Run(location); // kDexSentinelOffset (0) may appear for the following: @@ -435,11 +437,12 @@ class ItemReferenceReader : public ReferenceReader { if (target == kInvalidOffset) { LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << "."; - return base::nullopt; + break; } ++cur_idx_; return Reference{location, target}; } + return base::nullopt; } private: @@ -456,11 +459,104 @@ class ItemReferenceReader : public ReferenceReader { offset_t cur_idx_ = 0; }; +// Parses a flattened jagged list of lists of items that looks like: +// NTTT|NTT|NTTTT|N|NTT... +// where |N| is an uint32_t representing the number of items in each sub-list, +// and "T" is a fixed-size item (|item_width|) of type "T". On success, stores +// the offset of each |T| into |reference_list|, and returns true. Otherwise +// (e.g., on finding any structural problem) returns false. +bool ParseItemOffsets(ConstBufferView image, + const dex::MapItem& map_item, + size_t item_width, + std::vector<offset_t>* item_offsets) { + // Sanity check: |image| should at least fit |map_item.size| copies of "N". 
+ if (!image.covers_array(map_item.offset, map_item.size, sizeof(uint32_t))) + return false; + BufferSource source = std::move(BufferSource(image).Skip(map_item.offset)); + item_offsets->clear(); + for (uint32_t i = 0; i < map_item.size; ++i) { + if (!source.AlignOn(image, 4U)) + return false; + uint32_t unsafe_size; + if (!source.GetValue<uint32_t>(&unsafe_size)) + return false; + DCHECK(Is32BitAligned( + base::checked_cast<offset_t>(source.begin() - image.begin()))); + if (!source.covers_array(0, unsafe_size, item_width)) + return false; + for (uint32_t j = 0; j < unsafe_size; ++j) { + item_offsets->push_back( + base::checked_cast<offset_t>(source.begin() - image.begin())); + source.Skip(item_width); + } + } + return true; +} + +/******** CachedItemListReferenceReader ********/ + +// A class that takes sorted |item_offsets|, and emits all member variable of +// interest (MVIs) that fall inside |[lo, hi)|. The MVI of each item has +// location of |rel_location| from item offset, and has target extracted with +// |mapper| (which performs validation). By the "atomicity assumption", [|lo, +// hi)| never cut across an MVI. +class CachedItemListReferenceReader : public ReferenceReader { + public: + // A function that takes an MVI's location and emit its target offset. + using Mapper = base::RepeatingCallback<offset_t(offset_t)>; + + CachedItemListReferenceReader(offset_t lo, + offset_t hi, + uint32_t rel_location, + const std::vector<offset_t>& item_offsets, + Mapper&& mapper) + : hi_(hi), + rel_location_(rel_location), + end_it_(item_offsets.cend()), + mapper_(mapper) { + cur_it_ = std::upper_bound(item_offsets.cbegin(), item_offsets.cend(), lo); + if (cur_it_ != item_offsets.begin() && *(cur_it_ - 1) >= lo) + --cur_it_; + } + + // ReferenceReader: + base::Optional<Reference> GetNext() override { + while (cur_it_ < end_it_) { + const offset_t location = *cur_it_ + rel_location_; + if (location >= hi_) // Check is simplified by atomicity assumption. 
+ break; + const offset_t target = mapper_.Run(location); + if (target == kInvalidOffset) { + LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << "."; + break; + } + ++cur_it_; + + // kDexSentinelOffset is a sentinel for; + // - AnnotationsDirectoryItem: class_annotations_off + if (target == kDexSentinelOffset) + continue; + return Reference{location, target}; + } + return base::nullopt; + } + + private: + const offset_t hi_; + const uint32_t rel_location_; + const std::vector<offset_t>::const_iterator end_it_; + const Mapper mapper_; + std::vector<offset_t>::const_iterator cur_it_; + + DISALLOW_COPY_AND_ASSIGN(CachedItemListReferenceReader); +}; + // Reads an INT index at |location| in |image| and translates the index to the // offset of a fixed-size item specified by |target_map_item| and // |target_item_size|. Returns the target offset if valid, or kInvalidOffset -// otherwise. This is compatible with InstructionReferenceReader::Mapper and -// ItemReferenceReader::Mapper. +// otherwise. This is compatible with +// CachedReferenceListReferenceReader::Mapper, +// InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper. template <typename INT> static offset_t ReadTargetIndex(ConstBufferView image, const dex::MapItem& target_map_item, @@ -482,8 +578,8 @@ static offset_t ReadTargetIndex(ConstBufferView image, // Reads uint32_t value in |image| at (valid) |location| and checks whether it // is a safe offset of a fixed-size item. Returns the target offset (possibly a // sentinel) if valid, or kInvalidOffset otherwise. This is compatible with -// InstructionReferenceReader::Mapper, ItemReferenceReader::Mapper, and -// CachedListItemReferenceReader::Mapper. +// CachedReferenceListReferenceReader::Mapper, +// InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper. 
static offset_t ReadTargetOffset32(ConstBufferView image, offset_t location) { const offset_t unsafe_target = static_cast<offset_t>(image.read<uint32_t>(location)); @@ -644,6 +740,9 @@ std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const { {{4, TypeTag(kClassDefToSuperClassTypeId), PoolTag(kTypeId)}, &DisassemblerDex::MakeReadClassDefToSuperClassTypeId32, &DisassemblerDex::MakeWriteTypeId32}, + {{2, TypeTag(kTypeListToTypeId), PoolTag(kTypeId)}, + &DisassemblerDex::MakeReadTypeListToTypeId16, + &DisassemblerDex::MakeWriteTypeId16}, {{2, TypeTag(kCodeToTypeId), PoolTag(kTypeId)}, &DisassemblerDex::MakeReadCodeToTypeId16, &DisassemblerDex::MakeWriteTypeId16}, @@ -662,6 +761,10 @@ std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const { {{4, TypeTag(kClassDefToInterfacesTypeList), PoolTag(kTypeList)}, &DisassemblerDex::MakeReadClassDefToInterfacesTypeList, &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kAnnotationSetRefListToAnnotationSet), + PoolTag(kAnnotionSet)}, + &DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet, + &DisassemblerDex::MakeWriteAbs32}, {{4, TypeTag(kClassDefToClassData), PoolTag(kClassData)}, &DisassemblerDex::MakeReadClassDefToClassData, &DisassemblerDex::MakeWriteAbs32}, @@ -677,6 +780,9 @@ std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const { {{4, TypeTag(kStringIdToStringData), PoolTag(kStringData)}, &DisassemblerDex::MakeReadStringIdToStringData, &DisassemblerDex::MakeWriteAbs32}, + {{4, TypeTag(kAnnotationSetToAnnotation), PoolTag(kAnnotation)}, + &DisassemblerDex::MakeReadAnnotationSetToAnnotation, + &DisassemblerDex::MakeWriteAbs32}, {{4, TypeTag(kClassDefToStaticValuesEncodedArray), PoolTag(kEncodedArray)}, &DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray, @@ -874,6 +980,36 @@ DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray(offset_t lo, offsetof(dex::ClassDefItem, static_values_off), std::move(mapper)); } +std::unique_ptr<ReferenceReader> 
DisassemblerDex::MakeReadTypeListToTypeId16( + offset_t lo, + offset_t hi) { + auto mapper = + base::BindRepeating(ReadTargetIndex<decltype(dex::TypeItem::type_idx)>, + image_, type_map_item_, sizeof(dex::TypeIdItem)); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::TypeItem, type_idx), type_list_offsets_, + std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationSetToAnnotation(offset_t lo, offset_t hi) { + // dex::AnnotationOffItem::annotation_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::AnnotationOffItem, annotation_off), + annotation_set_offsets_, std::move(mapper)); +} + +std::unique_ptr<ReferenceReader> +DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet(offset_t lo, + offset_t hi) { + // dex::AnnotationSetRefItem::annotations_off mapper. + auto mapper = base::BindRepeating(ReadTargetOffset32, image_); + return std::make_unique<CachedItemListReferenceReader>( + lo, hi, offsetof(dex::AnnotationSetRefItem, annotations_off), + annotation_set_ref_list_offsets_, std::move(mapper)); +} + std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId16( offset_t lo, offset_t hi) { @@ -1207,7 +1343,7 @@ bool DisassemblerDex::ParseHeader() { std::set<uint16_t> required_item_types = { dex::kTypeStringIdItem, dex::kTypeTypeIdItem, dex::kTypeProtoIdItem, dex::kTypeFieldIdItem, dex::kTypeMethodIdItem, dex::kTypeClassDefItem, - dex::kTypeCodeItem, + dex::kTypeTypeList, dex::kTypeCodeItem, }; for (offset_t i = 0; i < list_size; ++i) { const dex::MapItem* item = &item_list[i]; @@ -1230,8 +1366,36 @@ bool DisassemblerDex::ParseHeader() { field_map_item_ = *map_item_map_[dex::kTypeFieldIdItem]; method_map_item_ = *map_item_map_[dex::kTypeMethodIdItem]; class_def_map_item_ = *map_item_map_[dex::kTypeClassDefItem]; + type_list_map_item_ = *map_item_map_[dex::kTypeTypeList]; 
code_map_item_ = *map_item_map_[dex::kTypeCodeItem]; + // The following types are optional and may not be present in every DEX file. + if (map_item_map_.count(dex::kTypeAnnotationSetRefList)) { + annotation_set_ref_list_map_item_ = + *map_item_map_[dex::kTypeAnnotationSetRefList]; + } + if (map_item_map_.count(dex::kTypeAnnotationSetItem)) + annotation_set_map_item_ = *map_item_map_[dex::kTypeAnnotationSetItem]; + + // Iteratively extract variable length lists. Any failure would indicate + // invalid DEX. Success indicates that no structural problem is found. + // However, contained references data read from parsed items still require + // validation. + if (!ParseItemOffsets(image_, type_list_map_item_, sizeof(dex::TypeItem), + &type_list_offsets_)) { + return false; + } + if (!ParseItemOffsets(image_, annotation_set_ref_list_map_item_, + sizeof(dex::AnnotationSetRefItem), + &annotation_set_ref_list_offsets_)) { + return false; + } + if (!ParseItemOffsets(image_, annotation_set_map_item_, + sizeof(dex::AnnotationOffItem), + &annotation_set_offsets_)) { + return false; + } + // Iteratively extract variable-length code items blocks. Any failure would // indicate invalid DEX. Success indicates that no structural problem is // found. However, contained instructions still need validation on use. |