// Copyright 2017 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ #define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ #include #include #include #include "components/zucchini/algorithm.h" #include "components/zucchini/image_utils.h" namespace zucchini { // There are several ways to reason about addresses in an image: // - Offset: Position relative to start of image. // - VA (Virtual Address): Virtual memory address of a loaded image. This is // subject to relocation by the OS. // - RVA (Relative Virtual Address): VA relative to some base address. This is // the preferred way to specify pointers in an image. // // Zucchini is primarily concerned with offsets and RVAs. Executable images like // PE and ELF are organized into sections. Each section specifies offset and RVA // ranges as: // {Offset start, offset size, RVA start, RVA size}. // This constitutes a basic unit to translate between offsets and RVAs. Note: // |offset size| < |RVA size| is possible. For example, the .bss section can can // have zero-filled statically-allocated data that have no corresponding bytes // on image (to save space). This poses a problem for Zucchini, which stores // addresses as offsets: now we'd have "dangling RVAs" that don't map to // offsets! Some ways to handling this are: // 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means // some reference targets would escape detection and processing. // 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data // must not be read on these fake offsets, which are only valid as target // addresses for reference matching. // As for |RVA size| < |offset size|, the extra portion just gets ignored. // // Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to // fake offsets by adding a large value. This value can be chosen as an // exclusive upper bound of all offsets (i.e., image size). This allows them to // be easily detected and processed as a special-case. // TODO(huangs): Investigate option (1), now that the refactored code makes // experimentation easier. // TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t| // ranges and create "fake" units to accommodate dangling RVAs. Then // AddressTranslator can be simplified. // Virtual Address relative to some base address (RVA). There's distinction // between "valid RVA" and "existent RVA": // - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|. // - Existent RVA: An RVA that has semantic meaning in an image, and may // translate to an offset in an image or (if a dangling RVA) a fake offset. // All existent RVAs are valid RVAs. using rva_t = uint32_t; // Divide by 2 to match |kOffsetBound|. constexpr rva_t kRvaBound = static_cast(-1) / 2; constexpr rva_t kInvalidRva = static_cast(-2); // A utility to translate between offsets and RVAs in an image. class AddressTranslator { public: // A basic unit for address translation, roughly maps to a section, but may // be processed (e.g., merged) as an optimization. struct Unit { offset_t offset_end() const { return offset_begin + offset_size; } rva_t rva_end() const { return rva_begin + rva_size; } bool IsEmpty() const { // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed // yet, and once it is then it's empty. // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but // is not empty. return rva_size == 0; } bool CoversOffset(offset_t offset) const { return RangeCovers(offset_begin, offset_size, offset); } bool CoversRva(rva_t rva) const { return RangeCovers(rva_begin, rva_size, rva); } bool CoversDanglingRva(rva_t rva) const { return CoversRva(rva) && rva - rva_begin >= offset_size; } // Assumes valid |offset| (*cannot* be fake offset). rva_t OffsetToRvaUnsafe(offset_t offset) const { return offset - offset_begin + rva_begin; } // Assumes valid |rva| (*can* be danging RVA). offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const { rva_t delta = rva - rva_begin; return delta < offset_size ? delta + offset_begin : fake_offset_begin + rva; } bool HasDanglingRva() const { return rva_size > offset_size; } friend bool operator==(const Unit& a, const Unit& b) { return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) == std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size); } offset_t offset_begin; offset_t offset_size; rva_t rva_begin; rva_t rva_size; }; // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit // found, to reduce the number of OffsetToUnit() calls for clustered queries. class OffsetToRvaCache { public: // Embeds |translator| for use. Now object lifetime is tied to |translator| // lifetime. explicit OffsetToRvaCache(const AddressTranslator& translator); OffsetToRvaCache(const OffsetToRvaCache&) = delete; const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete; rva_t Convert(offset_t offset) const; private: const AddressTranslator& translator_; mutable const AddressTranslator::Unit* cached_unit_ = nullptr; }; // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit // found, to reduce the number of RvaToUnit() calls for clustered queries. class RvaToOffsetCache { public: // Embeds |translator| for use. Now object lifetime is tied to |translator| // lifetime. explicit RvaToOffsetCache(const AddressTranslator& translator); RvaToOffsetCache(const RvaToOffsetCache&) = delete; const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete; bool IsValid(rva_t rva) const; offset_t Convert(rva_t rva) const; private: const AddressTranslator& translator_; mutable const AddressTranslator::Unit* cached_unit_ = nullptr; }; enum Status { kSuccess = 0, kErrorOverflow, kErrorBadOverlap, kErrorBadOverlapDanglingRva, kErrorFakeOffsetBeginTooLarge, }; AddressTranslator(); AddressTranslator(AddressTranslator&&); AddressTranslator(const AddressTranslator&) = delete; const AddressTranslator& operator=(const AddressTranslator&) = delete; ~AddressTranslator(); // Consumes |units| to populate data in this class. Performs consistency // checks and overlapping Units. Returns Status to indicate success. Status Initialize(std::vector&& units); // Returns the (possibly dangling) RVA corresponding to |offset|, or // kInvalidRva if not found. rva_t OffsetToRva(offset_t offset) const; // Returns the (possibly fake) offset corresponding to |rva|, or // kInvalidOffset if not found (i.e., |rva| is non-existent). offset_t RvaToOffset(rva_t rva) const; // For testing. offset_t fake_offset_begin() const { return fake_offset_begin_; } const std::vector& units_sorted_by_offset() const { return units_sorted_by_offset_; } const std::vector& units_sorted_by_rva() const { return units_sorted_by_rva_; } private: // Helper to find the Unit that contains given |offset| or |rva|. Returns null // if not found. const Unit* OffsetToUnit(offset_t offset) const; const Unit* RvaToUnit(rva_t rva) const; // Storage of Units. All offset ranges are non-empty and disjoint. Likewise // for all RVA ranges. std::vector units_sorted_by_offset_; std::vector units_sorted_by_rva_; // Conversion factor to translate between dangling RVAs and fake offsets. offset_t fake_offset_begin_; }; } // namespace zucchini #endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_