diff options
author | Etienne Pierre-doray <etiennep@chromium.org> | 2018-08-10 17:44:37 +0000 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2021-07-25 20:34:00 -0700 |
commit | e57c4e6bb4c122686c16f40e0b9d50a2e683d42b (patch) | |
tree | 2cf002c3499a23f698d92eb0ae2b51bfe9bc1606 /disassembler_elf.cc | |
parent | a88cad0485f1c73d63ba0a1bcfccc8a68bd300c6 (diff) | |
download | zucchini-e57c4e6bb4c122686c16f40e0b9d50a2e683d42b.tar.gz |
[Zucchini] Create elf disassembler.
Creates Disassembler that recognises and parses ELF format. For now, it only supports Intel architecture. Support for Arm will be added in follow-up CLs.
Change-Id: Ibdcf113b573f22844b6a1611c5ff6df46829b9b3
Reviewed-on: https://chromium-review.googlesource.com/1136841
Commit-Queue: Etienne Pierre-Doray <etiennep@chromium.org>
Reviewed-by: Greg Thompson <grt@chromium.org>
Reviewed-by: Samuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#582233}
NOKEYCHECK=True
GitOrigin-RevId: 3c64e078fea9f23e44939c25ca02cf05b72b2c40
Diffstat (limited to 'disassembler_elf.cc')
-rw-r--r-- | disassembler_elf.cc | 424 |
1 files changed, 424 insertions, 0 deletions
diff --git a/disassembler_elf.cc b/disassembler_elf.cc new file mode 100644 index 0000000..75690e9 --- /dev/null +++ b/disassembler_elf.cc @@ -0,0 +1,424 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_elf.h" + +#include <stddef.h> + +#include <algorithm> +#include <utility> + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "components/zucchini/abs32_utils.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/buffer_source.h" + +namespace zucchini { + +namespace { + +// Determines whether |section| is a reloc section. +template <class Traits> +bool IsRelocSection(const typename Traits::Elf_Shdr& section) { + if (section.sh_size == 0) + return false; + if (section.sh_type == elf::SHT_REL) { + // Also validate |section.sh_entsize|, which gets used later. + return section.sh_entsize == sizeof(typename Traits::Elf_Rel); + } + if (section.sh_type == elf::SHT_RELA) + return section.sh_entsize == sizeof(typename Traits::Elf_Rela); + return false; +} + +// Determines whether |section| is a section with executable code. 
+template <class Traits> +bool IsExecSection(const typename Traits::Elf_Shdr& section) { + return (section.sh_flags & elf::SHF_EXECINSTR) != 0; +} + +} // namespace + +/******** ELF32Traits ********/ + +// static +constexpr Bitness Elf32Traits::kBitness; +constexpr elf::FileClass Elf32Traits::kIdentificationClass; + +/******** ELF32IntelTraits ********/ + +// static +constexpr ExecutableType Elf32IntelTraits::kExeType; +const char Elf32IntelTraits::kExeTypeString[] = "ELF x86"; +constexpr elf::MachineArchitecture Elf32IntelTraits::kMachineValue; +constexpr uint32_t Elf32IntelTraits::kRelType; + +/******** ELF64Traits ********/ + +// static +constexpr Bitness Elf64Traits::kBitness; +constexpr elf::FileClass Elf64Traits::kIdentificationClass; + +/******** ELF64IntelTraits ********/ + +// static +constexpr ExecutableType Elf64IntelTraits::kExeType; +const char Elf64IntelTraits::kExeTypeString[] = "ELF x64"; +constexpr elf::MachineArchitecture Elf64IntelTraits::kMachineValue; +constexpr uint32_t Elf64IntelTraits::kRelType; + +/******** DisassemblerElf ********/ + +// static. +template <class Traits> +bool DisassemblerElf<Traits>::QuickDetect(ConstBufferView image) { + BufferSource source(image); + + // Do not consume the bytes for the magic value, as they are part of the + // header. + if (!source.CheckNextBytes({0x7F, 'E', 'L', 'F'})) + return false; + + auto* header = source.GetPointer<typename Traits::Elf_Ehdr>(); + if (!header) + return false; + + if (header->e_ident[elf::EI_CLASS] != Traits::kIdentificationClass) + return false; + + if (header->e_ident[elf::EI_DATA] != 1) // Only ELFDATA2LSB is supported. 
+ return false; + + if (header->e_type != elf::ET_EXEC && header->e_type != elf::ET_DYN) + return false; + + if (header->e_version != 1 || header->e_ident[elf::EI_VERSION] != 1) + return false; + + if (header->e_machine != supported_architecture()) + return false; + + if (header->e_shentsize != sizeof(typename Traits::Elf_Shdr)) + return false; + + return true; +} + +template <class Traits> +DisassemblerElf<Traits>::~DisassemblerElf() = default; + +template <class Traits> +ExecutableType DisassemblerElf<Traits>::GetExeType() const { + return Traits::kExeType; +} + +template <class Traits> +std::string DisassemblerElf<Traits>::GetExeTypeString() const { + return Traits::kExeTypeString; +} + +// |num_equivalence_iterations_| = 2 for reloc -> abs32. +template <class Traits> +DisassemblerElf<Traits>::DisassemblerElf() : Disassembler(2) {} + +template <class Traits> +bool DisassemblerElf<Traits>::Parse(ConstBufferView image) { + image_ = image; + if (!ParseHeader()) + return false; + ParseSections(); + return true; +} + +template <class Traits> +std::unique_ptr<ReferenceReader> DisassemblerElf<Traits>::MakeReadRelocs( + offset_t lo, + offset_t hi) { + DCHECK_LE(lo, hi); + DCHECK_LE(hi, image_.size()); + + if (reloc_section_dims_.empty()) + return std::make_unique<EmptyReferenceReader>(); + + return std::make_unique<RelocReaderElf>( + image_, Traits::kBitness, reloc_section_dims_, + supported_relocation_type(), lo, hi, translator_); +} + +template <class Traits> +std::unique_ptr<ReferenceWriter> DisassemblerElf<Traits>::MakeWriteRelocs( + MutableBufferView image) { + return std::make_unique<RelocWriterElf>(image, Traits::kBitness, translator_); +} + +template <class Traits> +std::unique_ptr<ReferenceReader> DisassemblerElf<Traits>::MakeReadAbs32( + offset_t lo, + offset_t hi) { + Abs32RvaExtractorWin32 abs_rva_extractor(image_, {Traits::kBitness, 0}, + abs32_locations_, lo, hi); + return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor), + translator_); +} 
+ +template <class Traits> +std::unique_ptr<ReferenceWriter> DisassemblerElf<Traits>::MakeWriteAbs32( + MutableBufferView image) { + return std::make_unique<Abs32WriterWin32>( + image, AbsoluteAddress(Traits::kBitness, 0), translator_); +} + +template <class Traits> +bool DisassemblerElf<Traits>::ParseHeader() { + BufferSource source(image_); + + // Ensures |header_| is valid later on. + if (!QuickDetect(image_)) + return false; + + header_ = source.GetPointer<typename Traits::Elf_Ehdr>(); + + sections_count_ = header_->e_shnum; + source = std::move(BufferSource(image_).Skip(header_->e_shoff)); + sections_ = source.GetArray<typename Traits::Elf_Shdr>(sections_count_); + if (!sections_) + return false; + offset_t section_table_end = + base::checked_cast<offset_t>(source.begin() - image_.begin()); + + segments_count_ = header_->e_phnum; + source = std::move(BufferSource(image_).Skip(header_->e_phoff)); + segments_ = source.GetArray<typename Traits::Elf_Phdr>(segments_count_); + if (!segments_) + return false; + offset_t segment_table_end = + base::checked_cast<offset_t>(source.begin() - image_.begin()); + + // Check string section -- even though we've stopped using them. + elf::Elf32_Half string_section_id = header_->e_shstrndx; + if (string_section_id >= sections_count_) + return false; + size_t section_names_size = sections_[string_section_id].sh_size; + if (section_names_size > 0) { + // If nonempty, then last byte of string section must be null. + const char* section_names = nullptr; + source = std::move( + BufferSource(image_).Skip(sections_[string_section_id].sh_offset)); + section_names = source.GetArray<char>(section_names_size); + if (!section_names || section_names[section_names_size - 1] != '\0') + return false; + } + + // Establish bound on encountered offsets. + offset_t offset_bound = std::max(section_table_end, segment_table_end); + + // Visit each section, validate, and add address translation data to |units|. 
+ std::vector<AddressTranslator::Unit> units; + units.reserve(sections_count_); + + for (int i = 0; i < sections_count_; ++i) { + const typename Traits::Elf_Shdr* section = §ions_[i]; + + // Skip empty sections. These don't affect |offset_bound|, and don't + // contribute to RVA-offset mapping. + if (section->sh_size == 0) + continue; + + // Be lax with RVAs: Assume they fit in int32_t, even for 64-bit. If + // assumption fails, simply skip the section with warning. + if (!RangeIsBounded(section->sh_addr, section->sh_size, kRvaBound) || + !RangeIsBounded(section->sh_offset, section->sh_size, kOffsetBound)) { + LOG(WARNING) << "Section " << i << " does not fit in int32_t."; + continue; + } + + // Extract dimensions to 32-bit integers to facilitate conversion. Range of + // values was ensured above when checking that the section is bounded. + uint32_t sh_size = base::checked_cast<uint32_t>(section->sh_size); + offset_t sh_offset = base::checked_cast<offset_t>(section->sh_offset); + rva_t sh_addr = base::checked_cast<rva_t>(section->sh_addr); + + // Update |offset_bound|. + if (section->sh_type != elf::SHT_NOBITS) { + // Be strict with offsets: Any size overflow invalidates the file. + if (!image_.covers({sh_offset, sh_size})) + return false; + + offset_t section_end = sh_offset + sh_size; + offset_bound = std::max(offset_bound, section_end); + } + + // Compute mappings to translate between RVA and offset. As a heuristic, + // sections with RVA == 0 (i.e., |sh_addr == 0|) are ignored because these + // tend to be duplicates (which cause problems during lookup), and tend to + // be uninteresting. + if (section->sh_addr > 0) { + // Add |section| data for offset-RVA translation. + units.push_back({sh_offset, sh_size, sh_addr, sh_size}); + } + } + + // Initialize |translator_| for offset-RVA translations. Any inconsistency + // (e.g., 2 offsets correspond to the same RVA) would invalidate the ELF file. 
+ if (translator_.Initialize(std::move(units)) != AddressTranslator::kSuccess) + return false; + + // Visits |segments_| to get better estimate on |offset_bound|. + for (const typename Traits::Elf_Phdr* segment = segments_; + segment != segments_ + segments_count_; ++segment) { + if (!RangeIsBounded(segment->p_offset, segment->p_filesz, kOffsetBound)) + return false; + offset_t segment_end = segment->p_offset + segment->p_filesz; + offset_bound = std::max(offset_bound, segment_end); + } + + if (offset_bound > image_.size()) + return false; + image_.shrink(offset_bound); + + return true; +} + +template <class Traits> +void DisassemblerElf<Traits>::ExtractInterestingSectionHeaders() { + DCHECK(reloc_section_dims_.empty()); + DCHECK(exec_headers_.empty()); + for (elf::Elf32_Half i = 0; i < sections_count_; ++i) { + const typename Traits::Elf_Shdr* section = sections_ + i; + if (IsRelocSection<Traits>(*section)) + reloc_section_dims_.emplace_back(*section); + else if (IsExecSection<Traits>(*section)) + exec_headers_.push_back(section); + } + auto comp = [](const typename Traits::Elf_Shdr* a, + const typename Traits::Elf_Shdr* b) { + return a->sh_offset < b->sh_offset; + }; + std::sort(reloc_section_dims_.begin(), reloc_section_dims_.end()); + std::sort(exec_headers_.begin(), exec_headers_.end(), comp); +} + +template <class Traits> +void DisassemblerElf<Traits>::GetAbs32FromRelocSections() { + constexpr int kAbs32Width = 4; + DCHECK(abs32_locations_.empty()); + auto relocs = MakeReadRelocs(0, offset_t(size())); + for (auto ref = relocs->GetNext(); ref; ref = relocs->GetNext()) { + // Reject null targets and targets outside |image_|. Note that here we + // assume abs32 targets are never "fake offsets". + if (ref->target > 0 && image_.covers({ref->target, kAbs32Width})) + abs32_locations_.push_back(ref->target); + } + abs32_locations_.shrink_to_fit(); + std::sort(abs32_locations_.begin(), abs32_locations_.end()); + + // Abs32 reference bodies must not overlap. 
If found, simply remove them. + size_t num_removed = + RemoveOverlappingAbs32Locations(Traits::kBitness, &abs32_locations_); + if (num_removed) { + LOG(WARNING) << "Warning: Found and removed " << num_removed + << " abs32 locations with overlapping bodies."; + } +} + +template <class Traits> +void DisassemblerElf<Traits>::GetRel32FromCodeSections() { + for (const typename Traits::Elf_Shdr* section : exec_headers_) + ParseExecSection(*section); + PostProcessRel32(); +} + +template <class Traits> +void DisassemblerElf<Traits>::ParseSections() { + ExtractInterestingSectionHeaders(); + GetAbs32FromRelocSections(); + GetRel32FromCodeSections(); +} + +/******** DisassemblerElfIntel ********/ + +template <class Traits> +DisassemblerElfIntel<Traits>::DisassemblerElfIntel() = default; + +template <class Traits> +DisassemblerElfIntel<Traits>::~DisassemblerElfIntel() = default; + +template <class Traits> +std::vector<ReferenceGroup> DisassemblerElfIntel<Traits>::MakeReferenceGroups() + const { + return {{ReferenceTypeTraits{4, TypeTag(kReloc), PoolTag(kReloc)}, + &DisassemblerElfIntel<Traits>::MakeReadRelocs, + &DisassemblerElfIntel<Traits>::MakeWriteRelocs}, + {ReferenceTypeTraits{4, TypeTag(kAbs32), PoolTag(kAbs32)}, + &DisassemblerElfIntel<Traits>::MakeReadAbs32, + &DisassemblerElfIntel<Traits>::MakeWriteAbs32}, + {ReferenceTypeTraits{4, TypeTag(kRel32), PoolTag(kRel32)}, + &DisassemblerElfIntel<Traits>::MakeReadRel32, + &DisassemblerElfIntel<Traits>::MakeWriteRel32}}; +} + +template <class Traits> +void DisassemblerElfIntel<Traits>::ParseExecSection( + const typename Traits::Elf_Shdr& section) { + ConstBufferView& image_ = this->image_; + auto& abs32_locations_ = this->abs32_locations_; + + std::ptrdiff_t from_offset_to_rva = section.sh_addr - section.sh_offset; + rva_t start_rva = section.sh_addr; + rva_t end_rva = start_rva + section.sh_size; + + AddressTranslator::RvaToOffsetCache target_rva_checker(this->translator_); + + ConstBufferView region(image_.begin() + 
section.sh_offset, section.sh_size); + Abs32GapFinder gap_finder(image_, region, abs32_locations_, 4); + std::unique_ptr<Rel32FinderIntel> finder = + std::make_unique<typename Traits::Rel32FinderUse>(image_); + for (auto gap = gap_finder.GetNext(); gap.has_value(); + gap = gap_finder.GetNext()) { + finder->Reset(gap.value()); + for (auto rel32 = finder->GetNext(); rel32.has_value(); + rel32 = finder->GetNext()) { + offset_t rel32_offset = offset_t(rel32->location - image_.begin()); + rva_t rel32_rva = rva_t(rel32_offset + from_offset_to_rva); + rva_t target_rva = rel32_rva + 4 + image_.read<uint32_t>(rel32_offset); + if (target_rva_checker.IsValid(target_rva) && + (rel32->can_point_outside_section || + (start_rva <= target_rva && target_rva < end_rva))) { + finder->Accept(); + rel32_locations_.push_back(rel32_offset); + } + } + } +} + +template <class Traits> +void DisassemblerElfIntel<Traits>::PostProcessRel32() { + rel32_locations_.shrink_to_fit(); + std::sort(rel32_locations_.begin(), rel32_locations_.end()); +} + +template <class Traits> +std::unique_ptr<ReferenceReader> DisassemblerElfIntel<Traits>::MakeReadRel32( + offset_t lo, + offset_t hi) { + return std::make_unique<Rel32ReaderX86>(this->image_, lo, hi, + &rel32_locations_, this->translator_); +} + +template <class Traits> +std::unique_ptr<ReferenceWriter> DisassemblerElfIntel<Traits>::MakeWriteRel32( + MutableBufferView image) { + return std::make_unique<Rel32WriterX86>(image, this->translator_); +} + +// Explicit instantiation for supported classes. +template class DisassemblerElfIntel<Elf32IntelTraits>; +template class DisassemblerElfIntel<Elf64IntelTraits>; +template bool DisassemblerElf<Elf32IntelTraits>::QuickDetect( + ConstBufferView image); +template bool DisassemblerElf<Elf64IntelTraits>::QuickDetect( + ConstBufferView image); + +} // namespace zucchini |