// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -*- mode: C++ -*- // // Copyright 2020-2022 Google LLC // // Licensed under the Apache License v2.0 with LLVM Exceptions (the // "License"); you may not use this file except in compliance with the // License. You may obtain a copy of the License at // // https://llvm.org/LICENSE.txt // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Author: Maria Teguiani // Author: Giuliano Procida // Author: Aleksei Vetrov #include "elf_loader.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "error.h" #include "graph.h" namespace stg { namespace elf { namespace { SymbolTableEntry::SymbolType ParseSymbolType(unsigned char symbol_type) { switch (symbol_type) { case STT_NOTYPE: return SymbolTableEntry::SymbolType::NOTYPE; case STT_OBJECT: return SymbolTableEntry::SymbolType::OBJECT; case STT_FUNC: return SymbolTableEntry::SymbolType::FUNCTION; case STT_SECTION: return SymbolTableEntry::SymbolType::SECTION; case STT_FILE: return SymbolTableEntry::SymbolType::FILE; case STT_COMMON: return SymbolTableEntry::SymbolType::COMMON; case STT_TLS: return SymbolTableEntry::SymbolType::TLS; case STT_GNU_IFUNC: return SymbolTableEntry::SymbolType::GNU_IFUNC; default: Die() << "Unknown ELF symbol type: " << symbol_type; } } SymbolTableEntry::Binding ParseSymbolBinding(unsigned char binding) { switch (binding) { case STB_LOCAL: return SymbolTableEntry::Binding::LOCAL; case STB_GLOBAL: return SymbolTableEntry::Binding::GLOBAL; case STB_WEAK: return SymbolTableEntry::Binding::WEAK; case STB_GNU_UNIQUE: return SymbolTableEntry::Binding::GNU_UNIQUE; default: Die() << "Unknown ELF symbol binding: " << binding; } } SymbolTableEntry::Visibility ParseSymbolVisibility(unsigned char visibility) { switch (visibility) { case STV_DEFAULT: return SymbolTableEntry::Visibility::DEFAULT; case STV_INTERNAL: return SymbolTableEntry::Visibility::INTERNAL; case STV_HIDDEN: return SymbolTableEntry::Visibility::HIDDEN; case STV_PROTECTED: return SymbolTableEntry::Visibility::PROTECTED; default: Die() << "Unknown ELF symbol visibility: " << visibility; } } SymbolTableEntry::ValueType ParseSymbolValueType(Elf64_Section section_index) { switch (section_index) { case SHN_UNDEF: return SymbolTableEntry::ValueType::UNDEFINED; case SHN_ABS: return SymbolTableEntry::ValueType::ABSOLUTE; case SHN_COMMON: return SymbolTableEntry::ValueType::COMMON; default: return SymbolTableEntry::ValueType::RELATIVE_TO_SECTION; } } std::string ElfHeaderTypeToString(unsigned char elf_header_type) { switch (elf_header_type) { case ET_NONE: return "none"; case ET_REL: return "relocatable"; case ET_EXEC: return "executable"; case ET_DYN: return "shared object"; case ET_CORE: return "coredump"; default: return "unknown (type = " + std::to_string(elf_header_type) + ')'; } } std::string ElfSectionTypeToString(Elf64_Word elf_section_type) { switch (elf_section_type) { case SHT_SYMTAB: return "symtab"; case SHT_DYNSYM: return "dynsym"; case SHT_GNU_verdef: return "GNU_verdef"; case SHT_GNU_verneed: return "GNU_verneed"; case SHT_GNU_versym: return "GNU_versym"; default: return "unknown (type = " + std::to_string(elf_section_type) + ')'; } } GElf_Half GetMachine(Elf* elf) { GElf_Ehdr header; Check(gelf_getehdr(elf, &header) != nullptr) << "could not get ELF header"; return header.e_machine; } void AdjustAddress(GElf_Half machine, SymbolTableEntry& entry) { if (machine == EM_ARM) { if (entry.symbol_type == SymbolTableEntry::SymbolType::FUNCTION || entry.symbol_type == SymbolTableEntry::SymbolType::GNU_IFUNC) { // Clear bit zero of ARM32 addresses as per "ELF for the Arm Architecture" // section 5.5.3. https://static.docs.arm.com/ihi0044/g/aaelf32.pdf entry.value &= ~1; } } else if (machine == EM_AARCH64) { // Copy bit 55 over bits 56 to 63 which may be tag information. entry.value = entry.value & (1ULL << 55) ? entry.value | (0xffULL << 56) : entry.value & ~(0xffULL << 56); } } std::vector GetSectionsIf( Elf* elf, const std::function& predicate) { std::vector result; Elf_Scn* section = nullptr; GElf_Shdr header; while ((section = elf_nextscn(elf, section)) != nullptr) { Check(gelf_getshdr(section, &header) != nullptr) << "could not get ELF section header"; if (predicate(header)) { result.push_back(section); } } return result; } std::vector GetSectionsByName(Elf* elf, const std::string& name) { size_t shdr_strtab_index; Check(elf_getshdrstrndx(elf, &shdr_strtab_index) == 0) << "could not get ELF section header string table index"; return GetSectionsIf(elf, [&](const GElf_Shdr& header) { const auto* section_name = elf_strptr(elf, shdr_strtab_index, header.sh_name); return section_name != nullptr && section_name == name; }); } Elf_Scn* MaybeGetSectionByName(Elf* elf, const std::string& name) { const auto sections = GetSectionsByName(elf, name); if (sections.empty()) { return nullptr; } Check(sections.size() == 1) << "multiple sections found with name '" << name << "'"; return sections[0]; } Elf_Scn* GetSectionByName(Elf* elf, const std::string& name) { Elf_Scn* section = MaybeGetSectionByName(elf, name); Check(section != nullptr) << "no section found with name '" << name << "'"; return section; } Elf_Scn* MaybeGetSectionByType(Elf* elf, Elf64_Word type) { auto sections = GetSectionsIf( elf, [&](const GElf_Shdr& header) { return header.sh_type == type; }); if (sections.empty()) { return nullptr; } Check(sections.size() == 1) << "multiple sections found with type " << type; return sections[0]; } Elf_Scn* GetSectionByIndex(Elf* elf, size_t index) { Elf_Scn* section = elf_getscn(elf, index); Check(section != nullptr) << "no section found with index " << index; return section; } struct SectionInfo { GElf_Shdr header; Elf_Data* data; }; SectionInfo GetSectionInfo(Elf_Scn* section) { const size_t index = elf_ndxscn(section); GElf_Shdr section_header; Check(gelf_getshdr(section, §ion_header) != nullptr) << "failed to read section (index = " << index << ") header"; Elf_Data* data = elf_getdata(section, nullptr); Check(data != nullptr) << "section (index = " << index << ") data is invalid"; return {section_header, data}; } size_t GetNumberOfEntries(const GElf_Shdr& section_header) { Check(section_header.sh_entsize != 0) << "zero table entity size is unexpected for section " << ElfSectionTypeToString(section_header.sh_type); return section_header.sh_size / section_header.sh_entsize; } std::string_view GetString(Elf* elf, uint32_t section, size_t offset) { const auto name = elf_strptr(elf, section, offset); Check(name != nullptr) << "string was not found (section: " << section << ", offset: " << offset << ")"; return name; } Elf_Scn* GetSymbolTableSection(Elf* elf, bool is_linux_kernel_binary) { GElf_Ehdr elf_header; Check(gelf_getehdr(elf, &elf_header) != nullptr) << "could not get ELF header"; Elf_Scn* symtab = MaybeGetSectionByType(elf, SHT_SYMTAB); Elf_Scn* dynsym = MaybeGetSectionByType(elf, SHT_DYNSYM); if (symtab != nullptr && dynsym != nullptr) { // Relocatable ELF binaries, Linux kernel and modules have their // exported symbols in .symtab, all other ELF types have their // exported symbols in .dynsym. if (elf_header.e_type == ET_REL || is_linux_kernel_binary) { return symtab; } if (elf_header.e_type == ET_DYN || elf_header.e_type == ET_EXEC) { return dynsym; } Die() << "unsupported ELF type: '" << ElfHeaderTypeToString(elf_header.e_type) << "'"; } else if (symtab != nullptr) { return symtab; } else if (dynsym != nullptr) { return dynsym; } else { Die() << "no ELF symbol table found"; } } constexpr std::string_view kCFISuffix = ".cfi"; bool IsCFISymbolName(std::string_view name) { // Check if symbol name ends with ".cfi" // TODO: use std::string_view::ends_with return (name.size() >= kCFISuffix.size() && name.substr(name.size() - kCFISuffix.size()) == kCFISuffix); } } // namespace std::string_view UnwrapCFISymbolName(std::string_view cfi_name) { Check(IsCFISymbolName(cfi_name)) << "CFI symbol " << cfi_name << " doesn't end with .cfi"; return cfi_name.substr(0, cfi_name.size() - kCFISuffix.size()); } namespace { std::vector GetSymbols( Elf* elf, Elf_Scn* symbol_table_section, bool cfi) { const auto machine = GetMachine(elf); const auto [symbol_table_header, symbol_table_data] = GetSectionInfo(symbol_table_section); const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header); std::vector result; result.reserve(number_of_symbols); // GElf uses int for indexes in symbol table, prevent int overflow. Check(number_of_symbols <= std::numeric_limits::max()) << "number of symbols exceeds INT_MAX"; for (size_t i = 0; i < number_of_symbols; ++i) { GElf_Sym symbol; Check(gelf_getsym(symbol_table_data, static_cast(i), &symbol) != nullptr) << "symbol (i = " << i << ") was not found"; const auto name = GetString(elf, symbol_table_header.sh_link, symbol.st_name); if (cfi != IsCFISymbolName(name)) { continue; } SymbolTableEntry entry{ .name = name, .value = symbol.st_value, .size = symbol.st_size, .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)), .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)), .visibility = ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)), .section_index = symbol.st_shndx, .value_type = ParseSymbolValueType(symbol.st_shndx), }; AdjustAddress(machine, entry); result.push_back(entry); } return result; } bool IsLinuxKernelBinary(Elf* elf) { // The Linux kernel itself has many specific sections that are sufficient to // classify a binary as kernel binary if present, `__ksymtab_strings` is one // of them. It is present if a kernel binary (vmlinux or a module) exports // symbols via the EXPORT_SYMBOL_* macros and it contains symbol names and // namespaces which form part of the ABI. // // Kernel modules might not present a `__ksymtab_strings` section if they do // not export symbols themselves via the ksymtab. Yet they can be identified // by the presence of the `.modinfo` section. Since that is somewhat a generic // name, also check for the presence of `.gnu.linkonce.this_module` to get // solid signal as both of those sections are present in kernel modules. return MaybeGetSectionByName(elf, "__ksymtab_strings") != nullptr || (MaybeGetSectionByName(elf, ".modinfo") != nullptr && MaybeGetSectionByName(elf, ".gnu.linkonce.this_module") != nullptr); } bool IsRelocatable(Elf* elf) { GElf_Ehdr elf_header; Check(gelf_getehdr(elf, &elf_header) != nullptr) << "could not get ELF header"; return elf_header.e_type == ET_REL; } bool IsLittleEndianBinary(Elf* elf) { GElf_Ehdr elf_header; Check(gelf_getehdr(elf, &elf_header) != nullptr) << "could not get ELF header"; switch (auto endianness = elf_header.e_ident[EI_DATA]) { case ELFDATA2LSB: return true; case ELFDATA2MSB: return false; default: Die() << "Unsupported ELF endianness: " << endianness; } } } // namespace std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type) { using SymbolType = SymbolTableEntry::SymbolType; switch (type) { case SymbolType::NOTYPE: return os << "notype"; case SymbolType::OBJECT: return os << "object"; case SymbolType::FUNCTION: return os << "function"; case SymbolType::SECTION: return os << "section"; case SymbolType::FILE: return os << "file"; case SymbolType::COMMON: return os << "common"; case SymbolType::TLS: return os << "TLS"; case SymbolType::GNU_IFUNC: return os << "indirect (ifunc) function"; } } std::ostream& operator<<(std::ostream& os, const SymbolTableEntry::ValueType type) { using ValueType = SymbolTableEntry::ValueType; switch (type) { case ValueType::UNDEFINED: return os << "undefined"; case ValueType::ABSOLUTE: return os << "absolute"; case ValueType::COMMON: return os << "common"; case ValueType::RELATIVE_TO_SECTION: return os << "relative"; } } ElfLoader::ElfLoader(Elf* elf) : elf_(elf) { Check(elf_ != nullptr) << "No ELF was provided"; InitializeElfInformation(); } void ElfLoader::InitializeElfInformation() { is_linux_kernel_binary_ = elf::IsLinuxKernelBinary(elf_); is_relocatable_ = elf::IsRelocatable(elf_); is_little_endian_binary_ = elf::IsLittleEndianBinary(elf_); } std::string_view ElfLoader::GetBtfRawData() const { Elf_Scn* btf_section = GetSectionByName(elf_, ".BTF"); Check(btf_section != nullptr) << ".BTF section is invalid"; Elf_Data* elf_data = elf_rawdata(btf_section, nullptr); Check(elf_data != nullptr) << ".BTF section data is invalid"; const char* btf_start = static_cast(elf_data->d_buf); const size_t btf_size = elf_data->d_size; return std::string_view(btf_start, btf_size); } std::vector ElfLoader::GetElfSymbols() const { Elf_Scn* symbol_table_section = GetSymbolTableSection(elf_, is_linux_kernel_binary_); Check(symbol_table_section != nullptr) << "failed to find symbol table section"; return GetSymbols(elf_, symbol_table_section, /* cfi = */ false); } std::vector ElfLoader::GetCFISymbols() const { // CFI symbols may be only in .symtab Elf_Scn* symbol_table_section = MaybeGetSectionByType(elf_, SHT_SYMTAB); if (symbol_table_section == nullptr) { // It is possible for ET_DYN and ET_EXEC ELF binaries to not have .symtab, // because it was trimmed away. We can't determine whether there were CFI // symbols in the first place, so the best we can do is returning an empty // list. return {}; } return GetSymbols(elf_, symbol_table_section, /* cfi = */ true); } ElfSymbol::CRC ElfLoader::GetElfSymbolCRC( const SymbolTableEntry& symbol) const { Check(is_little_endian_binary_) << "CRC is not supported in big-endian binaries"; const auto address = GetAbsoluteAddress(symbol); if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) { return ElfSymbol::CRC{static_cast(address)}; } Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION) << "CRC symbol is expected to be absolute or relative to a section"; const auto section = GetSectionByIndex(elf_, symbol.section_index); const auto [header, data] = GetSectionInfo(section); Check(data->d_buf != nullptr) << "Section has no data buffer"; Check(address >= header.sh_addr) << "CRC symbol address is below CRC section start"; const size_t offset = address - header.sh_addr; const size_t offset_end = offset + sizeof(uint32_t); Check(offset_end <= data->d_size && offset_end <= header.sh_size) << "CRC symbol address is above CRC section end"; return ElfSymbol::CRC{*reinterpret_cast( reinterpret_cast(data->d_buf) + offset)}; } std::string_view ElfLoader::GetElfSymbolNamespace( const SymbolTableEntry& symbol) const { Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION) << "Namespace symbol is expected to be relative to a section"; const auto section = GetSectionByIndex(elf_, symbol.section_index); const auto [header, data] = GetSectionInfo(section); Check(data->d_buf != nullptr) << "Section has no data buffer"; const auto address = GetAbsoluteAddress(symbol); Check(address >= header.sh_addr) << "Namespace symbol address is below namespace section start"; const size_t offset = address - header.sh_addr; Check(offset < data->d_size && offset < header.sh_size) << "Namespace symbol address is above namespace section end"; const char* begin = reinterpret_cast(data->d_buf) + offset; const size_t length = strnlen(begin, data->d_size - offset); Check(offset + length < data->d_size) << "Namespace string should be null-terminated"; return std::string_view(begin, length); } size_t ElfLoader::GetAbsoluteAddress(const SymbolTableEntry& symbol) const { if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) { return symbol.value; } Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION) << "Only absolute and relative to sections symbols are supported"; // In relocatable files, st_value holds a section offset for a defined symbol. if (is_relocatable_) { const auto section = GetSectionByIndex(elf_, symbol.section_index); GElf_Shdr header; Check(gelf_getshdr(section, &header) != nullptr) << "failed to get symbol section header"; Check(symbol.value + symbol.size <= header.sh_size) << "Symbol should be inside the section"; return symbol.value + header.sh_addr; } // In executable and shared object files, st_value holds a virtual address. return symbol.value; } bool ElfLoader::IsLinuxKernelBinary() const { return is_linux_kernel_binary_; } bool ElfLoader::IsLittleEndianBinary() const { return is_little_endian_binary_; } } // namespace elf } // namespace stg