diff options
author | Giuliano Procida <gprocida@google.com> | 2023-05-16 16:59:03 +0100 |
---|---|---|
committer | Giuliano Procida <gprocida@google.com> | 2023-05-16 16:59:03 +0100 |
commit | ad8e0a5eb946febb9dd3ae27f7260bae8bf69b63 (patch) | |
tree | e380facd9f760c1169699fb81a345b98a0b3d990 | |
parent | cceb6a78f3c958ff93abdad8adac6383e7d03862 (diff) | |
parent | e43971671d31b445e71e9765ea4e0f958cd0585d (diff) | |
download | stg-main-16k.tar.gz |
Merge branch 'upstream-master' into 'master' [main-16k]
* aosp/upstream-master:
stg: remove ability to emit unstable output
type_roots: Update `stg` and `stgdiff` documentation
type_roots: Die when merge is requested on interfaces with type roots
type_roots: Add flag to `stg` and `stgdiff` tools for capturing types as roots
Abigail reader: remove useless qualifiers
Abigail reader: add XML tidying functionality
Abigail reader: strip non-elements before parsing XML
Abigail reader: forbid network access during XML parsing
Abigail reader: make `FromLibxml` and `GetName` return `string_view`
Abigail reader: use GetName consistently
Abigail reader: shorten a couple of helper function names
Abigail reader: GetAttributeOrDie: make a local string const
stgdiff.md: fix typo in --exact section
type_roots: Populate type roots while reading ELF
Abigail reader: add some function description comments
Abigail reader: GetOnlyChild: eliminate name argument
Abigail reader: GetAttribute: assign optional<string> using emplace
Abigail reader: use Die rather than Check in GetOnlyChild
Abigail reader: update is-declaration-only handling comment
Abigail reader: add short-named helpers for child iteration
Abigail reader: use auto* child element iteration loop variable type
type_roots: Add `ReadOptions` struct for ELF reader
type_roots: Emit `Interface` node from the proto writer
fix some const correctness issues
type_roots: Add `Interface` node to STG protobuf definitions
proto: make `Symbols` node repeated in the format for consistency
type_roots: Add types map to `Interface` node
comparison: move static functions into anonymous namespace
Signed-off-by: Giuliano Procida <gprocida@google.com>
Change-Id: I5c544d452661ff98bf3affa41e4b8c9efb9caf52
57 files changed, 1742 insertions, 265 deletions
diff --git a/abigail_reader.cc b/abigail_reader.cc index acab015..c414a59 100644 --- a/abigail_reader.cc +++ b/abigail_reader.cc @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -*- mode: C++ -*- // -// Copyright 2021-2022 Google LLC +// Copyright 2021-2023 Google LLC // // Licensed under the Apache License v2.0 with LLVM Exceptions (the // "License"); you may not use this file except in compliance with the @@ -23,18 +23,22 @@ #include <fcntl.h> #include <unistd.h> +#include <algorithm> +#include <array> #include <cstddef> #include <cstdint> -#include <cstring> #include <functional> #include <iomanip> #include <ios> #include <map> #include <memory> #include <optional> +#include <set> #include <sstream> #include <string> +#include <string_view> #include <type_traits> +#include <unordered_map> #include <utility> #include <vector> @@ -42,60 +46,91 @@ #include "error.h" #include "file_descriptor.h" #include "graph.h" +#include "type_normalisation.h" namespace stg { namespace abixml { namespace { -const char* FromLibxml(const xmlChar* str) { +// Cast a libxml string to C string and present it as a string_view. +std::string_view FromLibxml(const xmlChar* str) { return reinterpret_cast<const char*>(str); } +// Cast a C string to a libxml string. const xmlChar* ToLibxml(const char* str) { return reinterpret_cast<const xmlChar*>(str); } -std::string GetElementName(xmlNodePtr element) { - return std::string(FromLibxml(element->name)); +// Get the name of an XML element. 
+std::string_view GetName(xmlNodePtr element) { + return FromLibxml(element->name); } -void CheckElementName(const char* name, xmlNodePtr element) { - const auto element_name = FromLibxml(element->name); - if (strcmp(element_name, name) != 0) { +void CheckName(const char* name, xmlNodePtr element) { + const auto element_name = GetName(element); + if (element_name != name) { Die() << "expected element '" << name << "' but got '" << element_name << "'"; } } -xmlNodePtr GetOnlyChild(const std::string& name, xmlNodePtr element) { - xmlNodePtr child = xmlFirstElementChild(element); - Check(child && !xmlNextElementSibling(child)) - << name << " with not exactly one child element"; +xmlNodePtr Child(xmlNodePtr node) { + return node->children; +} + +xmlNodePtr Next(xmlNodePtr node) { + return node->next; +} + +xmlNodePtr GetOnlyChild(xmlNodePtr element) { + xmlNodePtr child = Child(element); + if (child == nullptr || Next(child) != nullptr) { + Die() << "element '" << GetName(element) << "' without exactly one child"; + } return child; } +// Get an optional attribute. std::optional<std::string> GetAttribute(xmlNodePtr node, const char* name) { std::optional<std::string> result; xmlChar* attribute = xmlGetProp(node, ToLibxml(name)); if (attribute) { - result = {FromLibxml(attribute)}; + result.emplace(FromLibxml(attribute)); xmlFree(attribute); } return result; } +// Get an attribute. std::string GetAttributeOrDie(xmlNodePtr node, const char* name) { xmlChar* attribute = xmlGetProp(node, ToLibxml(name)); if (!attribute) { - Die() << "element '" << FromLibxml(node->name) + Die() << "element '" << GetName(node) << "' missing attribute '" << name << "'"; } - std::string result = FromLibxml(attribute); + const std::string result(FromLibxml(attribute)); xmlFree(attribute); return result; } +// Set an attribute value. +void SetAttribute(xmlNodePtr node, const char* name, const char* value) { + xmlSetProp(node, ToLibxml(name), ToLibxml(value)); +} + +// Unset an attribute value. 
+void UnsetAttribute(xmlNodePtr node, const char* name) { + xmlUnsetProp(node, ToLibxml(name)); +} + +// Remove a node and free its storage. +void RemoveNode(xmlNodePtr node) { + xmlUnlinkNode(node); + xmlFreeNode(node); +} + template <typename T> std::optional<T> Parse(const std::string& value) { T result; @@ -179,7 +214,7 @@ T GetParsedValueOrDie(xmlNodePtr element, const char* name, if (parse) { return *parse; } - Die() << "element '" << FromLibxml(element->name) + Die() << "element '" << GetName(element) << "' has attribute '" << name << "' with bad value '" << value << "'"; } @@ -215,6 +250,417 @@ T ReadAttribute(xmlNodePtr element, const char* name, return GetParsedValueOrDie(element, name, value, parse(value)); } +// Remove non-element nodes, recursively. +// +// This simplifies subsequent manipulation. This should only remove comment, +// text and possibly CDATA nodes. +void StripNonElements(xmlNodePtr node) { + switch (node->type) { + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + RemoveNode(node); + break; + case XML_ELEMENT_NODE: { + xmlNodePtr child = Child(node); + while (child) { + xmlNodePtr next = Next(child); + StripNonElements(child); + child = next; + } + break; + } + default: + Die() << "unexpected XML node type: " << node->type; + } +} + +// Determine whether one XML element is a subtree of another, and optionally, +// actually equal to it. +bool SubOrEqualTree(bool also_equal, xmlNodePtr left, xmlNodePtr right) { + // Node names must match. + const auto left_name = GetName(left); + const auto right_name = GetName(right); + if (left_name != right_name) { + return false; + } + + // Attributes may be missing on the left, but must match otherwise. 
+ size_t left_attributes = 0; + for (auto* p = left->properties; p; p = p->next) { + ++left_attributes; + const auto attribute = FromLibxml(p->name); + const char* attribute_name = attribute.data(); + const auto left_value = GetAttributeOrDie(left, attribute_name); + const auto right_value = GetAttribute(right, attribute_name); + if (!right_value || left_value != right_value.value()) { + return false; + } + } + // To also be equal, we just need to check the counts are the same. + if (also_equal) { + size_t right_attributes = 0; + for (auto* p = right->properties; p; p = p->next) { + ++right_attributes; + } + if (left_attributes != right_attributes) { + return false; + } + } + + // The left subelements must be a subsequence of the right ones and to also be + // equal, we must not have skipped any right ones. + xmlNodePtr left_child = Child(left); + xmlNodePtr right_child = Child(right); + while (left_child != nullptr && right_child != nullptr) { + if (SubOrEqualTree(also_equal, left_child, right_child)) { + left_child = Next(left_child); + } else if (also_equal) { + return false; + } + right_child = Next(right_child); + } + return left_child == nullptr && (right_child == nullptr || !also_equal); +} + +} // namespace + +// Determine whether one XML element is a subtree of another. +bool SubTree(xmlNodePtr left, xmlNodePtr right) { + return SubOrEqualTree(false, left, right); +} + +// Determine whether one XML element is the same as another. +bool EqualTree(xmlNodePtr left, xmlNodePtr right) { + return SubOrEqualTree(true, left, right); +} + +// Find a maximal XML element if one exists. +std::optional<size_t> MaximalTree(const std::vector<xmlNodePtr>& nodes) { + if (nodes.empty()) { + return std::nullopt; + } + + // Find a potentially maximal candidate by scanning through and retaining the + // new node if it's a supertree of the current candidate. 
+ const auto count = nodes.size(); + std::vector<bool> ok(count); + size_t candidate = 0; + ok[candidate] = true; + for (size_t ix = 1; ix < count; ++ix) { + if (SubTree(nodes[candidate], nodes[ix])) { + candidate = ix; + ok[candidate] = true; + } + } + + // Verify the candidate is indeed maximal by comparing it with the nodes not + // already known to be subtrees of it. + const auto& candidate_node = nodes[candidate]; + for (size_t ix = 0; ix < count; ++ix) { + const auto& node = nodes[ix]; + if (!ok[ix] && !SubTree(node, candidate_node)) { + return std::nullopt; + } + } + + return std::make_optional(candidate); +} + +namespace { + +// Check if string_view is in an array. +template<size_t N> +bool Contains(const std::array<std::string_view, N>& haystack, + std::string_view needle) { + return std::find(haystack.begin(), haystack.end(), needle) != haystack.end(); +} + +// Remove source location attributes. +// +// This simplifies element comparison later. +void StripLocationInfo(xmlNodePtr node) { + static const std::array<std::string_view, 7> has_location_info = { + "class-decl", + "enum-decl", + "function-decl", + "parameter", + "typedef-decl", + "union-decl", + "var-decl" + }; + + if (Contains(has_location_info, GetName(node))) { + UnsetAttribute(node, "filepath"); + UnsetAttribute(node, "line"); + UnsetAttribute(node, "column"); + } + for (auto* child = Child(node); child; child = Next(child)) { + StripLocationInfo(child); + } +} + +// Remove access attribute. +// +// This simplifies element comparison later in a very specific way: libabigail +// (possibly older versions) uses the access specifier for the type it's trying +// to "emit in scope", even for its containing types, making deduplicating types +// trickier. We don't care about access anyway, so just remove it everywhere. 
+void StripAccess(xmlNodePtr node) { + static const std::array<std::string_view, 5> has_access = { + "base-class", + "data-member", + "member-function", + "member-template", + "member-type", + }; + + if (Contains(has_access, GetName(node))) { + UnsetAttribute(node, "access"); + } + for (auto* child = Child(node); child; child = Next(child)) { + StripAccess(child); + } +} + +// Elements corresponding to named types that can be anonymous or marked as +// unreachable by libabigail, so user-defined types, excepting typedefs. +const std::array<std::string_view, 3> kNamedTypes = { + "class-decl", + "enum-decl", + "union-decl", +}; + +// Remove attributes emitted by abidw --load-all-types. +// +// With this invocation and if any user-defined types are deemed unreachable, +// libabigail will output a tracking-non-reachable-types attribute on top-level +// elements and an is-non-reachable attribute on each such type element. +// +// We have our own graph-theoretic notion of reachability and these attributes +// have no ABI relevance and can interfere with element comparisons. +void StripReachabilityAttributes(xmlNodePtr node) { + const auto node_name = GetName(node); + + if (node_name == "abi-corpus-group" || node_name == "abi-corpus") { + UnsetAttribute(node, "tracking-non-reachable-types"); + } else if (Contains(kNamedTypes, node_name)) { + UnsetAttribute(node, "is-non-reachable"); + } + + for (auto* child = Child(node); child; child = Next(child)) { + StripReachabilityAttributes(child); + } +} + +// Tidy anonymous types in various ways. +// +// 1. Normalise anonymous type names by dropping the name attribute. +// +// Anonymous type names take the form __anonymous_foo__N where foo is one of +// enum, struct or union and N is an optional numerical suffix. We don't care +// about these names but they may cause trouble when comparing elements. +// +// 2. Reanonymise anonymous types that have been given names. 
+// +// At some point abidw changed its behaviour given an anonymous type with a naming +// typedef. In addition to linking the typedef and type in both directions, the +// code now gives (some) anonymous types the same name as the typedef. This +// misrepresents the original types. +// +// Such types should be anonymous. We set is-anonymous and drop the name. +// +// 3. Discard naming typedef backlinks. +// +// The attribute naming-typedef-id is a backwards link from an anonymous type to +// the typedef that refers to it. +// +// We don't care about these attributes and they may cause comparison issues. +void TidyAnonymousTypes(xmlNodePtr node) { + if (Contains(kNamedTypes, GetName(node))) { + const bool is_anon = ReadAttribute<bool>(node, "is-anonymous", false); + const auto naming_attribute = GetAttribute(node, "naming-typedef-id"); + if (is_anon) { + UnsetAttribute(node, "name"); + } else if (naming_attribute) { + SetAttribute(node, "is-anonymous", "yes"); + UnsetAttribute(node, "name"); + } + if (naming_attribute) { + UnsetAttribute(node, "naming-typedef-id"); + } + } + + for (auto* child = Child(node); child; child = Next(child)) { + TidyAnonymousTypes(child); + } +} + +// Remove duplicate data members. 
+void RemoveDuplicateDataMembers(xmlNodePtr root) { + std::vector<xmlNodePtr> types; + + // find all structs and unions + std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) { + const auto node_name = GetName(node); + // postorder in case we delete a nested element + for (auto* child = Child(node); child; child = Next(child)) { + dfs(child); + } + if (node_name == "class-decl" || node_name == "union-decl") { + types.push_back(node); + } + }; + dfs(root); + + for (const auto& node : types) { + // filter data members + std::vector<xmlNodePtr> data_members; + for (auto* child = Child(node); child; child = Next(child)) { + if (GetName(child) == "data-member") { + data_members.push_back(child); + } + } + // remove identical duplicate data members - O(n^2) + for (size_t i = 0; i < data_members.size(); ++i) { + xmlNodePtr& i_node = data_members[i]; + bool duplicate = false; + for (size_t j = 0; j < i; ++j) { + const xmlNodePtr& j_node = data_members[j]; + if (j_node != nullptr && EqualTree(i_node, j_node)) { + duplicate = true; + break; + } + } + if (duplicate) { + Warn() << "found duplicate data-member"; + RemoveNode(i_node); + i_node = nullptr; + } + } + } +} + +// Eliminate non-conflicting / report conflicting duplicate definitions. +// +// XML elements representing types are sometimes emitted multiple times, +// identically. Also, member typedefs are sometimes emitted separately from +// their types, resulting in duplicate XML fragments. +// +// Both these issues can be resolved by first detecting duplicate occurrences of +// a given type id and then checking to see if there's an instance that subsumes +// the others, which can then be eliminated. +// +// This function eliminates exact type duplicates and duplicates where there is +// at least one maximal definition. It can report the remaining duplicate +// definitions. 
+// +// If a type has duplicate definitions in multiple namespace scopes or +// definitions with different effective names, these are considered to be +// *conflicting* duplicate definitions. TODO: update text +void HandleDuplicateTypes(xmlNodePtr root) { + // Convenience typedef referring to a namespace scope. + using namespace_scope = std::vector<std::string>; + // map of type-id to pair of set of namespace scopes and vector of + // xmlNodes + std::unordered_map< + std::string, + std::pair< + std::set<namespace_scope>, + std::vector<xmlNodePtr>>> types; + namespace_scope namespaces; + + // find all type occurrences + std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) { + const auto node_name = GetName(node); + std::optional<std::string> namespace_name; + if (node_name == "namespace-decl") { + namespace_name = GetAttribute(node, "name"); + } + if (namespace_name) { + namespaces.push_back(namespace_name.value()); + } + if (node_name == "abi-corpus-group" + || node_name == "abi-corpus" + || node_name == "abi-instr" + || namespace_name) { + for (auto* child = Child(node); child; child = Next(child)) { + dfs(child); + } + } else { + const auto id = GetAttribute(node, "id"); + if (id) { + auto& info = types[id.value()]; + info.first.insert(namespaces); + info.second.push_back(node); + } + } + if (namespace_name) { + namespaces.pop_back(); + } + }; + dfs(root); + + for (const auto& [id, scopes_and_definitions] : types) { + const auto& [scopes, definitions] = scopes_and_definitions; + + if (scopes.size() > 1) { + Warn() << "conflicting scopes found for type '" << id << '\''; + continue; + } + + const auto possible_maximal = MaximalTree(definitions); + if (!possible_maximal) { + Warn() << "unresolvable duplicate definitions found for type '" << id + << '\''; + continue; + } + + // Remove all but the maximal definition. 
+ const size_t maximal = possible_maximal.value(); + for (size_t ix = 0; ix < definitions.size(); ++ix) { + if (ix != maximal) { + RemoveNode(definitions[ix]); + } + } + } +} + +} // namespace + +// Remove XML nodes and attributes that are neither used nor wanted. +void Clean(xmlNodePtr root) { + // Strip non-element nodes to simplify other operations. + StripNonElements(root); + + // Strip location information. + StripLocationInfo(root); + + // Strip access. + StripAccess(root); + + // Strip reachability attributes. + StripReachabilityAttributes(root); +} + +namespace { + +// Transform XML elements to improve their semantics. +void Tidy(xmlNodePtr root) { + // Normalise anonymous type names. + // Reanonymise anonymous types. + // Discard naming typedef backlinks. + TidyAnonymousTypes(root); + + // Remove duplicate data members. + RemoveDuplicateDataMembers(root); + + // Eliminate complete duplicates and extra fragments of types. + // Report conflicting duplicate definitions. + // Record whether there are conflicting duplicate definitions. 
+ HandleDuplicateTypes(root); +} + std::optional<uint64_t> ParseLength(const std::string& value) { if (value == "infinite" || value == "unknown") { return {0}; @@ -276,9 +722,8 @@ Id Abigail::GetVariadic() { Function Abigail::MakeFunctionType(xmlNodePtr function) { std::vector<Id> parameters; std::optional<Id> return_type; - for (auto child = xmlFirstElementChild(function); child; - child = xmlNextElementSibling(child)) { - const auto child_name = GetElementName(child); + for (auto* child = Child(function); child; child = Next(child)) { + const auto child_name = GetName(child); if (return_type) { Die() << "unexpected element after return-type"; } @@ -288,7 +733,7 @@ Function Abigail::MakeFunctionType(xmlNodePtr function) { } else if (child_name == "return") { return_type = {GetEdge(child)}; } else { - Die() << "unrecognised " << FromLibxml(function->name) + Die() << "unrecognised " << GetName(function) << " child element '" << child_name << "'"; } } @@ -299,7 +744,9 @@ Function Abigail::MakeFunctionType(xmlNodePtr function) { } Id Abigail::ProcessRoot(xmlNodePtr root) { - const auto name = GetElementName(root); + Clean(root); + Tidy(root); + const auto name = GetName(root); if (name == "abi-corpus-group") { ProcessCorpusGroup(root); } else if (name == "abi-corpus") { @@ -307,21 +754,21 @@ Id Abigail::ProcessRoot(xmlNodePtr root) { } else { Die() << "unrecognised root element '" << name << "'"; } - return BuildSymbols(); + const Id id = BuildSymbols(); + RemoveUselessQualifiers(graph_, id); + return id; } void Abigail::ProcessCorpusGroup(xmlNodePtr group) { - for (auto corpus = xmlFirstElementChild(group); corpus; - corpus = xmlNextElementSibling(corpus)) { - CheckElementName("abi-corpus", corpus); + for (auto* corpus = Child(group); corpus; corpus = Next(corpus)) { + CheckName("abi-corpus", corpus); ProcessCorpus(corpus); } } void Abigail::ProcessCorpus(xmlNodePtr corpus) { - for (auto element = xmlFirstElementChild(corpus); element; - element = 
xmlNextElementSibling(element)) { - const auto name = GetElementName(element); + for (auto* element = Child(corpus); element; element = Next(element)) { + const auto name = GetName(element); if (name == "elf-function-symbols" || name == "elf-variable-symbols") { ProcessSymbols(element); } else if (name == "elf-needed") { @@ -335,9 +782,8 @@ void Abigail::ProcessCorpus(xmlNodePtr corpus) { } void Abigail::ProcessSymbols(xmlNodePtr symbols) { - for (auto element = xmlFirstElementChild(symbols); element; - element = xmlNextElementSibling(element)) { - CheckElementName("elf-symbol", element); + for (auto* element = Child(symbols); element; element = Next(element)) { + CheckName("elf-symbol", element); ProcessSymbol(element); } } @@ -376,7 +822,7 @@ void Abigail::ProcessSymbol(xmlNodePtr symbol) { } } -bool Abigail::ProcessUserDefinedType(const std::string& name, Id id, +bool Abigail::ProcessUserDefinedType(std::string_view name, Id id, xmlNodePtr decl) { if (name == "typedef-decl") { ProcessTypedef(id, decl); @@ -393,9 +839,8 @@ bool Abigail::ProcessUserDefinedType(const std::string& name, Id id, } void Abigail::ProcessScope(xmlNodePtr scope) { - for (auto element = xmlFirstElementChild(scope); element; - element = xmlNextElementSibling(element)) { - const auto name = GetElementName(element); + for (auto* element = Child(scope); element; element = Next(element)) { + const auto name = GetName(element); const auto type_id = GetAttribute(element, "id"); // all type elements have "id", all non-types do not if (type_id) { @@ -507,9 +952,8 @@ void Abigail::ProcessQualified(Id id, xmlNodePtr qualified) { void Abigail::ProcessArray(Id id, xmlNodePtr array) { std::vector<size_t> dimensions; - for (auto child = xmlFirstElementChild(array); child; - child = xmlNextElementSibling(child)) { - CheckElementName("subrange", child); + for (auto* child = Child(array); child; child = Next(child)) { + CheckName("subrange", child); const auto length = ReadAttribute<uint64_t>(child, 
"length", &ParseLength); dimensions.push_back(length); } @@ -557,13 +1001,11 @@ void Abigail::ProcessTypeDecl(Id id, xmlNodePtr type_decl) { void Abigail::ProcessStructUnion(Id id, bool is_struct, xmlNodePtr struct_union) { - // TODO - // Libabigail is reporting wrong information for is-declaration-only so it is - // not reliable. We are looking at the children of the element instead. - // It can be removed once the bug is fixed. + // Libabigail sometimes reports is-declaration-only but still provides some + // child elements. So we check both things. const bool forward = ReadAttribute<bool>(struct_union, "is-declaration-only", false) - && !xmlFirstElementChild(struct_union); + && Child(struct_union) == nullptr; const auto kind = is_struct ? StructUnion::Kind::STRUCT : StructUnion::Kind::UNION; @@ -588,9 +1030,8 @@ void Abigail::ProcessStructUnion(Id id, bool is_struct, std::vector<Id> base_classes; std::vector<Id> methods; std::vector<Id> members; - for (xmlNodePtr child = xmlFirstElementChild(struct_union); child; - child = xmlNextElementSibling(child)) { - const auto child_name = GetElementName(child); + for (auto* child = Child(struct_union); child; child = Next(child)) { + const auto child_name = GetName(child); if (child_name == "data-member") { if (const auto member = ProcessDataMember(is_struct, child)) { members.push_back(*member); @@ -621,15 +1062,15 @@ void Abigail::ProcessEnum(Id id, xmlNodePtr enumeration) { return; } - xmlNodePtr underlying = xmlFirstElementChild(enumeration); + xmlNodePtr underlying = Child(enumeration); Check(underlying) << "enum-decl has no child elements"; - CheckElementName("underlying-type", underlying); + CheckName("underlying-type", underlying); const auto type = GetEdge(underlying); std::vector<std::pair<std::string, int64_t>> enumerators; - for (xmlNodePtr enumerator = xmlNextElementSibling(underlying); enumerator; - enumerator = xmlNextElementSibling(enumerator)) { - CheckElementName("enumerator", enumerator); + for (auto* 
enumerator = Next(underlying); enumerator; + enumerator = Next(enumerator)) { + CheckName("enumerator", enumerator); const auto enumerator_name = GetAttributeOrDie(enumerator, "name"); // libabigail currently supports anything that fits in an int64_t const auto enumerator_value = @@ -652,8 +1093,8 @@ Id Abigail::ProcessBaseClass(xmlNodePtr base_class) { std::optional<Id> Abigail::ProcessDataMember(bool is_struct, xmlNodePtr data_member) { - xmlNodePtr decl = GetOnlyChild("data-member", data_member); - CheckElementName("var-decl", decl); + xmlNodePtr decl = GetOnlyChild(data_member); + CheckName("var-decl", decl); if (ReadAttribute<bool>(data_member, "static", false)) { ProcessDecl(true, decl); return {}; @@ -670,8 +1111,8 @@ std::optional<Id> Abigail::ProcessDataMember(bool is_struct, } Id Abigail::ProcessMemberFunction(xmlNodePtr method) { - xmlNodePtr decl = GetOnlyChild("member-function", method); - CheckElementName("function-decl", decl); + xmlNodePtr decl = GetOnlyChild(method); + CheckName("function-decl", decl); static const std::string missing = "{missing}"; const auto mangled_name = ReadAttribute(decl, "mangled-name", missing); const auto name = GetAttributeOrDie(decl, "name"); @@ -686,14 +1127,14 @@ Id Abigail::ProcessMemberFunction(xmlNodePtr method) { } void Abigail::ProcessMemberType(xmlNodePtr member_type) { - xmlNodePtr decl = GetOnlyChild("member-type", member_type); + xmlNodePtr decl = GetOnlyChild(member_type); const auto type_id = GetAttributeOrDie(decl, "id"); const auto id = GetNode(type_id); if (graph_.Is(id)) { Warn() << "duplicate definition of member type '" << type_id << '\''; return; } - const auto name = GetElementName(decl); + const auto name = GetName(decl); if (!ProcessUserDefinedType(name, id, decl)) { Die() << "unrecognised member-type child element '" << name << "'"; } @@ -748,27 +1189,30 @@ Id Abigail::BuildSymbols() { return graph_.Add<Interface>(symbols); } -Id Read(Graph& graph, const std::string& path, Metrics& metrics) { 
+Document Read(const std::string& path, Metrics& metrics) { // Open input for reading. FileDescriptor fd(path.c_str(), O_RDONLY); // Read the XML. - std::unique_ptr<std::remove_pointer<xmlDocPtr>::type, void(*)(xmlDocPtr)> - document(nullptr, xmlFreeDoc); + Document document(nullptr, xmlFreeDoc); { Time t(metrics, "abigail.libxml_parse"); std::unique_ptr< - std::remove_pointer<xmlParserCtxtPtr>::type, void(*)(xmlParserCtxtPtr)> + std::remove_pointer_t<xmlParserCtxtPtr>, void(*)(xmlParserCtxtPtr)> context(xmlNewParserCtxt(), xmlFreeParserCtxt); document.reset( - xmlCtxtReadFd(context.get(), fd.Value(), nullptr, nullptr, 0)); + xmlCtxtReadFd(context.get(), fd.Value(), nullptr, nullptr, + XML_PARSE_NONET)); } Check(document != nullptr) << "failed to parse input as XML"; - // Get the root element. + return document; +} + +Id Read(Graph& graph, const std::string& path, Metrics& metrics) { + const Document document = Read(path, metrics); xmlNodePtr root = xmlDocGetRootElement(document.get()); Check(root) << "XML document has no root element"; - return Abigail(graph).ProcessRoot(root); } diff --git a/abigail_reader.h b/abigail_reader.h index 2ee5fc6..b040352 100644 --- a/abigail_reader.h +++ b/abigail_reader.h @@ -21,8 +21,11 @@ #ifndef STG_ABIGAIL_READER_H_ #define STG_ABIGAIL_READER_H_ +#include <memory> #include <optional> #include <string> +#include <string_view> +#include <type_traits> #include <unordered_map> #include <utility> #include <vector> @@ -114,7 +117,7 @@ class Abigail { void ProcessSymbols(xmlNodePtr symbols); void ProcessSymbol(xmlNodePtr symbol); - bool ProcessUserDefinedType(const std::string& name, Id id, xmlNodePtr decl); + bool ProcessUserDefinedType(std::string_view name, Id id, xmlNodePtr decl); void ProcessScope(xmlNodePtr scope); void ProcessInstr(xmlNodePtr instr); @@ -144,6 +147,14 @@ class Abigail { Id Read(Graph& graph, const std::string& path, Metrics& metrics); +// Exposed for testing. 
+void Clean(xmlNodePtr root); +bool EqualTree(xmlNodePtr left, xmlNodePtr right); +bool SubTree(xmlNodePtr left, xmlNodePtr right); +using Document = + std::unique_ptr<std::remove_pointer_t<xmlDocPtr>, void(*)(xmlDocPtr)>; +Document Read(const std::string& path, Metrics& metrics); + } // namespace abixml } // namespace stg diff --git a/abigail_reader_test.cc b/abigail_reader_test.cc new file mode 100644 index 0000000..d7f251c --- /dev/null +++ b/abigail_reader_test.cc @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// -*- mode: C++ -*- +// +// Copyright 2023 Google LLC +// +// Licensed under the Apache License v2.0 with LLVM Exceptions (the +// "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// +// https://llvm.org/LICENSE.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Author: Giuliano Procida + +#include <filesystem> +#include <fstream> +#include <iostream> +#include <optional> +#include <ostream> +#include <sstream> +#include <vector> + +#include <catch2/catch.hpp> +#include "abigail_reader.h" +#include "graph.h" +#include "metrics.h" +#include "equality.h" + +namespace { + +std::filesystem::path filename_to_path(const char* f) { + return std::filesystem::path("testdata") / f; +} + +stg::abixml::Document Read(const char* input) { + stg::Metrics metrics; + return stg::abixml::Read(filename_to_path(input), metrics); +} + +stg::Id Read(stg::Graph& graph, const char* input) { + stg::Metrics metrics; + return stg::abixml::Read(graph, filename_to_path(input), metrics); +} + +struct EqualTreeTestCase { + const char* name; + const char* left; + const char* right; + bool equal; +}; + +TEST_CASE("EqualTree") { + const auto test = GENERATE( + EqualTreeTestCase( + {"cleaning", + "abigail_dirty.xml", + "abigail_clean.xml", + true}), + EqualTreeTestCase( + {"self comparison", + "abigail_tree_0.xml", + "abigail_tree_0.xml", + true}), + EqualTreeTestCase( + {"attribute order is irrelevant", + "abigail_tree_0.xml", + "abigail_tree_1.xml", + true}), + EqualTreeTestCase( + {"element order is relevant", + "abigail_tree_0.xml", + "abigail_tree_2.xml", + false}), + EqualTreeTestCase( + {"attribute missing", + "abigail_tree_0.xml", + "abigail_tree_3.xml", + false}), + EqualTreeTestCase( + {"element missing", + "abigail_tree_0.xml", + "abigail_tree_4.xml", + false}), + EqualTreeTestCase( + {"attribute changed", + "abigail_tree_0.xml", + "abigail_tree_5.xml", + false}), + EqualTreeTestCase( + {"element changed", + "abigail_tree_0.xml", + "abigail_tree_6.xml", + false})); + + SECTION(test.name) { + const stg::abixml::Document left_document = Read(test.left); + const stg::abixml::Document right_document = Read(test.right); + xmlNodePtr left_root = xmlDocGetRootElement(left_document.get()); + xmlNodePtr right_root = 
xmlDocGetRootElement(right_document.get()); + stg::abixml::Clean(left_root); + stg::abixml::Clean(right_root); + CHECK(stg::abixml::EqualTree(left_root, right_root) == test.equal); + CHECK(stg::abixml::EqualTree(right_root, left_root) == test.equal); + } +} + +struct SubTreeTestCase { + const char* name; + const char* left; + const char* right; + bool left_sub_right; + bool right_sub_left; +}; + +TEST_CASE("SubTree") { + const auto test = GENERATE( + SubTreeTestCase( + {"self comparison", + "abigail_tree_0.xml", + "abigail_tree_0.xml", + true, true}), + SubTreeTestCase( + {"attribute missing", + "abigail_tree_0.xml", + "abigail_tree_3.xml", + false, true}), + SubTreeTestCase( + {"element missing", + "abigail_tree_0.xml", + "abigail_tree_4.xml", + false, true}), + SubTreeTestCase( + {"member-type access special case", + "abigail_tree_0.xml", + "abigail_tree_7.xml", + true, true})); + + SECTION(test.name) { + const stg::abixml::Document left_document = Read(test.left); + const stg::abixml::Document right_document = Read(test.right); + xmlNodePtr left_root = xmlDocGetRootElement(left_document.get()); + xmlNodePtr right_root = xmlDocGetRootElement(right_document.get()); + stg::abixml::Clean(left_root); + stg::abixml::Clean(right_root); + CHECK(stg::abixml::SubTree(left_root, right_root) == test.left_sub_right); + CHECK(stg::abixml::SubTree(right_root, left_root) == test.right_sub_left); + } +} + +struct TidyTestCase { + const char* name; + const std::vector<const char*> files; +}; + +TEST_CASE("Tidy") { + const auto test = GENERATE( + TidyTestCase( + {"anonymous type normalisation", + {"abigail_anonymous_types_0.xml", + "abigail_anonymous_types_1.xml", + "abigail_anonymous_types_2.xml", + "abigail_anonymous_types_3.xml", + "abigail_anonymous_types_4.xml"}}), + TidyTestCase( + {"duplicate data members", + {"abigail_duplicate_data_members_0.xml", + "abigail_duplicate_data_members_1.xml"}}), + TidyTestCase( + {"duplicate type resolution - exact duplicate", + 
{"abigail_duplicate_types_0.xml", + "abigail_duplicate_types_1.xml"}}), + TidyTestCase( + {"duplicate type resolution - partial duplicate", + {"abigail_duplicate_types_0.xml", + "abigail_duplicate_types_2.xml"}}), + TidyTestCase( + {"duplicate type resolution - multiple partial duplicates", + {"abigail_duplicate_types_0.xml", + "abigail_duplicate_types_3.xml"}}), + TidyTestCase( + {"duplicate type resolution - no maximal duplicate", + {"abigail_duplicate_types_4.xml", + "abigail_duplicate_types_5.xml"}}), + TidyTestCase( + {"duplicate type resolution - different scopes", + {"abigail_duplicate_types_4.xml", + "abigail_duplicate_types_6.xml"}})); + + SECTION(test.name) { + // Read inputs. + stg::Graph graph; + std::vector<stg::Id> ids; + for (const char* file : test.files) { + ids.push_back(Read(graph, file)); + } + + // Useless equality cache. + struct NoCache { + static std::optional<bool> Query(const stg::Pair&) { + return std::nullopt; + } + void AllSame(const std::vector<stg::Pair>&) {} + void AllDifferent(const std::vector<stg::Pair>&) {} + }; + + // Check exact equality. 
+ NoCache cache; + for (size_t ix = 1; ix < ids.size(); ++ix) { + CHECK(stg::Equals<NoCache>(graph, cache)(ids[0], ids[ix])); + } + } +} + +} // namespace diff --git a/btf_reader.cc b/btf_reader.cc index 0bf3aa2..3063592 100644 --- a/btf_reader.cc +++ b/btf_reader.cc @@ -28,7 +28,6 @@ #include <algorithm> #include <array> #include <cstring> -#include <iomanip> #include <iostream> #include <memory> #include <optional> @@ -41,6 +40,7 @@ #include "elf_loader.h" #include "error.h" #include "file_descriptor.h" +#include "reader_options.h" namespace stg { @@ -518,7 +518,7 @@ Id Structs::BuildSymbols() { return graph_.Add<Interface>(btf_symbols_); } -Id ReadFile(Graph& graph, const std::string& path, bool verbose) { +Id ReadFile(Graph& graph, const std::string& path, ReadOptions options) { Check(elf_version(EV_CURRENT) != EV_NONE) << "ELF version mismatch"; struct ElfDeleter { void operator()(Elf* elf) { @@ -538,7 +538,8 @@ Id ReadFile(Graph& graph, const std::string& path, bool verbose) { } } const elf::ElfLoader loader(elf.get()); - return Structs(graph, verbose).Process(loader.GetBtfRawData()); + return Structs(graph, options.Test(ReadOptions::INFO)) + .Process(loader.GetBtfRawData()); } } // namespace btf diff --git a/btf_reader.h b/btf_reader.h index b777e64..f5f99e7 100644 --- a/btf_reader.h +++ b/btf_reader.h @@ -32,6 +32,7 @@ #include <linux/btf.h> #include "graph.h" +#include "reader_options.h" namespace stg { namespace btf { @@ -82,7 +83,7 @@ class Structs { static void PrintStrings(MemoryRange memory); }; -Id ReadFile(Graph& graph, const std::string& path, bool verbose = false); +Id ReadFile(Graph& graph, const std::string& path, ReadOptions options); } // namespace btf } // namespace stg diff --git a/comparison.cc b/comparison.cc index 66416f0..29453b8 100644 --- a/comparison.cc +++ b/comparison.cc @@ -23,6 +23,7 @@ #include <algorithm> #include <array> +#include <map> #include <optional> #include <ostream> #include <sstream> @@ -298,9 +299,10 @@ bool 
Compare::CompareDefined(bool defined1, bool defined2, Result& result) { return false; } +namespace { + using KeyIndexPairs = std::vector<std::pair<std::string, size_t>>; -static KeyIndexPairs MatchingKeys(const Graph& graph, - const std::vector<Id>& ids) { +KeyIndexPairs MatchingKeys(const Graph& graph, const std::vector<Id>& ids) { KeyIndexPairs keys; const auto size = ids.size(); keys.reserve(size); @@ -318,8 +320,7 @@ static KeyIndexPairs MatchingKeys(const Graph& graph, using MatchedPairs = std::vector<std::pair<std::optional<size_t>, std::optional<size_t>>>; -static MatchedPairs PairUp(const KeyIndexPairs& keys1, - const KeyIndexPairs& keys2) { +MatchedPairs PairUp(const KeyIndexPairs& keys1, const KeyIndexPairs& keys2) { MatchedPairs pairs; pairs.reserve(std::max(keys1.size(), keys2.size())); auto it1 = keys1.begin(); @@ -345,10 +346,8 @@ static MatchedPairs PairUp(const KeyIndexPairs& keys1, return pairs; } -static void CompareNodes(Result& result, Compare& compare, - const std::vector<Id>& ids1, - const std::vector<Id>& ids2, - const bool reorder) { +void CompareNodes(Result& result, Compare& compare, const std::vector<Id>& ids1, + const std::vector<Id>& ids2, const bool reorder) { const auto keys1 = MatchingKeys(compare.graph, ids1); const auto keys2 = MatchingKeys(compare.graph, ids2); auto pairs = PairUp(keys1, keys2); @@ -373,6 +372,48 @@ static void CompareNodes(Result& result, Compare& compare, } } +void CompareNodes(Result& result, Compare& compare, + const std::map<std::string, Id>& x1, + const std::map<std::string, Id>& x2) { + // Group diffs into removed, added and changed symbols for readability. 
+ std::vector<Id> removed; + std::vector<Id> added; + std::vector<std::pair<Id, Id>> in_both; + + auto it1 = x1.begin(); + auto it2 = x2.begin(); + const auto end1 = x1.end(); + const auto end2 = x2.end(); + while (it1 != end1 || it2 != end2) { + if (it2 == end2 || (it1 != end1 && it1->first < it2->first)) { + // removed + removed.push_back(it1->second); + ++it1; + } else if (it1 == end1 || (it2 != end2 && it1->first > it2->first)) { + // added + added.push_back(it2->second); + ++it2; + } else { + // in both + in_both.emplace_back(it1->second, it2->second); + ++it1; + ++it2; + } + } + + for (const auto symbol1 : removed) { + result.AddEdgeDiff("", compare.Removed(symbol1)); + } + for (const auto symbol2 : added) { + result.AddEdgeDiff("", compare.Added(symbol2)); + } + for (const auto& [symbol1, symbol2] : in_both) { + result.MaybeAddEdgeDiff("", compare(symbol1, symbol2)); + } +} + +} // namespace + Result Compare::operator()(const BaseClass& x1, const BaseClass& x2) { Result result; result.MaybeAddNodeDiff("inheritance", x1.inheritance, x2.inheritance); @@ -627,45 +668,8 @@ Result Compare::operator()(const ElfSymbol& x1, const ElfSymbol& x2) { Result Compare::operator()(const Interface& x1, const Interface& x2) { Result result; result.diff_.holds_changes = true; - - // Group diffs into removed, added and changed symbols for readability. 
- std::vector<Id> removed; - std::vector<Id> added; - std::vector<std::pair<Id, Id>> in_both; - - const auto& symbols1 = x1.symbols; - const auto& symbols2 = x2.symbols; - auto it1 = symbols1.begin(); - auto it2 = symbols2.begin(); - const auto end1 = symbols1.end(); - const auto end2 = symbols2.end(); - while (it1 != end1 || it2 != end2) { - if (it2 == end2 || (it1 != end1 && it1->first < it2->first)) { - // removed - removed.push_back(it1->second); - ++it1; - } else if (it1 == end1 || (it2 != end2 && it1->first > it2->first)) { - // added - added.push_back(it2->second); - ++it2; - } else { - // in both - in_both.emplace_back(it1->second, it2->second); - ++it1; - ++it2; - } - } - - for (const auto symbol1 : removed) { - result.AddEdgeDiff("", Removed(symbol1)); - } - for (const auto symbol2 : added) { - result.AddEdgeDiff("", Added(symbol2)); - } - for (const auto& [symbol1, symbol2] : in_both) { - result.MaybeAddEdgeDiff("", (*this)(symbol1, symbol2)); - } - + CompareNodes(result, *this, x1.symbols, x2.symbols); + CompareNodes(result, *this, x1.types, x2.types); return result; } diff --git a/comparison.h b/comparison.h index 2456392..724c0ca 100644 --- a/comparison.h +++ b/comparison.h @@ -191,7 +191,7 @@ struct Result { struct HashComparison { size_t operator()(const Comparison& comparison) const { size_t seed = 0; - std::hash<std::optional<Id>> h; + const std::hash<std::optional<Id>> h; combine_hash(seed, h(comparison.first)); combine_hash(seed, h(comparison.second)); return seed; @@ -13,7 +13,7 @@ stg [-m|--metrics] [-i|--info] [-d|--keep-duplicates] - [-u|--unstable] + [-t|--types] [-S|--symbols <filter>] [--skip-dwarf] [-a|--abi|-b|--btf|-e|--elf|-s|--stg] [file] ... @@ -36,9 +36,14 @@ The tool can be passed any number of inputs to combine into a unified ABI. * `-a|--abi` - Read ABI XML representation generated by libabigail's `abidw`. 
It is very - strongly recommended that ABI XML be passed through `abitidy --all` first as - `stgdiff` cannot resolve issues such as duplicate types in "untidy" XML. + Read ABI XML representation generated by libabigail's `abidw`. Not all ABI + XML features are consumed. Some XML "tidying" is performed before parsing: + + * types with naming typedefs are re-anonymised + * (rare) duplicate data members are removed + * (partial and entire) duplicate type definitions are removed + + After parsing, function parameter and return type qualifiers are removed. * `-b|--btf` @@ -63,6 +68,11 @@ The tool can be passed any number of inputs to combine into a unified ABI. ### Options +* `--types` + + Captures all named types found in ELF files as interface types, regardless + of whether those types are reachable by any symbol. + * `--skip-dwarf` Disable DWARF processing, when reading ELF files. For other formats this @@ -70,9 +80,16 @@ The tool can be passed any number of inputs to combine into a unified ABI. ## Merge -If multiple (or zero) inputs are provided, then a symbol merge operation is run. +If multiple (or zero) inputs are provided, then ABI roots from all inputs are +merged. + +### Symbols -The resulting ABI has the union of the inputs' symbols, which must be disjoint. +Symbols must be disjoint across all inputs. + +### Types + +Merging is not yet supported with type roots. ## Filter @@ -117,11 +134,6 @@ nodes that are recursively equal. By default, duplicate nodes are eliminated. NOTE: The `.stg` format is still novel and subject to change. -* `-u|--unstable`: Use internal node ids in the ABI representation, instead of - stabilised external node ids. - - This is intended for debugging purposes. 
- ## Diagnostics * `-m|--metrics` diff --git a/doc/stgdiff.md b/doc/stgdiff.md index c9ae3d0..a2c613c 100644 --- a/doc/stgdiff.md +++ b/doc/stgdiff.md @@ -11,6 +11,7 @@ stgdiff [-a|--abi|-b|--btf|-e|--elf|-s|--stg] file1 [-a|--abi|-b|--btf|-e|--elf|-s|--stg] file2 [-x|--exact] + [-t|--types] [--skip-dwarf] [{-i|--ignore} <ignore-option>] ... [{-f|--format} <output-format>] ... @@ -28,9 +29,14 @@ ignore options: type_declaration_status symbol_type_presence primitive_type_enco * `-a|--abi` - Read ABI XML representation generated by libabigail's `abidw`. It is very - strongly recommended that ABI XML be passed through `abitidy --all` first as - `stgdiff` cannot resolve issues such as duplicate types in "untidy" XML. + Read ABI XML representation generated by libabigail's `abidw`. Not all ABI + XML features are consumed. Some XML "tidying" is performed before parsing: + + * types with naming typedefs are re-anonymised + * (rare) duplicate data members are removed + * (partial and entire) duplicate type definitions are removed + + After parsing, function parameter and return type qualifiers are removed. * `-b|--btf` @@ -55,6 +61,11 @@ ignore options: type_declaration_status symbol_type_presence primitive_type_enco ### Options +* `--types` + + Captures all named types found in ELF files as interface types, regardless + of whether those types are reachable by any symbol. + * `--skip-dwarf` Disable DWARF processing, when reading ELF files. For other formats this @@ -215,7 +226,7 @@ symbol table nodes. ## Exact Node Equality -** `-x|--exact`: perform exact node equality (ignoring node identity) instead +* `-x|--exact`: perform exact node equality (ignoring node identity) instead of generating an ABI equivalence diff graph; no outputs may be specified. ## Other options: @@ -270,3 +281,10 @@ return 0. 
Otherwise: ``` stgdiff -s abi.0.stg abi.1.stg -F - ``` + +* Compare symbols and named types in two ELF files and print a short report to + stdout: + + ``` + stgdiff -f short -t -e example1.o example2.o -o - + ``` diff --git a/elf_reader.cc b/elf_reader.cc index 2b9ee27..9afb8c6 100644 --- a/elf_reader.cc +++ b/elf_reader.cc @@ -19,16 +19,14 @@ #include "elf_reader.h" -#include <algorithm> #include <functional> +#include <iomanip> #include <ios> #include <iostream> -#include <iterator> #include <map> #include <optional> #include <string> #include <string_view> -#include <unordered_set> #include <utility> #include <vector> @@ -37,8 +35,10 @@ #include "elf_loader.h" #include "equality.h" #include "equality_cache.h" +#include "error.h" #include "graph.h" #include "metrics.h" +#include "reader_options.h" #include "type_normalisation.h" #include "type_resolution.h" @@ -48,6 +48,25 @@ namespace internal { namespace { +struct IsTypeDefined { + bool operator()(const Typedef&) const { + return true; + } + + bool operator()(const StructUnion& x) const { + return x.definition.has_value(); + } + + bool operator()(const Enumeration& x) const { + return x.definition.has_value(); + } + + template <typename Node> + bool operator()(const Node&) const { + Die() << "expected a Typedef/StructUnion/Enumeration node"; + } +}; + template <typename M, typename K> std::optional<typename M::mapped_type> MaybeGet(const M& map, const K& key) { const auto it = map.find(key); @@ -182,10 +201,12 @@ class Typing { equality_cache_(metrics), equals_(graph, equality_cache_) {} - void GetTypesFromDwarf(dwarf::Handler& dwarf, bool is_little_endian_binary) { + const std::vector<Id>& GetTypesFromDwarf(dwarf::Handler& dwarf, + bool is_little_endian_binary) { types_ = dwarf::Process(dwarf, is_little_endian_binary, graph_); ResolveTypes(); FillAddressToId(); + return types_.named_type_ids; } void ResolveTypes() { @@ -284,22 +305,20 @@ class Typing { class Reader { public: - Reader(Graph& graph, const 
std::string& path, bool process_dwarf, - bool verbose, Metrics& metrics) + Reader(Graph& graph, const std::string& path, ReadOptions options, + Metrics& metrics) : graph_(graph), dwarf_(path), - elf_(dwarf_.GetElf(), verbose), - process_dwarf_(process_dwarf), - verbose_(verbose), + elf_(dwarf_.GetElf(), options.Test(ReadOptions::INFO)), + options_(options), typing_(graph_, metrics) {} - Reader(Graph& graph, char* data, size_t size, bool process_dwarf, - bool verbose, Metrics& metrics) + Reader(Graph& graph, char* data, size_t size, ReadOptions options, + Metrics& metrics) : graph_(graph), dwarf_(data, size), - elf_(dwarf_.GetElf(), verbose), - process_dwarf_(process_dwarf), - verbose_(verbose), + elf_(dwarf_.GetElf(), options.Test(ReadOptions::INFO)), + options_(options), typing_(graph_, metrics) {} Id Read(); @@ -311,8 +330,7 @@ class Reader { // an Elf* from dwarf::Handler without owning it. dwarf::Handler dwarf_; elf::ElfLoader elf_; - bool process_dwarf_; - bool verbose_; + ReadOptions options_; // Data extracted from ELF CRCValuesMap crc_values_; @@ -322,7 +340,7 @@ class Reader { Id Reader::Read() { const auto all_symbols = elf_.GetElfSymbols(); - if (verbose_) { + if (options_.Test(ReadOptions::INFO)) { std::cout << "Parsed " << all_symbols.size() << " symbols\n"; } @@ -345,7 +363,7 @@ Id Reader::Read() { namespaces_ = GetNamespacesMap(all_symbols, elf_); } - if (verbose_) { + if (options_.Test(ReadOptions::INFO)) { std::cout << "File has " << public_functions_and_variables.size() << " public functions and variables:\n"; for (const auto& symbol : public_functions_and_variables) { @@ -356,8 +374,22 @@ Id Reader::Read() { } } - if (process_dwarf_) { - typing_.GetTypesFromDwarf(dwarf_, elf_.IsLittleEndianBinary()); + std::map<std::string, Id> types_map; + if (!options_.Test(ReadOptions::SKIP_DWARF)) { + const auto& named_type_ids = + typing_.GetTypesFromDwarf(dwarf_, elf_.IsLittleEndianBinary()); + if (options_.Test(ReadOptions::TYPE_ROOTS)) { + const 
IsTypeDefined is_type_defined; + const InterfaceKey get_key(graph_); + for (const auto id : named_type_ids) { + if (graph_.Apply<bool>(is_type_defined, id)) { + const auto [it, inserted] = types_map.emplace(get_key(id), id); + if (!inserted) { + Die() << "found conflicting interface type: " << it->first; + } + } + } + } } std::map<std::string, Id> symbols_map; @@ -369,7 +401,8 @@ Id Reader::Read() { std::string(symbol.name), graph_.Add<ElfSymbol>(SymbolTableEntryToElfSymbol(symbol))); } - auto root = graph_.Add<Interface>(std::move(symbols_map)); + auto root = + graph_.Add<Interface>(std::move(symbols_map), std::move(types_map)); // Types produced by ELF/DWARF readers may require removing useless // qualifiers. RemoveUselessQualifiers(graph_, root); @@ -397,15 +430,14 @@ ElfSymbol Reader::SymbolTableEntryToElfSymbol( } // namespace } // namespace internal -Id Read(Graph& graph, const std::string& path, bool process_dwarf, - bool verbose, Metrics& metrics) { - return internal::Reader(graph, path, process_dwarf, verbose, metrics).Read(); +Id Read(Graph& graph, const std::string& path, ReadOptions options, + Metrics& metrics) { + return internal::Reader(graph, path, options, metrics).Read(); } -Id Read(Graph& graph, char* data, size_t size, bool process_dwarf, - bool verbose, Metrics& metrics) { - return internal::Reader(graph, data, size, process_dwarf, verbose, metrics) - .Read(); +Id Read(Graph& graph, char* data, size_t size, ReadOptions options, + Metrics& metrics) { + return internal::Reader(graph, data, size, options, metrics).Read(); } } // namespace elf diff --git a/elf_reader.h b/elf_reader.h index ca6f3c7..0740a39 100644 --- a/elf_reader.h +++ b/elf_reader.h @@ -29,14 +29,15 @@ #include "elf_loader.h" #include "graph.h" #include "metrics.h" +#include "reader_options.h" namespace stg { namespace elf { -Id Read(Graph& graph, const std::string& path, bool process_dwarf, - bool verbose, Metrics& metrics); -Id Read(Graph& graph, char* data, size_t size, bool 
process_dwarf, - bool verbose, Metrics& metrics); +Id Read(Graph& graph, const std::string& path, ReadOptions options, + Metrics& metrics); +Id Read(Graph& graph, char* data, size_t size, ReadOptions options, + Metrics& metrics); // For unit tests only namespace internal { @@ -22,7 +22,6 @@ #include <map> #include <vector> -#include <utility> #include "graph.h" #include "scc.h" @@ -216,7 +215,8 @@ struct Equals { } bool operator()(const Interface& x1, const Interface& x2) { - return (*this)(x1.symbols, x2.symbols); + return (*this)(x1.symbols, x2.symbols) + && (*this)(x1.types, x2.types); } bool Mismatch() { diff --git a/equality_cache.h b/equality_cache.h index 370520a..35d58a8 100644 --- a/equality_cache.h +++ b/equality_cache.h @@ -77,8 +77,8 @@ struct EqualityCache { ++query_unequal_hashes; return std::make_optional(false); } - Id fid1 = Find(id1); - Id fid2 = Find(id2); + const Id fid1 = Find(id1); + const Id fid2 = Find(id2); if (fid1 == fid2) { ++query_equal_representatives; return std::make_optional(true); @@ -192,8 +192,8 @@ struct EqualityCache { ++disunion_known_hash; return; } - Id fid1 = Find(id1); - Id fid2 = Find(id2); + const Id fid1 = Find(id1); + const Id fid2 = Find(id2); Check(fid1 != fid2) << "disunion of equal"; if (inequalities[fid1].insert(fid2).second) { inequalities[fid2].insert(fid1); diff --git a/fidelity.cc b/fidelity.cc index f5b5718..d25e1c4 100644 --- a/fidelity.cc +++ b/fidelity.cc @@ -20,6 +20,7 @@ #include "fidelity.h" #include <algorithm> +#include <map> #include <ostream> #include <set> #include <string> @@ -73,6 +74,7 @@ struct Fidelity { void operator()(Id); void operator()(const std::vector<Id>&); + void operator()(const std::map<std::string, Id>&); void operator()(const Void&, Id); void operator()(const Variadic&, Id); void operator()(const PointerReference&, Id); @@ -109,6 +111,12 @@ void Fidelity::operator()(const std::vector<Id>& x) { } } +void Fidelity::operator()(const std::map<std::string, Id>& x) { + for (const auto& 
[_, id] : x) { + (*this)(id); + } +} + void Fidelity::operator()(const Void&, Id) {} void Fidelity::operator()(const Variadic&, Id) {} @@ -188,9 +196,8 @@ void Fidelity::operator()(const ElfSymbol& x, Id) { } void Fidelity::operator()(const Interface& x, Id) { - for (const auto& [_, id] : x.symbols) { - (*this)(id); - } + (*this)(x.symbols); + (*this)(x.types); } template <typename T> diff --git a/fingerprint.cc b/fingerprint.cc index bd3494c..7a26aac 100644 --- a/fingerprint.cc +++ b/fingerprint.cc @@ -19,12 +19,15 @@ #include "fingerprint.h" +#include <map> #include <string> #include <unordered_map> #include <unordered_set> #include <utility> #include <vector> +#include "graph.h" +#include "hashing.h" #include "scc.h" namespace stg { @@ -136,9 +139,8 @@ struct Hasher { } HashValue operator()(const Interface& x) { - for (const auto& [name, symbol] : x.symbols) { - todo.insert(symbol); - } + ToDo(x.symbols); + ToDo(x.types); return hash('Z'); } @@ -193,6 +195,12 @@ struct Hasher { } } + void ToDo(const std::map<std::string, Id>& ids) { + for (const auto& [_, id] : ids) { + todo.insert(id); + } + } + const Graph& graph; std::unordered_map<Id, HashValue>& hashes; std::unordered_set<Id> &todo; diff --git a/fuzz/abigail_reader_fuzzer.cc b/fuzz/abigail_reader_fuzzer.cc index 637eef8..2e01b94 100644 --- a/fuzz/abigail_reader_fuzzer.cc +++ b/fuzz/abigail_reader_fuzzer.cc @@ -31,8 +31,9 @@ extern "C" int LLVMFuzzerTestOneInput(char* data, size_t size) { xmlParserCtxtPtr ctxt = xmlNewParserCtxt(); // Suppress libxml error messages. xmlSetGenericErrorFunc(ctxt, (xmlGenericErrorFunc) DoNothing); - xmlDocPtr doc = xmlCtxtReadMemory(ctxt, data, size, nullptr, nullptr, - XML_PARSE_NOERROR | XML_PARSE_NOWARNING); + xmlDocPtr doc = xmlCtxtReadMemory( + ctxt, data, size, nullptr, nullptr, + XML_PARSE_NOERROR | XML_PARSE_NONET | XML_PARSE_NOWARNING); xmlFreeParserCtxt(ctxt); // Bail out if the doc XML is invalid. 
diff --git a/fuzz/elf_reader_fuzzer.cc b/fuzz/elf_reader_fuzzer.cc index 51d16dd..e4fc087 100644 --- a/fuzz/elf_reader_fuzzer.cc +++ b/fuzz/elf_reader_fuzzer.cc @@ -24,6 +24,7 @@ #include "error.h" #include "graph.h" #include "metrics.h" +#include "reader_options.h" extern "C" int LLVMFuzzerTestOneInput(char* data, size_t size) { try { @@ -33,8 +34,7 @@ extern "C" int LLVMFuzzerTestOneInput(char* data, size_t size) { std::vector<char> data_copy(data, data + size); stg::Graph graph; stg::Metrics metrics; - stg::elf::Read(graph, data_copy.data(), size, /* process_dwarf= */ true, - /* verbose= */ false, metrics); + stg::elf::Read(graph, data_copy.data(), size, stg::ReadOptions(), metrics); } catch (const stg::Exception&) { // Pass as this is us catching invalid ELF properly. } @@ -28,6 +28,7 @@ #include <map> #include <optional> #include <ostream> +#include <sstream> #include <string> #include <type_traits> #include <utility> @@ -312,8 +313,12 @@ std::ostream& operator<<(std::ostream& os, ElfSymbol::CRC crc); struct Interface { explicit Interface(const std::map<std::string, Id>& symbols) : symbols(symbols) {} + Interface(const std::map<std::string, Id>& symbols, + const std::map<std::string, Id>& types) + : symbols(symbols), types(types) {} std::map<std::string, Id> symbols; + std::map<std::string, Id> types; }; std::ostream& operator<<(std::ostream& os, Primitive::Encoding encoding); @@ -644,6 +649,45 @@ Result Graph::Apply(FunctionObject& function, Id id, Args&&... 
args) { adapter, id, std::forward<Args>(args)...); } +struct InterfaceKey { + explicit InterfaceKey(const Graph& graph) : graph(graph) {} + + std::string operator()(Id id) const { + return graph.Apply<std::string>(*this, id); + } + + std::string operator()(const stg::Typedef& x) const { + return x.name; + } + + std::string operator()(const stg::StructUnion& x) const { + if (x.name.empty()) { + Die() << "anonymous struct/union interface type"; + } + std::ostringstream os; + os << x.kind << ' ' << x.name; + return os.str(); + } + + std::string operator()(const stg::Enumeration& x) const { + if (x.name.empty()) { + Die() << "anonymous enum interface type"; + } + return "enum " + x.name; + } + + std::string operator()(const stg::ElfSymbol& x) const { + return VersionedSymbolName(x); + } + + template <typename Node> + std::string operator()(const Node&) const { + Die() << "unexpected interface type"; + } + + const Graph& graph; +}; + } // namespace stg #endif // STG_GRAPH_H_ @@ -69,8 +69,8 @@ struct Hash { // Hash unsigned 64 bits by splitting, hashing and combining. constexpr HashValue operator()(uint64_t x) const { - uint32_t lo = x; - uint32_t hi = x >> 32; + const uint32_t lo = x; + const uint32_t hi = x >> 32; return (*this)(lo, hi); } @@ -122,8 +122,8 @@ struct Hash { // Reverse order Boost hash_combine (must be used with good hashes). template <typename Arg, typename... Args> constexpr HashValue operator()(Arg arg, Args... 
args) const { - uint32_t seed = (*this)(args...).value; - uint32_t hash = (*this)(arg).value; + const uint32_t seed = (*this)(args...).value; + const uint32_t hash = (*this)(arg).value; return HashValue(seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2))); } }; @@ -25,11 +25,12 @@ #include "graph.h" #include "metrics.h" #include "proto_reader.h" +#include "reader_options.h" namespace stg { -Id Read(Graph& graph, InputFormat format, const char* input, bool process_dwarf, - bool info, Metrics& metrics) { +Id Read(Graph& graph, InputFormat format, const char* input, + ReadOptions options, Metrics& metrics) { switch (format) { case InputFormat::ABI: { Time read(metrics, "read ABI"); @@ -37,11 +38,11 @@ Id Read(Graph& graph, InputFormat format, const char* input, bool process_dwarf, } case InputFormat::BTF: { Time read(metrics, "read BTF"); - return btf::ReadFile(graph, input, info); + return btf::ReadFile(graph, input, options); } case InputFormat::ELF: { Time read(metrics, "read ELF"); - return elf::Read(graph, input, process_dwarf, info, metrics); + return elf::Read(graph, input, options, metrics); } case InputFormat::STG: { Time read(metrics, "read STG"); @@ -22,13 +22,14 @@ #include "graph.h" #include "metrics.h" +#include "reader_options.h" namespace stg { enum class InputFormat { ABI, BTF, ELF, STG }; -Id Read(Graph& graph, InputFormat format, const char* input, bool process_dwarf, - bool info, Metrics& metrics); +Id Read(Graph& graph, InputFormat format, const char* input, + ReadOptions options, Metrics& metrics); } // namespace stg diff --git a/proto_reader.cc b/proto_reader.cc index c5899f5..61f5c5b 100644 --- a/proto_reader.cc +++ b/proto_reader.cc @@ -21,6 +21,7 @@ #include <algorithm> #include <array> +#include <cerrno> #include <cstdint> #include <fstream> #include <map> @@ -32,6 +33,7 @@ #include <google/protobuf/io/zero_copy_stream_impl.h> #include <google/protobuf/repeated_field.h> +#include <google/protobuf/repeated_ptr_field.h> #include 
<google/protobuf/text_format.h> #include "error.h" #include "graph.h" @@ -67,10 +69,14 @@ struct Transformer { void AddNode(const Function&); void AddNode(const ElfSymbol&); void AddNode(const Symbols&); + void AddNode(const Interface&); template <typename STGType, typename... Args> void AddNode(Args&&...); std::vector<Id> Transform(const google::protobuf::RepeatedField<uint32_t>&); + template <typename GetKey> + std::map<std::string, Id> Transform(GetKey, + const google::protobuf::RepeatedField<uint32_t>&); stg::PointerReference::Kind Transform(PointerReference::Kind); stg::Qualifier Transform(Qualified::Qualifier); stg::Primitive::Encoding Transform(Primitive::Encoding); @@ -107,9 +113,8 @@ Id Transformer::Transform(const proto::STG& x) { AddNodes(x.enumeration()); AddNodes(x.function()); AddNodes(x.elf_symbol()); - if (x.has_symbols()) { - AddNode(x.symbols()); - } + AddNodes(x.symbols()); + AddNodes(x.interface()); return GetId(x.root_id()); } @@ -239,6 +244,12 @@ void Transformer::AddNode(const Symbols& x) { AddNode<stg::Interface>(GetId(x.id()), symbols); } +void Transformer::AddNode(const Interface& x) { + const InterfaceKey get_key(graph); + AddNode<stg::Interface>(GetId(x.id()), Transform(get_key, x.symbol_id()), + Transform(get_key, x.type_id())); +} + template <typename STGType, typename... Args> void Transformer::AddNode(Args&&... 
args) { graph.Set<STGType>(Transform(args)...); @@ -254,6 +265,20 @@ std::vector<Id> Transformer::Transform( return result; } +template <typename GetKey> +std::map<std::string, Id> Transformer::Transform( + GetKey get_key, const google::protobuf::RepeatedField<uint32_t>& ids) { + std::map<std::string, Id> result; + for (auto id : ids) { + const Id stg_id = GetId(id); + const auto [it, inserted] = result.emplace(get_key(stg_id), stg_id); + if (!inserted) { + Die() << "found conflicting interface nodes: " << it->first << '\n'; + } + } + return result; +} + stg::PointerReference::Kind Transformer::Transform(PointerReference::Kind x) { switch (x) { case PointerReference::POINTER: @@ -411,7 +436,7 @@ Type Transformer::Transform(const Type& x) { return x; } -const std::array<uint32_t, 1> kSupportedFormatVersions = {0}; +const std::array<uint32_t, 2> kSupportedFormatVersions = {0, 1}; void CheckFormatVersion(uint32_t version, std::optional<std::string> path) { Check(std::count(kSupportedFormatVersions.begin(), diff --git a/proto_writer.cc b/proto_writer.cc index 9a8b683..503fa17 100644 --- a/proto_writer.cc +++ b/proto_writer.cc @@ -286,10 +286,13 @@ void Transform<MapId>::operator()(const stg::ElfSymbol& x, uint32_t id) { template <typename MapId> void Transform<MapId>::operator()(const stg::Interface& x, uint32_t id) { - auto& symbols = *stg.mutable_symbols(); - symbols.set_id(id); - for (const auto& [symbol, id] : x.symbols) { - (*symbols.mutable_symbol())[symbol] = (*this)(id); + auto& interface = *stg.add_interface(); + interface.set_id(id); + for (const auto& [_, id] : x.symbols) { + interface.add_symbol_id((*this)(id)); + } + for (const auto& [_, id] : x.types) { + interface.add_type_id((*this)(id)); } } @@ -461,7 +464,7 @@ class HexPrinter : public google::protobuf::TextFormat::FastFieldValuePrinter { } }; -const uint32_t kWrittenFormatVersion = 0; +const uint32_t kWrittenFormatVersion = 1; } // namespace @@ -475,14 +478,9 @@ void Print(const STG& stg, 
std::ostream& os) { void Writer::Write(const Id& root, std::ostream& os) { proto::STG stg; - if (stable) { - StableId stable_id(graph_); - stg.set_root_id(Transform<StableId>(graph_, stg, stable_id)(root)); - SortNodes(stg); - } else { - auto get_id = [](Id id) { return id.ix_; }; - stg.set_root_id(Transform<decltype(get_id)>(graph_, stg, get_id)(root)); - } + StableId stable_id(graph_); + stg.set_root_id(Transform<StableId>(graph_, stg, stable_id)(root)); + SortNodes(stg); stg.set_version(kWrittenFormatVersion); Print(stg, os); } diff --git a/proto_writer.h b/proto_writer.h index cacbb38..ff5be85 100644 --- a/proto_writer.h +++ b/proto_writer.h @@ -30,13 +30,12 @@ namespace proto { class Writer { public: - Writer(const stg::Graph& graph, bool stable) - : graph_(graph), stable(stable) {} + Writer(const stg::Graph& graph) + : graph_(graph) {} void Write(const Id&, std::ostream&); private: const stg::Graph& graph_; - const bool stable; }; } // namespace proto diff --git a/reader_options.h b/reader_options.h new file mode 100644 index 0000000..8b6736a --- /dev/null +++ b/reader_options.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// -*- mode: C++ -*- +// +// Copyright 2023 Google LLC +// +// Licensed under the Apache License v2.0 with LLVM Exceptions (the +// "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// +// https://llvm.org/LICENSE.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Author: Siddharth Nayyar + +#ifndef STG_READER_OPTIONS_H_ +#define STG_READER_OPTIONS_H_ + +#include <type_traits> + +namespace stg { + +struct ReadOptions { + enum Value { + INFO = 1 << 0, + SKIP_DWARF = 1 << 1, + TYPE_ROOTS = 1 << 2, + }; + + using Bitset = std::underlying_type_t<Value>; + + ReadOptions() = default; + template <typename... Values> + explicit ReadOptions(Values... values) { + for (auto value : {values...}) { + Set(value); + } + } + + void Set(Value other) { + bitset |= static_cast<Bitset>(other); + } + + bool Test(Value other) const { + return static_cast<bool>(bitset & static_cast<Bitset>(other)); + } + + Bitset bitset = 0; +}; + +} // namespace stg + +#endif // STG_READER_OPTIONS_H_ @@ -27,6 +27,7 @@ #include <memory> #include <ostream> #include <string> +#include <utility> #include <vector> #include "deduplication.h" @@ -36,6 +37,7 @@ #include "input.h" #include "metrics.h" #include "proto_writer.h" +#include "reader_options.h" #include "symbol_filter.h" #include "type_resolution.h" @@ -45,12 +47,12 @@ namespace { Metrics metrics; struct GetInterface { - const std::map<std::string, Id>& operator()(const Interface& x) { - return x.symbols; + Interface& operator()(Interface& x) { + return x; } template <typename Node> - const std::map<std::string, Id>& operator()(const Node&) { + Interface& operator()(Node&) { Die() << "expected an Interface root node"; } }; @@ -59,8 +61,12 @@ Id Merge(Graph& graph, const std::vector<Id>& roots) { std::map<std::string, Id> symbols; GetInterface get; for (auto root : roots) { - for (const auto& x : - graph.Apply<const std::map<std::string, Id>&>(get, root)) { + const auto& interface = graph.Apply<Interface&>(get, root); + // TODO: Implement merging interfaces with type roots. 
+ if (!interface.types.empty()) { + Die() << "merging interfaces with type roots is not yet supported"; + } + for (const auto& x : interface.symbols) { if (!symbols.insert(x).second) { Die() << "merge failed with duplicate symbol: " << x.first; } @@ -73,21 +79,20 @@ Id Merge(Graph& graph, const std::vector<Id>& roots) { void Filter(Graph& graph, Id root, const SymbolFilter& filter) { std::map<std::string, Id> symbols; GetInterface get; - for (const auto& x : - graph.Apply<const std::map<std::string, Id>&>(get, root)) { + auto& interface = graph.Apply<Interface&>(get, root); + for (const auto& x : interface.symbols) { if (filter(x.first)) { symbols.insert(x); } } - graph.Unset(root); - graph.Set<Interface>(root, symbols); + std::swap(interface.symbols, symbols); } -void Write(const Graph& graph, Id root, const char* output, bool stable) { +void Write(const Graph& graph, Id root, const char* output) { std::ofstream os(output); { Time x(metrics, "write"); - proto::Writer writer(graph, stable); + proto::Writer writer(graph); writer.Write(root, os); os << std::flush; } @@ -105,11 +110,9 @@ int main(int argc, char* argv[]) { }; // Process arguments. 
bool opt_metrics = false; - bool opt_info = false; bool opt_keep_duplicates = false; - bool opt_unstable = false; std::unique_ptr<stg::SymbolFilter> opt_symbols; - bool opt_skip_dwarf = false; + stg::ReadOptions opt_read_options; stg::InputFormat opt_input_format = stg::InputFormat::ABI; std::vector<const char*> inputs; std::vector<const char*> outputs; @@ -117,7 +120,7 @@ int main(int argc, char* argv[]) { {"metrics", no_argument, nullptr, 'm' }, {"info", no_argument, nullptr, 'i' }, {"keep-duplicates", no_argument, nullptr, 'd' }, - {"unstable", no_argument, nullptr, 'u' }, + {"types", no_argument, nullptr, 't' }, {"symbols", required_argument, nullptr, 'S' }, {"abi", no_argument, nullptr, 'a' }, {"btf", no_argument, nullptr, 'b' }, @@ -132,7 +135,7 @@ int main(int argc, char* argv[]) { << " [-m|--metrics]\n" << " [-i|--info]\n" << " [-d|--keep-duplicates]\n" - << " [-u|--unstable]\n" + << " [-t|--types]\n" << " [-S|--symbols <filter>]\n" << " [--skip-dwarf]\n" << " [-a|--abi|-b|--btf|-e|--elf|-s|--stg] [file] ...\n" @@ -143,7 +146,7 @@ int main(int argc, char* argv[]) { }; while (true) { int ix; - int c = getopt_long(argc, argv, "-miduS:abeso:", opts, &ix); + const int c = getopt_long(argc, argv, "-midtS:abeso:", opts, &ix); if (c == -1) { break; } @@ -153,13 +156,13 @@ int main(int argc, char* argv[]) { opt_metrics = true; break; case 'i': - opt_info = true; + opt_read_options.Set(stg::ReadOptions::INFO); break; case 'd': opt_keep_duplicates = true; break; - case 'u': - opt_unstable = true; + case 't': + opt_read_options.Set(stg::ReadOptions::TYPE_ROOTS); break; case 'S': opt_symbols = stg::MakeSymbolFilter(argument); @@ -186,7 +189,7 @@ int main(int argc, char* argv[]) { outputs.push_back(argument); break; case kSkipDwarf: - opt_skip_dwarf = true; + opt_read_options.Set(stg::ReadOptions::SKIP_DWARF); break; default: return usage(); @@ -199,7 +202,7 @@ int main(int argc, char* argv[]) { roots.reserve(inputs.size()); for (auto input : inputs) { 
roots.push_back(stg::Read(graph, opt_input_format, input, - !opt_skip_dwarf, opt_info, stg::metrics)); + opt_read_options, stg::metrics)); } stg::Id root = roots.size() == 1 ? roots[0] : stg::Merge(graph, roots); if (opt_symbols) { @@ -211,7 +214,7 @@ int main(int argc, char* argv[]) { root = stg::Deduplicate(graph, root, hashes, stg::metrics); } for (auto output : outputs) { - stg::Write(graph, root, output, !opt_unstable); + stg::Write(graph, root, output); } if (opt_metrics) { stg::Report(stg::metrics, std::cerr); @@ -239,6 +239,12 @@ message Symbols { map<string, fixed32> symbol = 2; } +message Interface { + fixed32 id = 1; + repeated fixed32 symbol_id = 2; + repeated fixed32 type_id = 3; +} + message STG { uint32 version = 1; fixed32 root_id = 2; @@ -257,5 +263,6 @@ message STG { repeated Enumeration enumeration = 15; repeated Function function = 16; repeated ElfSymbol elf_symbol = 17; - optional Symbols symbols = 18; + repeated Symbols symbols = 18; + repeated Interface interface = 19; } @@ -23,14 +23,10 @@ #include <cstddef> #include <cstring> -#include <deque> #include <fstream> #include <iostream> -#include <memory> #include <optional> #include <ostream> -#include <string> -#include <unordered_map> #include <unordered_set> #include <utility> #include <vector> @@ -38,9 +34,10 @@ #include "equality.h" #include "error.h" #include "fidelity.h" -#include "input.h" #include "graph.h" +#include "input.h" #include "metrics.h" +#include "reader_options.h" #include "reporting.h" namespace { @@ -56,11 +53,10 @@ using Outputs = std::vector<std::pair<stg::reporting::OutputFormat, const char*>>; std::vector<stg::Id> Read(const Inputs& inputs, stg::Graph& graph, - bool process_dwarf, stg::Metrics& metrics) { + stg::ReadOptions options, stg::Metrics& metrics) { std::vector<stg::Id> roots; for (const auto& [format, filename] : inputs) { - roots.push_back(stg::Read(graph, format, filename, process_dwarf, - /* info = */ false, metrics)); + roots.push_back(stg::Read(graph, 
format, filename, options, metrics)); } return roots; } @@ -79,9 +75,10 @@ int RunFidelity(const char* filename, const stg::Graph& graph, : 0; } -int RunExact(const Inputs& inputs, bool process_dwarf, stg::Metrics& metrics) { +int RunExact(const Inputs& inputs, stg::ReadOptions options, + stg::Metrics& metrics) { stg::Graph graph; - const auto roots = Read(inputs, graph, process_dwarf, metrics); + const auto roots = Read(inputs, graph, options, metrics); struct PairCache { std::optional<bool> Query(const stg::Pair& comparison) const { @@ -106,11 +103,11 @@ int RunExact(const Inputs& inputs, bool process_dwarf, stg::Metrics& metrics) { } int Run(const Inputs& inputs, const Outputs& outputs, stg::Ignore ignore, - bool process_dwarf, std::optional<const char*> fidelity, + stg::ReadOptions options, std::optional<const char*> fidelity, stg::Metrics& metrics) { // Read inputs. stg::Graph graph; - const auto roots = Read(inputs, graph, process_dwarf, metrics); + const auto roots = Read(inputs, graph, options, metrics); // Compute differences. stg::Compare compare{graph, ignore, metrics}; @@ -158,7 +155,7 @@ int main(int argc, char* argv[]) { // Process arguments. 
bool opt_metrics = false; bool opt_exact = false; - bool opt_skip_dwarf = false; + stg::ReadOptions opt_read_options; std::optional<const char*> opt_fidelity = std::nullopt; stg::Ignore opt_ignore; stg::InputFormat opt_input_format = stg::InputFormat::ABI; @@ -173,6 +170,7 @@ int main(int argc, char* argv[]) { {"elf", no_argument, nullptr, 'e' }, {"stg", no_argument, nullptr, 's' }, {"exact", no_argument, nullptr, 'x' }, + {"types", no_argument, nullptr, 't' }, {"ignore", required_argument, nullptr, 'i' }, {"format", required_argument, nullptr, 'f' }, {"output", required_argument, nullptr, 'o' }, @@ -186,6 +184,7 @@ int main(int argc, char* argv[]) { << " [-a|--abi|-b|--btf|-e|--elf|-s|--stg] file1\n" << " [-a|--abi|-b|--btf|-e|--elf|-s|--stg] file2\n" << " [-x|--exact]\n" + << " [-t|--types]\n" << " [--skip-dwarf]\n" << " [{-i|--ignore} <ignore-option>] ...\n" << " [{-f|--format} <output-format>] ...\n" @@ -199,7 +198,7 @@ int main(int argc, char* argv[]) { }; while (true) { int ix; - int c = getopt_long(argc, argv, "-mabesxi:f:o:F:", opts, &ix); + const int c = getopt_long(argc, argv, "-mabesxti:f:o:F:", opts, &ix); if (c == -1) { break; } @@ -223,6 +222,9 @@ int main(int argc, char* argv[]) { case 'x': opt_exact = true; break; + case 't': + opt_read_options.Set(stg::ReadOptions::TYPE_ROOTS); + break; case 1: inputs.emplace_back(opt_input_format, argument); break; @@ -257,7 +259,7 @@ int main(int argc, char* argv[]) { opt_fidelity.emplace(argument); break; case kSkipDwarf: - opt_skip_dwarf = true; + opt_read_options.Set(stg::ReadOptions::SKIP_DWARF); break; default: return usage(); @@ -268,9 +270,9 @@ int main(int argc, char* argv[]) { } try { - const int status = opt_exact ? RunExact(inputs, !opt_skip_dwarf, metrics) + const int status = opt_exact ? 
RunExact(inputs, opt_read_options, metrics) : Run(inputs, outputs, opt_ignore, - !opt_skip_dwarf, opt_fidelity, metrics); + opt_read_options, opt_fidelity, metrics); if (opt_metrics) { stg::Report(metrics, std::cerr); } diff --git a/stgdiff_test.cc b/stgdiff_test.cc index f8d93f7..7610883 100644 --- a/stgdiff_test.cc +++ b/stgdiff_test.cc @@ -28,6 +28,7 @@ #include "graph.h" #include "input.h" #include "metrics.h" +#include "reader_options.h" #include "reporting.h" namespace { @@ -49,8 +50,9 @@ std::string filename_to_path(const std::string& f) { stg::Id Read(stg::Graph& graph, stg::InputFormat format, const std::string& input, stg::Metrics& metrics) { + const stg::ReadOptions opt_read_options(stg::ReadOptions::SKIP_DWARF); return stg::Read(graph, format, filename_to_path(input).c_str(), - /* process_dwarf = */ false, /* info = */ false, metrics); + opt_read_options, metrics); } TEST_CASE("ignore") { @@ -22,13 +22,13 @@ #include <getopt.h> #include <iostream> -#include <string> #include <utility> #include <vector> #include "input.h" #include "error.h" #include "metrics.h" +#include "reader_options.h" using Input = std::pair<stg::InputFormat, const char*>; @@ -36,7 +36,7 @@ int main(int argc, char* const argv[]) { enum LongOptions { kSkipDwarf = 256, }; - bool opt_skip_dwarf = false; + stg::ReadOptions opt_read_options(stg::ReadOptions::INFO); static option opts[] = { {"btf", required_argument, nullptr, 'b' }, {"elf", required_argument, nullptr, 'e' }, @@ -65,7 +65,7 @@ int main(int argc, char* const argv[]) { inputs.emplace_back(stg::InputFormat::ELF, argument); break; case kSkipDwarf: - opt_skip_dwarf = true; + opt_read_options.Set(stg::ReadOptions::SKIP_DWARF); break; default: return usage(); @@ -82,8 +82,7 @@ int main(int argc, char* const argv[]) { try { stg::Graph graph; stg::Metrics metrics; - (void)stg::Read(graph, format, filename, !opt_skip_dwarf, /* info = */ true, - metrics); + (void)stg::Read(graph, format, filename, opt_read_options, metrics); } catch 
(const stg::Exception& e) { std::cerr << e.what() << '\n'; return 1; diff --git a/substitution.h b/substitution.h index f68392f..adcb801 100644 --- a/substitution.h +++ b/substitution.h @@ -130,6 +130,7 @@ struct Substitute { void operator()(Interface& x) { Update(x.symbols); + Update(x.types); } Graph& graph; diff --git a/testdata/abigail_anonymous_types_0.xml b/testdata/abigail_anonymous_types_0.xml new file mode 100644 index 0000000..021a4fd --- /dev/null +++ b/testdata/abigail_anonymous_types_0.xml @@ -0,0 +1,30 @@ +<!-- + Vanilla output from libabigail. Note that the two structs were + actually anonymous in the source. Here they have been given names + and backwards links to their naming typedefs. +--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-function-symbols> + <elf-symbol name='fun' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-function-symbols> + <abi-instr address-size='64' path='nt.c' language='LANG_C11'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <class-decl name='foo' size-in-bits='32' is-struct='yes' naming-typedef-id='e6891164' visibility='default' id='3f46facf'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <typedef-decl name='foo' type-id='3f46facf' id='e6891164'/> + <class-decl name='bar' size-in-bits='32' is-struct='yes' naming-typedef-id='9d6b0219' visibility='default' id='defbd7d2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <typedef-decl name='bar' type-id='defbd7d2' id='9d6b0219'/> + <function-decl name='fun' mangled-name='fun' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fun'> + <parameter type-id='e6891164' name='a'/> + <parameter type-id='9d6b0219' name='b'/> + <return type-id='95e97e5e'/> + 
</function-decl> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_anonymous_types_1.xml b/testdata/abigail_anonymous_types_1.xml new file mode 100644 index 0000000..8a3d941 --- /dev/null +++ b/testdata/abigail_anonymous_types_1.xml @@ -0,0 +1,30 @@ +<!-- + Simulation of vanilla output from an older version of libabigail. + Note that the two structs are properly anonymous. They have + backwards links to their naming typedefs. +--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-function-symbols> + <elf-symbol name='fun' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-function-symbols> + <abi-instr address-size='64' path='nt.c' language='LANG_C11'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <class-decl name='__anonymous_struct__' is-anonymous='yes' size-in-bits='32' is-struct='yes' naming-typedef-id='e6891164' visibility='default' id='3f46facf'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <typedef-decl name='foo' type-id='3f46facf' id='e6891164'/> + <class-decl name='__anonymous_struct__1' is-anonymous='yes' size-in-bits='32' is-struct='yes' naming-typedef-id='9d6b0219' visibility='default' id='defbd7d2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <typedef-decl name='bar' type-id='defbd7d2' id='9d6b0219'/> + <function-decl name='fun' mangled-name='fun' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fun'> + <parameter type-id='e6891164' name='a'/> + <parameter type-id='9d6b0219' name='b'/> + <return type-id='95e97e5e'/> + </function-decl> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_anonymous_types_2.xml b/testdata/abigail_anonymous_types_2.xml new file mode 100644 index 0000000..8666c64 --- /dev/null +++ 
b/testdata/abigail_anonymous_types_2.xml @@ -0,0 +1,26 @@ +<!-- + Simulation of buggy output from an older version of libabigail. + The two structs were anonymous and identical but had different + naming typedefs. Nevertheless, they were considered the same and + only one survived. +--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-function-symbols> + <elf-symbol name='fun' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-function-symbols> + <abi-instr address-size='64' path='nt.c' language='LANG_C11'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <class-decl name='__anonymous_struct__' is-anonymous='yes' size-in-bits='32' is-struct='yes' naming-typedef-id='e6891164' visibility='default' id='3f46facf'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <typedef-decl name='foo' type-id='3f46facf' id='e6891164'/> + <typedef-decl name='bar' type-id='3f46facf' id='9d6b0219'/> + <function-decl name='fun' mangled-name='fun' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fun'> + <parameter type-id='e6891164' name='a'/> + <parameter type-id='9d6b0219' name='b'/> + <return type-id='95e97e5e'/> + </function-decl> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_anonymous_types_3.xml b/testdata/abigail_anonymous_types_3.xml new file mode 100644 index 0000000..7eda3e6 --- /dev/null +++ b/testdata/abigail_anonymous_types_3.xml @@ -0,0 +1,29 @@ +<!-- + abitidy-equivalent output with reinstated and cleaned-up anonymous + types. There are no backwards links to naming typedefs. 
+--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-function-symbols> + <elf-symbol name='fun' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-function-symbols> + <abi-instr address-size='64' path='nt.c' language='LANG_C11'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <class-decl name='__anonymous_struct__' is-anonymous='yes' size-in-bits='32' is-struct='yes' visibility='default' id='3f46facf'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <typedef-decl name='foo' type-id='3f46facf' id='e6891164'/> + <class-decl name='__anonymous_struct__' is-anonymous='yes' size-in-bits='32' is-struct='yes' visibility='default' id='defbd7d2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <typedef-decl name='bar' type-id='defbd7d2' id='9d6b0219'/> + <function-decl name='fun' mangled-name='fun' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fun'> + <parameter type-id='e6891164' name='a'/> + <parameter type-id='9d6b0219' name='b'/> + <return type-id='95e97e5e'/> + </function-decl> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_anonymous_types_4.xml b/testdata/abigail_anonymous_types_4.xml new file mode 100644 index 0000000..3ee3549 --- /dev/null +++ b/testdata/abigail_anonymous_types_4.xml @@ -0,0 +1,24 @@ +<!-- + Minimal equivalent tidy ABI XML. There is only one anonymous + struct definition. 
+--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-function-symbols> + <elf-symbol name='fun' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-function-symbols> + <abi-instr address-size='64' path='nt.c' language='LANG_C11'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <class-decl name='foo' size-in-bits='32' is-struct='yes' naming-typedef-id='e6891164' visibility='default' id='3f46facf'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <typedef-decl name='foo' type-id='3f46facf' id='e6891164'/> + <typedef-decl name='bar' type-id='3f46facf' id='9d6b0219'/> + <function-decl name='fun' mangled-name='fun' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fun'> + <parameter type-id='e6891164' name='a'/> + <parameter type-id='9d6b0219' name='b'/> + <return type-id='95e97e5e'/> + </function-decl> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_clean.xml b/testdata/abigail_clean.xml new file mode 100644 index 0000000..36522dc --- /dev/null +++ b/testdata/abigail_clean.xml @@ -0,0 +1,10 @@ +<abi-corpus version='2.1' path='/tmp/ex.o' architecture='elf-amd-x86_64'> + <elf-variable-symbols> + <elf-symbol name='x' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='ex.c' comp-dir-path='/tmp' language='LANG_C11'> + <type-decl name='int' size-in-bits='32' id='type-id-1'/> + <class-decl name='unused' is-struct='yes' visibility='default' id='type-id-2'/> + <var-decl name='x' type-id='type-id-1' mangled-name='x' visibility='default' elf-symbol-id='x'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_dirty.xml b/testdata/abigail_dirty.xml new file mode 100644 index 0000000..071ed21 --- /dev/null +++ b/testdata/abigail_dirty.xml @@ -0,0 
+1,14 @@ +<abi-corpus version='2.1' path='/tmp/ex.o' architecture='elf-amd-x86_64' tracking-non-reachable-types='yes'> + <elf-variable-symbols> + <!-- long long --> + <elf-symbol name='x' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='ex.c' comp-dir-path='/tmp' language='LANG_C11'> + <!-- int --> + <type-decl name='int' size-in-bits='32' id='type-id-1'/> + <!-- struct unused --> + <class-decl name='unused' is-struct='yes' is-non-reachable='yes' visibility='default' filepath='/tmp/ex.c' line='1' column='1' id='type-id-2'/> + <!-- int x --> + <var-decl name='x' type-id='type-id-1' mangled-name='x' visibility='default' filepath='/tmp/ex.c' line='4' column='1' elf-symbol-id='x'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_duplicate_data_members_0.xml b/testdata/abigail_duplicate_data_members_0.xml new file mode 100644 index 0000000..157f0e7 --- /dev/null +++ b/testdata/abigail_duplicate_data_members_0.xml @@ -0,0 +1,26 @@ +<!-- Vanilla output from libabigail. 
--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-function-symbols> + <elf-symbol name='fun' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-function-symbols> + <abi-instr address-size='64' path='dup.c' language='LANG_C11'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> + </class-decl> + <class-decl name='__anonymous_struct__' size-in-bits='32' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f72'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <function-decl name='fun' mangled-name='fun' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fun'> + <parameter type-id='70d9d65a' name='a'/> + <return type-id='95e97e5e'/> + </function-decl> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_duplicate_data_members_1.xml b/testdata/abigail_duplicate_data_members_1.xml new file mode 100644 index 0000000..9c287bc --- /dev/null +++ b/testdata/abigail_duplicate_data_members_1.xml @@ -0,0 +1,29 @@ +<!-- The trailing anonymous member of s is duplicated. 
--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-function-symbols> + <elf-symbol name='fun' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-function-symbols> + <abi-instr address-size='64' path='dup.c' language='LANG_C11'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> + </class-decl> + <class-decl name='__anonymous_struct__' size-in-bits='32' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f72'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + <function-decl name='fun' mangled-name='fun' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fun'> + <parameter type-id='70d9d65a' name='a'/> + <return type-id='95e97e5e'/> + </function-decl> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_duplicate_types_0.xml b/testdata/abigail_duplicate_types_0.xml new file mode 100644 index 0000000..4e85853 --- /dev/null +++ b/testdata/abigail_duplicate_types_0.xml @@ -0,0 +1,24 @@ +<!-- Vanilla output from libabigail. 
--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-variable-symbols> + <elf-symbol name='t' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mt.cc' language='LANG_C_plus_plus_14'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <var-decl name='t' type-id='500730c2' mangled-name='t' visibility='default' elf-symbol-id='t'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_duplicate_types_1.xml b/testdata/abigail_duplicate_types_1.xml new file mode 100644 index 0000000..c672c5b --- /dev/null +++ b/testdata/abigail_duplicate_types_1.xml @@ -0,0 +1,40 @@ +<!-- + Simulation of exactly duplicated type, as seen in some older libabigail versions. 
+--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-variable-symbols> + <elf-symbol name='t' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mt.cc' language='LANG_C_plus_plus_14'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <var-decl name='t' type-id='500730c2' mangled-name='t' visibility='default' elf-symbol-id='t'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_duplicate_types_2.xml b/testdata/abigail_duplicate_types_2.xml new file mode 100644 index 0000000..a08250a --- /dev/null +++ b/testdata/abigail_duplicate_types_2.xml @@ -0,0 +1,40 @@ +<!-- + Simulation of duplicated emitted-in-scope member, as generated by + (older) libabigail, 
resulting in duplicate definitions of the + typedef and its containing structs; note the incorrect access + attribute just outside the incomplete definition of T. +--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-variable-symbols> + <elf-symbol name='t' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mt.cc' language='LANG_C_plus_plus_14'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='private'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <var-decl name='t' type-id='500730c2' mangled-name='t' visibility='default' elf-symbol-id='t'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_duplicate_types_3.xml b/testdata/abigail_duplicate_types_3.xml new file mode 100644 index 0000000..2e92f18 --- /dev/null +++ b/testdata/abigail_duplicate_types_3.xml @@ -0,0 +1,46 @@ +<!-- Two partial duplicates which are subtrees of the third. 
--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-variable-symbols> + <elf-symbol name='t' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mt.cc' language='LANG_C_plus_plus_14'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <var-decl name='t' type-id='500730c2' mangled-name='t' visibility='default' elf-symbol-id='t'/> + </abi-instr> +</abi-corpus> 
diff --git a/testdata/abigail_duplicate_types_4.xml b/testdata/abigail_duplicate_types_4.xml new file mode 100644 index 0000000..e302fe2 --- /dev/null +++ b/testdata/abigail_duplicate_types_4.xml @@ -0,0 +1,21 @@ +<!-- Vanilla output from libabigail. --> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-variable-symbols> + <elf-symbol name='t' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mt.cc' language='LANG_C_plus_plus_14'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <var-decl name='t' type-id='500730c2' mangled-name='t' visibility='default' elf-symbol-id='t'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_duplicate_types_5.xml b/testdata/abigail_duplicate_types_5.xml new file mode 100644 index 0000000..4d67278 --- /dev/null +++ b/testdata/abigail_duplicate_types_5.xml @@ -0,0 +1,46 @@ +<!-- Two partial duplicates which are almost distinct subtrees of the third. 
--> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-variable-symbols> + <elf-symbol name='t' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mt.cc' language='LANG_C_plus_plus_14'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <var-decl name='t' type-id='500730c2' mangled-name='t' visibility='default' elf-symbol-id='t'/> + </abi-instr> +</abi-corpus> 
diff --git a/testdata/abigail_duplicate_types_6.xml b/testdata/abigail_duplicate_types_6.xml new file mode 100644 index 0000000..d0cb5c2 --- /dev/null +++ b/testdata/abigail_duplicate_types_6.xml @@ -0,0 +1,35 @@ +<!-- Duplicates in the wrong namespace will be ignored. --> +<abi-corpus version='2.1' architecture='elf-amd-x86_64'> + <elf-variable-symbols> + <elf-symbol name='t' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mt.cc' language='LANG_C_plus_plus_14'> + <type-decl name='int' size-in-bits='32' id='95e97e5e'/> + <namespace-decl name='N'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <namespace-decl name='NO'> + <class-decl name='S' size-in-bits='8' is-struct='yes' visibility='default' id='af2c111e'> + <member-type access='public'> + <class-decl name='T' size-in-bits='32' is-struct='yes' visibility='default' id='500730c2'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='y' type-id='95e97e5e' visibility='default'/> + </data-member> + </class-decl> + </member-type> + </class-decl> + </namespace-decl> + <var-decl name='t' type-id='500730c2' mangled-name='t' visibility='default' elf-symbol-id='t'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_tree_0.xml b/testdata/abigail_tree_0.xml new file mode 100644 index 0000000..fa90e56 --- /dev/null +++ b/testdata/abigail_tree_0.xml @@ -0,0 +1,12 @@ 
+<!-- Snippet of vanilla output from libabigail. --> +<class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <member-type access='private'> + <typedef-decl name='X' type-id='95e97e5e' id='b35c4d3f'/> + </member-type> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> +</class-decl> diff --git a/testdata/abigail_tree_1.xml b/testdata/abigail_tree_1.xml new file mode 100644 index 0000000..3dfe56d --- /dev/null +++ b/testdata/abigail_tree_1.xml @@ -0,0 +1,12 @@ +<!-- Attributes are sorted. --> +<class-decl id='70d9d65a' is-struct='yes' name='s' size-in-bits='64' visibility='default'> + <member-type access='private'> + <typedef-decl name='X' type-id='95e97e5e' id='b35c4d3f'/> + </member-type> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> +</class-decl> diff --git a/testdata/abigail_tree_2.xml b/testdata/abigail_tree_2.xml new file mode 100644 index 0000000..4077fa3 --- /dev/null +++ b/testdata/abigail_tree_2.xml @@ -0,0 +1,12 @@ +<!-- Elements are sorted. 
--> +<class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> + <member-type access='private'> + <typedef-decl name='X' type-id='95e97e5e' id='b35c4d3f'/> + </member-type> +</class-decl> diff --git a/testdata/abigail_tree_3.xml b/testdata/abigail_tree_3.xml new file mode 100644 index 0000000..4f00618 --- /dev/null +++ b/testdata/abigail_tree_3.xml @@ -0,0 +1,12 @@ +<!-- An attribute is missing. --> +<class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <member-type access='private'> + <typedef-decl name='X' type-id='95e97e5e' id='b35c4d3f'/> + </member-type> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> +</class-decl> diff --git a/testdata/abigail_tree_4.xml b/testdata/abigail_tree_4.xml new file mode 100644 index 0000000..ef627bf --- /dev/null +++ b/testdata/abigail_tree_4.xml @@ -0,0 +1,9 @@ +<!-- An element is missing. --> +<class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <member-type access='private'> + <typedef-decl name='X' type-id='95e97e5e' id='b35c4d3f'/> + </member-type> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> +</class-decl> diff --git a/testdata/abigail_tree_5.xml b/testdata/abigail_tree_5.xml new file mode 100644 index 0000000..9293499 --- /dev/null +++ b/testdata/abigail_tree_5.xml @@ -0,0 +1,12 @@ +<!-- An attribute is changed. 
--> +<class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <member-type access='private'> + <typedef-decl name='X' type-id='95e97e5e' id='b35c4d3f'/> + </member-type> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public'> + <var-decl name='y' type-id='e7f43f72' visibility='default'/> + </data-member> +</class-decl> diff --git a/testdata/abigail_tree_6.xml b/testdata/abigail_tree_6.xml new file mode 100644 index 0000000..65db721 --- /dev/null +++ b/testdata/abigail_tree_6.xml @@ -0,0 +1,16 @@ +<!-- An element is changed. --> +<class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <member-type access='private'> + <typedef-decl name='X' type-id='95e97e5e' id='b35c4d3f'/> + </member-type> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <member-function access='public' vtable-offset='0'> + <function-decl name='fun' mangled-name='fun' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='70d9d65a' name='a'/> + <return type-id='95e97e5e'/> + </function-decl> + <function-decl name='' type-id='e7f43f72' visibility='default'/> + </member-function> +</class-decl> diff --git a/testdata/abigail_tree_7.xml b/testdata/abigail_tree_7.xml new file mode 100644 index 0000000..8f2831b --- /dev/null +++ b/testdata/abigail_tree_7.xml @@ -0,0 +1,12 @@ +<!-- member-type access is missing. 
--> +<class-decl name='s' size-in-bits='64' is-struct='yes' visibility='default' id='70d9d65a'> + <member-type> + <typedef-decl name='X' type-id='95e97e5e' id='b35c4d3f'/> + </member-type> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='x' type-id='95e97e5e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> +</class-decl> diff --git a/type_normalisation.cc b/type_normalisation.cc index 74eb723..72cc94b 100644 --- a/type_normalisation.cc +++ b/type_normalisation.cc @@ -19,6 +19,8 @@ #include "type_normalisation.h" +#include <map> +#include <string> #include <unordered_map> #include <unordered_set> #include <vector> @@ -80,6 +82,12 @@ struct FindQualifiedTypesAndFunctions { } } + void operator()(const std::map<std::string, Id>& x) { + for (const auto& [_, id] : x) { + (*this)(id); + } + } + void operator()(const Void&, Id) {} void operator()(const Variadic&, Id) {} @@ -150,9 +158,8 @@ struct FindQualifiedTypesAndFunctions { } void operator()(const Interface& x, Id) { - for (auto& [_, id] : x.symbols) { - (*this)(id); - } + (*this)(x.symbols); + (*this)(x.types); } const Graph& graph; diff --git a/type_resolution.cc b/type_resolution.cc index aa187d8..bde42f6 100644 --- a/type_resolution.cc +++ b/type_resolution.cc @@ -21,7 +21,6 @@ #include <functional> #include <map> -#include <sstream> #include <string> #include <unordered_map> #include <unordered_set> @@ -59,6 +58,12 @@ struct NamedTypes { } } + void operator()(const std::map<std::string, Id>& x) { + for (const auto& [_, id] : x) { + (*this)(id); + } + } + // main entry point void operator()(Id id) { if (seen.Insert(id)) { @@ -169,9 +174,8 @@ struct NamedTypes { } void operator()(const Interface& x, Id) { - for (const auto& [_, symbol] : x.symbols) { - (*this)(symbol); - } + (*this)(x.symbols); + (*this)(x.types); } const Graph& graph; @@ -437,7 +441,8 @@ 
struct Unify { } bool operator()(const Interface& x1, const Interface& x2) { - return (*this)(x1.symbols, x2.symbols); + return (*this)(x1.symbols, x2.symbols) + && (*this)(x1.types, x2.types); } bool Mismatch() { |