aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-06-28 13:35:39 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-06-28 13:35:39 +0000
commit1fa2cdf4ba19bb2c174bdad4aef4bcb195a8f75a (patch)
treef6b717bd926a7b9c83dd6ddd51e0b27633ae0399
parentad8e0a5eb946febb9dd3ae27f7260bae8bf69b63 (diff)
parentbec78e9003ef0773664dab43b91e8c919bd861f0 (diff)
downloadstg-1fa2cdf4ba19bb2c174bdad4aef4bcb195a8f75a.tar.gz
Snap for 10399941 from bec78e9003ef0773664dab43b91e8c919bd861f0 to sdk-releaseplatform-tools-34.0.4
Change-Id: Iefa1aa1e5ff1550b3dd82204cc148895ee77ba9c
-rw-r--r--Android.bp1
-rw-r--r--abigail_reader.cc46
-rw-r--r--abigail_reader.h3
-rw-r--r--abigail_reader_test.cc1
-rw-r--r--comparison.cc20
-rw-r--r--comparison.h2
-rw-r--r--doc/stgdiff.md13
-rw-r--r--dwarf_processor.cc218
-rw-r--r--dwarf_wrappers.cc76
-rw-r--r--dwarf_wrappers.h1
-rw-r--r--elf_loader.cc2
-rw-r--r--elf_reader.cc13
-rw-r--r--equality_cache.h1
-rw-r--r--error.h28
-rw-r--r--file_descriptor.cc4
-rw-r--r--graph.cc23
-rw-r--r--graph.h15
-rw-r--r--proto_reader.cc30
-rw-r--r--proto_writer.cc2
-rw-r--r--proto_writer.h2
-rw-r--r--scope.h59
-rw-r--r--stg.cc17
-rw-r--r--stg.proto1
-rw-r--r--stgdiff.cc3
-rw-r--r--stgdiff_test.cc57
-rw-r--r--symbol_filter.cc4
-rw-r--r--testdata/crc_change_0.stg21
-rw-r--r--testdata/crc_change_1.stg21
-rw-r--r--testdata/crc_change_small_diff3
-rw-r--r--testdata/interface_addition_0.stg5
-rw-r--r--testdata/interface_addition_1.stg20
-rw-r--r--testdata/interface_addition_small_diff2
-rw-r--r--testdata/type_addition_0.stg5
-rw-r--r--testdata/type_addition_1.stg11
-rw-r--r--testdata/type_addition_small_diff2
-rw-r--r--type_resolution.cc307
-rw-r--r--unification.cc267
-rw-r--r--unification.h93
38 files changed, 944 insertions, 455 deletions
diff --git a/Android.bp b/Android.bp
index 1010fd0..f2ad130 100644
--- a/Android.bp
+++ b/Android.bp
@@ -90,6 +90,7 @@ cc_library_host_static {
"stg.proto",
"type_normalisation.cc",
"type_resolution.cc",
+ "unification.cc",
],
proto: {
export_proto_headers: true,
diff --git a/abigail_reader.cc b/abigail_reader.cc
index c414a59..f6f5654 100644
--- a/abigail_reader.cc
+++ b/abigail_reader.cc
@@ -46,6 +46,7 @@
#include "error.h"
#include "file_descriptor.h"
#include "graph.h"
+#include "scope.h"
#include "type_normalisation.h"
namespace stg {
@@ -163,6 +164,8 @@ std::optional<ElfSymbol::SymbolType> Parse<ElfSymbol::SymbolType>(
return {ElfSymbol::SymbolType::COMMON};
} else if (value == "tls-type") {
return {ElfSymbol::SymbolType::TLS};
+ } else if (value == "gnu-ifunc-type") {
+ return {ElfSymbol::SymbolType::GNU_IFUNC};
}
return {};
}
@@ -678,24 +681,6 @@ std::optional<PointerReference::Kind> ParseReferenceKind(
return {};
}
-class PushScopeName {
- public:
- PushScopeName(std::string& scope_name, const std::string& name)
- : scope_name_(scope_name), old_size_(scope_name.size()) {
- scope_name_ += name;
- scope_name_ += "::";
- }
- PushScopeName(const PushScopeName& other) = delete;
- PushScopeName& operator=(const PushScopeName& other) = delete;
- ~PushScopeName() {
- scope_name_.resize(old_size_);
- }
-
- private:
- std::string& scope_name_;
- const size_t old_size_;
-};
-
} // namespace
Abigail::Abigail(Graph& graph) : graph_(graph) {}
@@ -754,6 +739,11 @@ Id Abigail::ProcessRoot(xmlNodePtr root) {
} else {
Die() << "unrecognised root element '" << name << "'";
}
+ for (const auto& [type_id, id] : type_ids_) {
+ if (!graph_.Is(id)) {
+ Warn() << "no definition found for type '" << type_id << "'";
+ }
+ }
const Id id = BuildSymbols();
RemoveUselessQualifiers(graph_, id);
return id;
@@ -884,7 +874,7 @@ void Abigail::ProcessInstr(xmlNodePtr instr) {
void Abigail::ProcessNamespace(xmlNodePtr scope) {
const auto name = GetAttributeOrDie(scope, "name");
- PushScopeName push_scope_name(scope_name_, name);
+ const PushScopeName push_scope_name(scope_name_, "namespace", name);
ProcessScope(scope);
}
@@ -1009,17 +999,13 @@ void Abigail::ProcessStructUnion(Id id, bool is_struct,
const auto kind = is_struct
? StructUnion::Kind::STRUCT
: StructUnion::Kind::UNION;
- const auto name = ReadAttribute<bool>(struct_union, "is-anonymous", false)
- ? std::string()
- : GetAttributeOrDie(struct_union, "name");
- const auto full_name = name.empty() ? std::string() : scope_name_ + name;
- std::ostringstream scope_name_os;
- if (name.empty()) {
- scope_name_os << "<unnamed " << kind << ">";
- } else {
- scope_name_os << name;
- }
- PushScopeName push_scope_name(scope_name_, scope_name_os.str());
+ const bool is_anonymous =
+ ReadAttribute<bool>(struct_union, "is-anonymous", false);
+ const auto name =
+ is_anonymous ? std::string() : GetAttributeOrDie(struct_union, "name");
+ const auto full_name =
+ is_anonymous ? std::string() : scope_name_ + name;
+ const PushScopeName push_scope_name(scope_name_, kind, name);
if (forward) {
graph_.Set<StructUnion>(id, kind, full_name);
return;
diff --git a/abigail_reader.h b/abigail_reader.h
index b040352..90c2dc1 100644
--- a/abigail_reader.h
+++ b/abigail_reader.h
@@ -33,6 +33,7 @@
#include <libxml/tree.h>
#include "graph.h"
#include "metrics.h"
+#include "scope.h"
namespace stg {
namespace abixml {
@@ -105,7 +106,7 @@ class Abigail {
symbol_id_and_full_name_;
// Full name of the current scope.
- std::string scope_name_;
+ Scope scope_name_;
Id GetNode(const std::string& type_id);
Id GetEdge(xmlNodePtr element);
diff --git a/abigail_reader_test.cc b/abigail_reader_test.cc
index d7f251c..45d3634 100644
--- a/abigail_reader_test.cc
+++ b/abigail_reader_test.cc
@@ -195,6 +195,7 @@ TEST_CASE("Tidy") {
// Read inputs.
stg::Graph graph;
std::vector<stg::Id> ids;
+ ids.reserve(test.files.size());
for (const char* file : test.files) {
ids.push_back(Read(graph, file));
}
diff --git a/comparison.cc b/comparison.cc
index 29453b8..b02f07c 100644
--- a/comparison.cc
+++ b/comparison.cc
@@ -42,13 +42,15 @@ struct IgnoreDescriptor {
Ignore::Value value;
};
-static constexpr std::array<IgnoreDescriptor, 6> kIgnores{{
+static constexpr std::array<IgnoreDescriptor, 8> kIgnores{{
{"type_declaration_status", Ignore::TYPE_DECLARATION_STATUS},
{"symbol_type_presence", Ignore::SYMBOL_TYPE_PRESENCE },
{"primitive_type_encoding", Ignore::PRIMITIVE_TYPE_ENCODING},
{"member_size", Ignore::MEMBER_SIZE },
{"enum_underlying_type", Ignore::ENUM_UNDERLYING_TYPE },
{"qualifier", Ignore::QUALIFIER },
+ {"interface_addition", Ignore::INTERFACE_ADDITION },
+ {"linux_symbol_crc", Ignore::SYMBOL_CRC },
}};
std::optional<Ignore::Value> ParseIgnore(std::string_view ignore) {
@@ -374,7 +376,8 @@ void CompareNodes(Result& result, Compare& compare, const std::vector<Id>& ids1,
void CompareNodes(Result& result, Compare& compare,
const std::map<std::string, Id>& x1,
- const std::map<std::string, Id>& x2) {
+ const std::map<std::string, Id>& x2,
+ bool ignore_added) {
// Group diffs into removed, added and changed symbols for readability.
std::vector<Id> removed;
std::vector<Id> added;
@@ -391,7 +394,9 @@ void CompareNodes(Result& result, Compare& compare,
++it1;
} else if (it1 == end1 || (it2 != end2 && it1->first > it2->first)) {
// added
- added.push_back(it2->second);
+ if (!ignore_added) {
+ added.push_back(it2->second);
+ }
++it2;
} else {
// in both
@@ -645,7 +650,9 @@ Result Compare::operator()(const ElfSymbol& x1, const ElfSymbol& x2) {
result.MaybeAddNodeDiff("symbol type", x1.symbol_type, x2.symbol_type);
result.MaybeAddNodeDiff("binding", x1.binding, x2.binding);
result.MaybeAddNodeDiff("visibility", x1.visibility, x2.visibility);
- result.MaybeAddNodeDiff("CRC", x1.crc, x2.crc);
+ if (!ignore.Test(Ignore::SYMBOL_CRC)) {
+ result.MaybeAddNodeDiff("CRC", x1.crc, x2.crc);
+ }
result.MaybeAddNodeDiff("namespace", x1.ns, x2.ns);
if (x1.type_id && x2.type_id) {
@@ -668,8 +675,9 @@ Result Compare::operator()(const ElfSymbol& x1, const ElfSymbol& x2) {
Result Compare::operator()(const Interface& x1, const Interface& x2) {
Result result;
result.diff_.holds_changes = true;
- CompareNodes(result, *this, x1.symbols, x2.symbols);
- CompareNodes(result, *this, x1.types, x2.types);
+ const bool ignore_added = ignore.Test(Ignore::INTERFACE_ADDITION);
+ CompareNodes(result, *this, x1.symbols, x2.symbols, ignore_added);
+ CompareNodes(result, *this, x1.types, x2.types, ignore_added);
return result;
}
diff --git a/comparison.h b/comparison.h
index 724c0ca..fc66580 100644
--- a/comparison.h
+++ b/comparison.h
@@ -51,6 +51,8 @@ struct Ignore {
MEMBER_SIZE = 1<<3,
ENUM_UNDERLYING_TYPE = 1<<4,
QUALIFIER = 1<<5,
+ INTERFACE_ADDITION = 1<<6,
+ SYMBOL_CRC = 1<<7,
};
using Bitset = std::underlying_type_t<Value>;
diff --git a/doc/stgdiff.md b/doc/stgdiff.md
index a2c613c..b3e7789 100644
--- a/doc/stgdiff.md
+++ b/doc/stgdiff.md
@@ -20,7 +20,7 @@ stgdiff
implicit defaults: --abi --format plain
--exact (node equality) cannot be combined with --output
output formats: plain flat small short viz
-ignore options: type_declaration_status symbol_type_presence primitive_type_encoding member_size enum_underlying_type qualifier
+ignore options: type_declaration_status symbol_type_presence primitive_type_encoding member_size enum_underlying_type qualifier interface_addition linux_symbol_crc
```
## Input
@@ -114,6 +114,17 @@ in how much (DWARF) information they preserve.
Ignore qualifiers during comparison. Both libabigail and STG interpret and
adjust type qualifiers but sometimes do so differently.
+* `interface_addition`
+
+ Ignore interface additions during comparison. This can be useful for ABI
+ comparisons where symbol / type additions are allowed.
+
+* `linux_symbol_crc`
+
+ Ignore Linux kernel symbol CRC changes during comparison. This can be
+ useful for ABI comparisons across different toolchains, where CRC changes
+ are often large and not useful.
+
### Fidelity Reporting
* `-F|--fidelity`
diff --git a/dwarf_processor.cc b/dwarf_processor.cc
index c144010..df0ec5b 100644
--- a/dwarf_processor.cc
+++ b/dwarf_processor.cc
@@ -20,7 +20,10 @@
#include "dwarf_processor.h"
#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <algorithm>
+#include <cstddef>
#include <ios>
#include <iostream>
#include <optional>
@@ -33,6 +36,7 @@
#include "dwarf_wrappers.h"
#include "error.h"
#include "graph.h"
+#include "scope.h"
namespace stg {
namespace dwarf {
@@ -41,7 +45,7 @@ namespace {
std::string EntryToString(Entry& entry) {
std::ostringstream os;
- os << "DWARF entry <0x" << std::hex << entry.GetOffset() << ">";
+ os << "DWARF entry <" << Hex(entry.GetOffset()) << ">";
return os.str();
}
@@ -107,7 +111,7 @@ Primitive::Encoding GetEncoding(Entry& entry) {
case DW_ATE_UTF:
return Primitive::Encoding::UTF;
default:
- Die() << "Unknown encoding 0x" << std::hex << *dwarf_encoding << " for "
+ Die() << "Unknown encoding " << Hex(*dwarf_encoding) << " for "
<< EntryToString(entry);
}
}
@@ -282,7 +286,7 @@ class Processor {
ProcessStructUnion(entry, StructUnion::Kind::UNION);
break;
case DW_TAG_member:
- ProcessMember(entry);
+ Die() << "DW_TAG_member outside of struct/class/union";
break;
case DW_TAG_pointer_type:
ProcessReference<PointerReference>(
@@ -343,12 +347,31 @@ class Processor {
void CheckUnresolvedIds() const {
for (const auto& [offset, id] : id_map_) {
if (!graph_.Is(id)) {
- Die() << "unresolved id " << id << ", DWARF offset 0x" << std::hex
- << offset;
+ Die() << "unresolved id " << id << ", DWARF offset " << Hex(offset);
}
}
}
+ void ResolveSymbolSpecifications() {
+ std::sort(unresolved_symbol_specifications_.begin(),
+ unresolved_symbol_specifications_.end());
+ std::sort(scoped_names_.begin(), scoped_names_.end());
+ auto symbols_it = unresolved_symbol_specifications_.begin();
+ auto names_it = scoped_names_.begin();
+ while (symbols_it != unresolved_symbol_specifications_.end()) {
+ while (names_it != scoped_names_.end() &&
+ names_it->first < symbols_it->first) {
+ ++names_it;
+ }
+ if (names_it == scoped_names_.end() ||
+ names_it->first != symbols_it->first) {
+ Die() << "Scoped name not found for entry " << Hex(symbols_it->first);
+ }
+ result_.symbols[symbols_it->second].name = names_it->second;
+ ++symbols_it;
+ }
+ }
+
private:
void ProcessAllChildren(Entry& entry) {
for (auto& child : entry.GetChildren()) {
@@ -379,10 +402,9 @@ class Processor {
}
void ProcessTypedef(Entry& entry) {
- std::string type_name = GetName(entry);
+ const std::string type_name = scope_ + GetName(entry);
auto referred_type_id = GetIdForReferredType(MaybeGetReferredType(entry));
- const Id id = AddProcessedNode<Typedef>(entry, std::move(type_name),
- referred_type_id);
+ const Id id = AddProcessedNode<Typedef>(entry, type_name, referred_type_id);
AddNamedTypeNode(id);
}
@@ -402,16 +424,17 @@ class Processor {
}
void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) {
- // TODO: add scoping
std::string name = GetNameOrEmpty(entry);
+ const std::string full_name = name.empty() ? std::string() : scope_ + name;
+ const PushScopeName push_scope_name(scope_, kind, name);
if (entry.GetFlag(DW_AT_declaration)) {
// It is expected to have only name and no children in declaration.
// However, it is not guaranteed and we should do something if we find an
// example.
CheckNoChildren(entry);
- const Id id = AddProcessedNode<StructUnion>(entry, kind, name);
- if (!name.empty()) {
+ const Id id = AddProcessedNode<StructUnion>(entry, kind, full_name);
+ if (!full_name.empty()) {
AddNamedTypeNode(id);
}
return;
@@ -419,6 +442,7 @@ class Processor {
auto byte_size = GetByteSize(entry);
std::vector<Id> members;
+ std::vector<Id> methods;
for (auto& child : entry.GetChildren()) {
auto child_tag = child.GetTag();
@@ -426,9 +450,11 @@ class Processor {
switch (child_tag) {
case DW_TAG_member:
members.push_back(GetIdForEntry(child));
+ ProcessMember(child);
break;
case DW_TAG_subprogram:
- // TODO: process methods
+ methods.push_back(GetIdForEntry(child));
+ ProcessMethod(child);
break;
case DW_TAG_inheritance:
// TODO: process base classes
@@ -439,22 +465,20 @@ class Processor {
case DW_TAG_enumeration_type:
case DW_TAG_typedef:
case DW_TAG_variable:
+ Process(child);
break;
default:
Die() << "Unexpected tag for child of struct/class/union: 0x"
<< std::hex << child_tag;
}
- Process(child);
}
// TODO: support base classes
- // TODO: support methods
const Id id = AddProcessedNode<StructUnion>(
- entry, kind, name, byte_size,
+ entry, kind, full_name, byte_size,
/* base_classes = */ std::vector<Id>{},
- /* methods = */ std::vector<Id>{},
- std::move(members));
- if (!name.empty()) {
+ std::move(methods), std::move(members));
+ if (!full_name.empty()) {
AddNamedTypeNode(id);
}
}
@@ -474,6 +498,38 @@ class Processor {
GetDataBitOffset(entry, bit_size, is_little_endian_binary_), bit_size);
}
+ void ProcessMethod(Entry& entry) {
+ Subprogram subprogram = GetSubprogram(entry);
+ auto id = graph_.Add<Function>(std::move(subprogram.node));
+ if (subprogram.external && subprogram.address) {
+ // Only external functions with address are useful for ABI monitoring
+ // TODO: cover virtual methods
+ const auto new_symbol_idx = result_.symbols.size();
+ result_.symbols.push_back(Types::Symbol{
+ .name = GetScopedNameForSymbol(
+ new_symbol_idx, subprogram.name_with_context),
+ .linkage_name = subprogram.linkage_name,
+ .address = *subprogram.address,
+ .id = id});
+ }
+
+ // TODO: support kind
+ const Method::Kind kind = Method::Kind::NON_VIRTUAL;
+ // TODO: support vtable_offset
+ if (!subprogram.name_with_context.unscoped_name) {
+ Die() << "Method " << EntryToString(entry) << " should have name";
+ }
+ if (subprogram.name_with_context.specification) {
+ Die() << "Method " << EntryToString(entry)
+ << " shouldn't have specification";
+ }
+ // TODO: proper handling of missing linkage name
+ AddProcessedNode<Method>(entry,
+ subprogram.linkage_name.value_or("{missing}"),
+ *subprogram.name_with_context.unscoped_name, kind,
+ /* vtable_offset = */ std::nullopt, id);
+ }
+
void ProcessArray(Entry& entry) {
auto referred_type = GetReferredType(entry);
auto referred_type_id = GetIdForEntry(referred_type);
@@ -537,6 +593,85 @@ class Processor {
}
}
+ struct NameWithContext {
+ std::optional<Dwarf_Off> specification;
+ std::optional<std::string> unscoped_name;
+ std::optional<std::string> scoped_name;
+ };
+
+ NameWithContext GetNameWithContext(Entry& entry) {
+ NameWithContext result;
+ // Leaf of specification tree is usually a declaration (of a function or a
+ // method). Then goes definition, which references declaration by
+ // DW_AT_specification. And on top we have instantiation, which references
+ // definition by DW_AT_abstract_origin. In the worst case we have:
+ // * instantiation
+ // >-DW_AT_abstract_origin-> definition
+ // >-DW_AT_specification-> declaration
+ //
+ // By using attribute integration we fold all information from definition to
+ // instantiation, flattening hierarchy:
+ // * instantiation + definition
+ // >-DW_AT_specification-> declaration
+ // NB: DW_AT_abstract_origin attribute is also visible, but it should be
+ // ignored, since we already used it during integration.
+ //
+ // We also need to support this case, when we don't have separate
+ // declaration:
+ // * instantiation +
+ // >-DW_AT_abstract_origin -> definition
+ //
+ // So the final algorithm is to get final DW_AT_specification through the
+ // whole chain, or use DW_AT_abstract_origin if there is no
+ // DW_AT_specification.
+ if (auto specification = entry.MaybeGetReference(DW_AT_specification)) {
+ result.specification = specification->GetOffset();
+ } else if (auto abstract_origin =
+ entry.MaybeGetReference(DW_AT_abstract_origin)) {
+ result.specification = abstract_origin->GetOffset();
+ }
+ result.unscoped_name = entry.MaybeGetDirectString(DW_AT_name);
+ if (!result.unscoped_name && !result.specification) {
+ // If there is no name and specification, then this entry is anonymous.
+ // Anonymous entries are modelled as the empty string and not nullopt.
+ // This allows us to fill and register scoped_name (also empty string) to
+ // be used in references.
+ result.unscoped_name = std::string();
+ }
+ if (result.unscoped_name) {
+ result.scoped_name = scope_ + *result.unscoped_name;
+ scoped_names_.emplace_back(
+ entry.GetOffset(), *result.scoped_name);
+ }
+ return result;
+ }
+
+ std::string GetScopedNameForSymbol(size_t symbol_idx,
+ const NameWithContext& name) {
+ // This method is designed to resolve this topology:
+ // A: specification=B
+ // B: name="foo"
+ // Any other topologies are rejected:
+ // * Name and specification in one DIE: checked right below.
+ // * Chain of specifications will result in symbol referencing another
+ // specification, which will not be in scoped_names_, because "name and
+ // specification in one DIE" is rejected.
+ if (name.scoped_name) {
+ if (name.specification) {
+ Die() << "Entry has name " << *name.scoped_name
+ << " and specification " << Hex(*name.specification);
+ }
+ return *name.scoped_name;
+ }
+ if (name.specification) {
+ unresolved_symbol_specifications_.emplace_back(*name.specification,
+ symbol_idx);
+ // Name will be filled in ResolveSymbolSpecifications
+ return {};
+ }
+ Die() << "Entry should have either name or specification";
+ }
+
void ProcessVariable(Entry& entry) {
// Skip:
// * anonymous variables (for example, anonymous union)
@@ -563,6 +698,29 @@ class Processor {
}
void ProcessFunction(Entry& entry) {
+ Subprogram subprogram = GetSubprogram(entry);
+ const Id id = AddProcessedNode<Function>(entry, std::move(subprogram.node));
+ if (subprogram.external && subprogram.address) {
+ // Only external functions with address are useful for ABI monitoring
+ const auto new_symbol_idx = result_.symbols.size();
+ result_.symbols.push_back(Types::Symbol{
+ .name = GetScopedNameForSymbol(
+ new_symbol_idx, subprogram.name_with_context),
+ .linkage_name = std::move(subprogram.linkage_name),
+ .address = *subprogram.address,
+ .id = id});
+ }
+ }
+
+ struct Subprogram {
+ Function node;
+ NameWithContext name_with_context;
+ std::optional<std::string> linkage_name;
+ std::optional<size_t> address;
+ bool external;
+ };
+
+ Subprogram GetSubprogram(Entry& entry) {
auto return_type_id = GetIdForReferredType(MaybeGetReferredType(entry));
std::vector<Id> parameters;
@@ -595,19 +753,12 @@ class Processor {
<< EntryToString(child);
}
}
- const Id id = AddProcessedNode<Function>(entry, return_type_id, parameters);
-
- if (entry.GetFlag(DW_AT_external)) {
- auto address = entry.MaybeGetAddress(DW_AT_low_pc);
- if (address) {
- // Only external functions with address are useful for ABI monitoring
- result_.symbols.push_back(Types::Symbol{
- .name = GetNameOrEmpty(entry),
- .linkage_name = entry.MaybeGetString(DW_AT_linkage_name),
- .address = *address,
- .id = id});
- }
- }
+
+ return Subprogram{.node = Function(return_type_id, parameters),
+ .name_with_context = GetNameWithContext(entry),
+ .linkage_name = entry.MaybeGetString(DW_AT_linkage_name),
+ .address = entry.MaybeGetAddress(DW_AT_low_pc),
+ .external = entry.GetFlag(DW_AT_external)};
}
// Allocate or get already allocated STG Id for Entry.
@@ -644,6 +795,10 @@ class Processor {
bool is_little_endian_binary_;
Types& result_;
std::unordered_map<Dwarf_Off, Id> id_map_;
+ // Current scope.
+ Scope scope_;
+ std::vector<std::pair<Dwarf_Off, std::string>> scoped_names_;
+ std::vector<std::pair<Dwarf_Off, size_t>> unresolved_symbol_specifications_;
};
Types ProcessEntries(std::vector<Entry> entries, bool is_little_endian_binary,
@@ -657,6 +812,7 @@ Types ProcessEntries(std::vector<Entry> entries, bool is_little_endian_binary,
processor.Process(entry);
}
processor.CheckUnresolvedIds();
+ processor.ResolveSymbolSpecifications();
return result;
}
diff --git a/dwarf_wrappers.cc b/dwarf_wrappers.cc
index 3d66145..ed0c22a 100644
--- a/dwarf_wrappers.cc
+++ b/dwarf_wrappers.cc
@@ -58,6 +58,7 @@ std::optional<Dwarf_Attribute> GetAttribute(Dwarf_Die* die,
// DW_AT_specification references, fetching the attribute from the linked DIE.
//
// libdw has infinite loop protection, as it stops after 16 dereferences.
+ // TODO: don't use dwarf_attr_integrate by default
if (!dwarf_attr_integrate(die, attribute, &result.value())) {
result.reset();
}
@@ -67,7 +68,7 @@ std::optional<Dwarf_Attribute> GetAttribute(Dwarf_Die* die,
// Get the attribute directly from DIE without following DW_AT_specification and
// DW_AT_abstract_origin references.
std::optional<Dwarf_Attribute> GetDirectAttribute(Dwarf_Die* die,
- int attribute) {
+ uint32_t attribute) {
// Create an optional with default-initialized value already inside
std::optional<Dwarf_Attribute> result(std::in_place);
if (!dwarf_attr(die, attribute, &result.value())) {
@@ -82,7 +83,7 @@ void CheckOrDwflError(bool condition, const char* caller) {
const char* errmsg = dwfl_errmsg(dwfl_error);
if (errmsg == nullptr) {
// There are some cases when DWFL fails to produce an error message.
- Die() << caller << " returned error code 0x" << std::hex << dwfl_error;
+ Die() << caller << " returned error code " << Hex(dwfl_error);
}
Die() << caller << " returned error: " << errmsg;
}
@@ -185,26 +186,36 @@ std::optional<std::string> Entry::MaybeGetString(uint32_t attribute) {
}
const char* value = dwarf_formstring(&dwarf_attribute.value());
- Check(value) << "dwarf_formstring returned error";
+ Check(value != nullptr) << "dwarf_formstring returned error";
result.emplace(value);
return result;
}
-std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(
- uint32_t attribute) {
- std::optional<uint64_t> result;
- auto dwarf_attribute = GetAttribute(&die, attribute);
+std::optional<std::string> Entry::MaybeGetDirectString(uint32_t attribute) {
+ std::optional<std::string> result;
+ auto dwarf_attribute = GetDirectAttribute(&die, attribute);
if (!dwarf_attribute) {
return result;
}
- // Place default-initialized value inside to be filled with dwarf_formudata
- result.emplace();
- Check(dwarf_formudata(&dwarf_attribute.value(), &result.value()) == kReturnOk)
- << "dwarf_formudata returned error";
+ const char* value = dwarf_formstring(&dwarf_attribute.value());
+ Check(value != nullptr) << "dwarf_formstring returned error";
+ result.emplace(value);
return result;
}
+std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) {
+ auto dwarf_attribute = GetAttribute(&die, attribute);
+ if (!dwarf_attribute) {
+ return {};
+ }
+
+ uint64_t value;
+ Check(dwarf_formudata(&dwarf_attribute.value(), &value) == kReturnOk)
+ << "dwarf_formudata returned error";
+ return value;
+}
+
bool Entry::GetFlag(uint32_t attribute) {
bool result = false;
auto dwarf_attribute = (attribute == DW_AT_declaration)
@@ -234,8 +245,7 @@ std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) {
namespace {
-void GetAddressFromLocation(Dwarf_Attribute& attribute,
- std::optional<uint64_t>& result) {
+std::optional<uint64_t> GetAddressFromLocation(Dwarf_Attribute& attribute) {
Dwarf_Op* expr = nullptr;
size_t expr_len = 0;
@@ -247,55 +257,51 @@ void GetAddressFromLocation(Dwarf_Attribute& attribute,
Dwarf_Attribute result_attribute;
if (dwarf_getlocation_attr(&attribute, expr, &result_attribute) ==
kReturnOk) {
- result.emplace();
- Check(dwarf_formaddr(&result_attribute, &result.value()) == kReturnOk)
+ uint64_t addr;
+ Check(dwarf_formaddr(&result_attribute, &addr) == kReturnOk)
<< "dwarf_formaddr returned error";
- } else if (expr_len == 1 && expr->atom == DW_OP_addr) {
+ return addr;
+ }
+ if (expr_len == 1 && expr->atom == DW_OP_addr) {
// DW_OP_addr is unsupported by dwarf_getlocation_attr, so we need to
// manually extract the address from expression.
- result.emplace(expr->number);
- } else {
- Die() << "Unsupported data location expression";
+ return expr->number;
}
+
+ Die() << "Unsupported data location expression";
}
} // namespace
std::optional<uint64_t> Entry::MaybeGetAddress(uint32_t attribute) {
- std::optional<uint64_t> result;
auto dwarf_attribute = GetAttribute(&die, attribute);
if (!dwarf_attribute) {
- return result;
+ return {};
}
if (attribute == DW_AT_location) {
- GetAddressFromLocation(*dwarf_attribute, result);
- return result;
+ return GetAddressFromLocation(*dwarf_attribute);
}
- result.emplace();
- Check(dwarf_formaddr(&dwarf_attribute.value(), &result.value()) == kReturnOk)
+ uint64_t addr;
+ Check(dwarf_formaddr(&dwarf_attribute.value(), &addr) == kReturnOk)
<< "dwarf_formaddr returned error";
- return result;
+ return addr;
}
std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() {
- std::optional<uint64_t> result;
auto attribute = GetAttribute(&die, DW_AT_data_member_location);
if (!attribute) {
- return result;
+ return {};
}
- result.emplace();
+ uint64_t offset;
// Try to interpret attribute as an unsigned integer constant
- if (dwarf_formudata(&attribute.value(), &result.value()) == kReturnOk) {
- return result;
- } else {
- Die() << "dwarf_formudata returned error, " << std::hex << GetOffset();
+ if (dwarf_formudata(&attribute.value(), &offset) == kReturnOk) {
+ return offset;
}
// TODO: support location expressions
-
- return std::nullopt;
+ Die() << "dwarf_formudata returned error, " << std::hex << GetOffset();
}
} // namespace dwarf
diff --git a/dwarf_wrappers.h b/dwarf_wrappers.h
index 1f20b16..89f7705 100644
--- a/dwarf_wrappers.h
+++ b/dwarf_wrappers.h
@@ -57,6 +57,7 @@ struct Entry {
int GetTag();
Dwarf_Off GetOffset();
std::optional<std::string> MaybeGetString(uint32_t attribute);
+ std::optional<std::string> MaybeGetDirectString(uint32_t attribute);
std::optional<uint64_t> MaybeGetUnsignedConstant(uint32_t attribute);
bool GetFlag(uint32_t attribute);
std::optional<Entry> MaybeGetReference(uint32_t attribute);
diff --git a/elf_loader.cc b/elf_loader.cc
index 75062f8..0163304 100644
--- a/elf_loader.cc
+++ b/elf_loader.cc
@@ -320,7 +320,7 @@ std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type) {
case SymbolType::TLS:
return os << "TLS";
case SymbolType::GNU_IFUNC:
- return os << "indirect function";
+ return os << "indirect (ifunc) function";
}
}
diff --git a/elf_reader.cc b/elf_reader.cc
index 9afb8c6..a5eb93f 100644
--- a/elf_reader.cc
+++ b/elf_reader.cc
@@ -89,6 +89,8 @@ ElfSymbol::SymbolType ConvertSymbolType(
return ElfSymbol::SymbolType::COMMON;
case SymbolTableEntry::SymbolType::TLS:
return ElfSymbol::SymbolType::TLS;
+ case SymbolTableEntry::SymbolType::GNU_IFUNC:
+ return ElfSymbol::SymbolType::GNU_IFUNC;
default:
Die() << "Unsupported ELF symbol type: " << symbol_type;
}
@@ -156,7 +158,8 @@ bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol) {
// Reject symbols that are not functions or variables.
if (symbol_type != SymbolTableEntry::SymbolType::FUNCTION &&
symbol_type != SymbolTableEntry::SymbolType::OBJECT &&
- symbol_type != SymbolTableEntry::SymbolType::TLS) {
+ symbol_type != SymbolTableEntry::SymbolType::TLS &&
+ symbol_type != SymbolTableEntry::SymbolType::GNU_IFUNC) {
return false;
}
@@ -230,7 +233,7 @@ class Typing {
// In general, we want to handle as many of the following cases as possible.
// In practice, determining the correct ELF-DWARF match may be impossible.
//
- // * compiler-driven aliasing - mutliple symbols with same address
+ // * compiler-driven aliasing - multiple symbols with same address
// * zero-size symbol false aliasing - multiple symbols and types with same
// address
// * weak/strong linkage symbols - multiple symbols and types with same
@@ -250,8 +253,8 @@ class Typing {
// TODO: allow "compatible" duplicates, for example
// "void foo(int bar)" vs "void foo(const int bar)"
if (!IsEqual(symbol, other)) {
- Die() << "Duplicate DWARF symbol: address=0x" << std::hex
- << symbol.address << std::dec << ", name=" << symbol.name;
+ Die() << "Duplicate DWARF symbol: address=" << Hex(symbol.address)
+ << ", name=" << symbol.name;
}
}
}
@@ -358,7 +361,7 @@ Id Reader::Read() {
}
public_functions_and_variables.shrink_to_fit();
- if (elf_.IsLinuxKernelBinary()) {
+ if (is_linux_kernel) {
crc_values_ = GetCRCValuesMap(all_symbols, elf_);
namespaces_ = GetNamespacesMap(all_symbols, elf_);
}
diff --git a/equality_cache.h b/equality_cache.h
index 35d58a8..490b5ad 100644
--- a/equality_cache.h
+++ b/equality_cache.h
@@ -26,7 +26,6 @@
#include <unordered_set>
#include <vector>
-#include "equality.h"
#include "graph.h"
#include "hashing.h"
#include "metrics.h"
diff --git a/error.h b/error.h
index c301a07..6eed80e 100644
--- a/error.h
+++ b/error.h
@@ -21,8 +21,10 @@
#define STG_ERROR_H_
#include <exception>
+#include <ios>
#include <iostream>
#include <optional>
+#include <ostream>
#include <sstream>
#include <string>
@@ -95,8 +97,30 @@ class Warn {
std::ostringstream os_;
};
-inline std::string ErrnoToString(int error) {
- return std::system_error(error, std::generic_category()).what();
+struct Error {
+ explicit Error(int number) : number(number) {}
+ int number;
+};
+
+inline std::ostream& operator<<(std::ostream& os, Error error) {
+ return os << std::system_error(error.number, std::generic_category()).what();
+}
+
+template <typename T>
+struct Hex {
+ explicit Hex(const T& value) : value(value) {}
+ const T& value;
+};
+
+template <typename T> Hex(const T&) -> Hex<T>;
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const Hex<T>& hex_value) {
+ // not quite right if an exception is thrown
+ const auto flags = os.flags();
+ os << std::hex << std::showbase << hex_value.value;
+ os.flags(flags);
+ return os;
}
} // namespace stg
diff --git a/file_descriptor.cc b/file_descriptor.cc
index 70ca720..8c4fc91 100644
--- a/file_descriptor.cc
+++ b/file_descriptor.cc
@@ -33,14 +33,14 @@ namespace stg {
FileDescriptor::FileDescriptor(const char* filename, int flags, mode_t mode)
: fd_(open(filename, flags, mode)) {
if (fd_ < 0) {
- Die() << "open failed: " << ErrnoToString(errno);
+ Die() << "open failed: " << Error(errno);
}
}
FileDescriptor::~FileDescriptor() noexcept(false) {
// If we're unwinding, ignore any close failure.
if (fd_ >= 0 && close(fd_) != 0 && std::uncaught_exceptions() == 0) {
- Die() << "close failed: " << ErrnoToString(errno);
+ Die() << "close failed: " << Error(errno);
}
fd_ = -1;
}
diff --git a/graph.cc b/graph.cc
index 342434f..0153710 100644
--- a/graph.cc
+++ b/graph.cc
@@ -25,6 +25,7 @@
#include <limits>
#include <ostream>
#include <string>
+#include <string_view>
namespace stg {
@@ -54,15 +55,27 @@ std::ostream& operator<<(std::ostream& os, Method::Kind kind) {
}
}
-std::ostream& operator<<(std::ostream& os, StructUnion::Kind kind) {
+namespace {
+
+std::string_view ToString(StructUnion::Kind kind) {
switch (kind) {
case StructUnion::Kind::STRUCT:
- return os << "struct";
+ return "struct";
case StructUnion::Kind::UNION:
- return os << "union";
+ return "union";
}
}
+} // namespace
+
+std::ostream& operator<<(std::ostream& os, StructUnion::Kind kind) {
+ return os << ToString(kind);
+}
+
+std::string& operator+=(std::string& os, StructUnion::Kind kind) {
+ return os += ToString(kind);
+}
+
std::ostream& operator<<(std::ostream& os, Qualifier qualifier) {
switch (qualifier) {
case Qualifier::CONST:
@@ -84,6 +97,8 @@ std::ostream& operator<<(std::ostream& os, ElfSymbol::SymbolType type) {
return os << "common";
case ElfSymbol::SymbolType::TLS:
return os << "TLS";
+ case ElfSymbol::SymbolType::GNU_IFUNC:
+ return os << "indirect (ifunc) function";
}
}
@@ -125,7 +140,7 @@ std::string VersionedSymbolName(const ElfSymbol& symbol) {
}
std::ostream& operator<<(std::ostream& os, ElfSymbol::CRC crc) {
- return os << "0x" << std::hex << crc.number;
+ return os << Hex(crc.number);
}
std::ostream& operator<<(std::ostream& os, Primitive::Encoding encoding) {
diff --git a/graph.h b/graph.h
index d91fba9..6dd1b66 100644
--- a/graph.h
+++ b/graph.h
@@ -221,6 +221,7 @@ struct StructUnion {
};
std::ostream& operator<<(std::ostream& os, StructUnion::Kind kind);
+std::string& operator+=(std::string& os, StructUnion::Kind kind);
struct Enumeration {
using Enumerators = std::vector<std::pair<std::string, int64_t>>;
@@ -246,7 +247,7 @@ struct Function {
};
struct ElfSymbol {
- enum class SymbolType { OBJECT, FUNCTION, COMMON, TLS };
+ enum class SymbolType { OBJECT, FUNCTION, COMMON, TLS, GNU_IFUNC };
enum class Binding { GLOBAL, LOCAL, WEAK, GNU_UNIQUE };
enum class Visibility { DEFAULT, PROTECTED, HIDDEN, INTERNAL };
struct VersionInfo {
@@ -402,7 +403,9 @@ class Graph {
template <typename Node, typename... Args>
void Set(Id id, Args&&... args) {
auto& reference = indirection_[id.ix_];
- Check(reference.first == Which::ABSENT) << "node value already set";
+ if (reference.first != Which::ABSENT) {
+ Die() << "node value already set: " << id;
+ }
if constexpr (std::is_same_v<Node, Void>) {
reference = {Which::VOID, void_.size()};
void_.emplace_back(std::forward<Args>(args)...);
@@ -470,7 +473,9 @@ class Graph {
void Unset(Id id) {
auto& reference = indirection_[id.ix_];
- Check(reference.first != Which::ABSENT) << "node value already unset";
+ if (reference.first == Which::ABSENT) {
+ Die() << "node value already unset: " << id;
+ }
reference = {Which::ABSENT, 0};
}
@@ -534,7 +539,7 @@ Result Graph::Apply(FunctionObject& function, Id id, Args&&... args) const {
const auto& [which, ix] = indirection_[id.ix_];
switch (which) {
case Which::ABSENT:
- Die() << "undefined node";
+ Die() << "undefined node: " << id;
case Which::VOID:
return function(void_[ix], std::forward<Args>(args)...);
case Which::VARIADIC:
@@ -580,7 +585,7 @@ Result Graph::Apply2(
}
switch (which1) {
case Which::ABSENT:
- Die() << "undefined node";
+ Die() << "undefined nodes: " << id1 << ", " << id2;
case Which::VOID:
return function(void_[ix1], void_[ix2],
std::forward<Args>(args)...);
diff --git a/proto_reader.cc b/proto_reader.cc
index 61f5c5b..adc906d 100644
--- a/proto_reader.cc
+++ b/proto_reader.cc
@@ -273,7 +273,7 @@ std::map<std::string, Id> Transformer::Transform(
const Id stg_id = GetId(id);
const auto [it, inserted] = result.emplace(get_key(stg_id), stg_id);
if (!inserted) {
- Die() << "found conflicting interface nodes: " << it->first << '\n';
+ Die() << "conflicting interface nodes: " << it->first;
}
}
return result;
@@ -288,8 +288,7 @@ stg::PointerReference::Kind Transformer::Transform(PointerReference::Kind x) {
case PointerReference::RVALUE_REFERENCE:
return stg::PointerReference::Kind::RVALUE_REFERENCE;
default:
- Die() << "Encountered unsupported value for PointerReference Kind " << x
- << '\n';
+ Die() << "unknown PointerReference::Kind " << x;
}
}
@@ -302,7 +301,7 @@ stg::Qualifier Transformer::Transform(Qualified::Qualifier x) {
case Qualified::RESTRICT:
return stg::Qualifier::RESTRICT;
default:
- Die() << "Encountered unsupported value for Qualifier " << x << '\n';
+ Die() << "unknown Qualified::Qualifier " << x;
}
}
@@ -325,8 +324,7 @@ stg::Primitive::Encoding Transformer::Transform(Primitive::Encoding x) {
case Primitive::UTF:
return stg::Primitive::Encoding::UTF;
default:
- Die() << "Encountered unsupported value for Primitive type Encoding " << x
- << '\n';
+ Die() << "unknown Primitive::Encoding " << x;
}
}
@@ -337,8 +335,7 @@ stg::BaseClass::Inheritance Transformer::Transform(BaseClass::Inheritance x) {
case BaseClass::VIRTUAL:
return stg::BaseClass::Inheritance::VIRTUAL;
default:
- Die() << "Encountered unsupported value for BaseClass Inheritance " << x
- << '\n';
+ Die() << "unknown BaseClass::Inheritance " << x;
}
}
@@ -351,7 +348,7 @@ stg::Method::Kind Transformer::Transform(Method::Kind x) {
case Method::VIRTUAL:
return stg::Method::Kind::VIRTUAL;
default:
- Die() << "Encountered unsupported value for Method Kind " << x << '\n';
+ Die() << "unknown Method::Kind " << x;
}
}
@@ -362,8 +359,7 @@ stg::StructUnion::Kind Transformer::Transform(StructUnion::Kind x) {
case StructUnion::UNION:
return stg::StructUnion::Kind::UNION;
default:
- Die() << "Encountered unsupported value for StructUnion Kind " << x
- << '\n';
+ Die() << "unknown StructUnion::Kind " << x;
}
}
@@ -377,8 +373,10 @@ stg::ElfSymbol::SymbolType Transformer::Transform(ElfSymbol::SymbolType x) {
return stg::ElfSymbol::SymbolType::COMMON;
case ElfSymbol::TLS:
return stg::ElfSymbol::SymbolType::TLS;
+ case ElfSymbol::GNU_IFUNC:
+ return stg::ElfSymbol::SymbolType::GNU_IFUNC;
default:
- Die() << "Encountered unsupported value for ElfSymbol Type " << x << '\n';
+ Die() << "unknown ElfSymbol::SymbolType " << x;
}
}
@@ -393,8 +391,7 @@ stg::ElfSymbol::Binding Transformer::Transform(ElfSymbol::Binding x) {
case ElfSymbol::GNU_UNIQUE:
return stg::ElfSymbol::Binding::GNU_UNIQUE;
default:
- Die() << "Encountered unsupported value for ElfSymbol Binding " << x
- << '\n';
+ Die() << "unknown ElfSymbol::Binding " << x;
}
}
@@ -409,8 +406,7 @@ stg::ElfSymbol::Visibility Transformer::Transform(ElfSymbol::Visibility x) {
case ElfSymbol::INTERNAL:
return stg::ElfSymbol::Visibility::INTERNAL;
default:
- Die() << "Encountered unsupported value for ElfSymbol Visibility " << x
- << '\n';
+ Die() << "unknown ElfSymbol::Visibility " << x;
}
}
@@ -460,7 +456,7 @@ void CheckFormatVersion(uint32_t version, std::optional<std::string> path) {
Id Read(Graph& graph, const std::string& path) {
std::ifstream ifs(path);
Check(ifs.good()) << "error opening file '" << path
- << "' for reading: " << ErrnoToString(errno);
+ << "' for reading: " << Error(errno);
google::protobuf::io::IstreamInputStream is(&ifs);
proto::STG stg;
google::protobuf::TextFormat::Parse(&is, &stg);
diff --git a/proto_writer.cc b/proto_writer.cc
index 503fa17..bdf4ea6 100644
--- a/proto_writer.cc
+++ b/proto_writer.cc
@@ -388,6 +388,8 @@ ElfSymbol::SymbolType Transform<MapId>::operator()(
return ElfSymbol::COMMON;
case stg::ElfSymbol::SymbolType::TLS:
return ElfSymbol::TLS;
+ case stg::ElfSymbol::SymbolType::GNU_IFUNC:
+ return ElfSymbol::GNU_IFUNC;
}
}
diff --git a/proto_writer.h b/proto_writer.h
index ff5be85..59e2b38 100644
--- a/proto_writer.h
+++ b/proto_writer.h
@@ -30,7 +30,7 @@ namespace proto {
class Writer {
public:
- Writer(const stg::Graph& graph)
+ explicit Writer(const stg::Graph& graph)
: graph_(graph) {}
void Write(const Id&, std::ostream&);
diff --git a/scope.h b/scope.h
new file mode 100644
index 0000000..e02e76e
--- /dev/null
+++ b/scope.h
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// -*- mode: C++ -*-
+//
+// Copyright 2022-2023 Google LLC
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions (the
+// "License"); you may not use this file except in compliance with the
+// License. You may obtain a copy of the License at
+//
+// https://llvm.org/LICENSE.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Ignes Simeonova
+// Author: Aleksei Vetrov
+
+#ifndef STG_SCOPE_H_
+#define STG_SCOPE_H_
+
+#include <cstddef>
+#include <string>
+
+namespace stg {
+
+using Scope = std::string;
+
+class PushScopeName {
+ public:
+ template <typename Kind>
+ PushScopeName(Scope& scope_, Kind&& kind, const std::string& name)
+ : scope_name_(scope_), old_size_(scope_name_.size()) {
+ if (name.empty()) {
+ scope_name_ += "<unnamed ";
+ scope_name_ += kind;
+ scope_name_ += ">::";
+ } else {
+ scope_name_ += name;
+ scope_name_ += "::";
+ }
+ }
+
+ PushScopeName(const PushScopeName& other) = delete;
+ PushScopeName& operator=(const PushScopeName& other) = delete;
+ ~PushScopeName() {
+ scope_name_.resize(old_size_);
+ }
+
+ private:
+ std::string& scope_name_;
+ const size_t old_size_;
+};
+
+} // namespace stg
+
+#endif // STG_SCOPE_H_
diff --git a/stg.cc b/stg.cc
index 07f4ef4..8f7c898 100644
--- a/stg.cc
+++ b/stg.cc
@@ -44,8 +44,6 @@
namespace stg {
namespace {
-Metrics metrics;
-
struct GetInterface {
Interface& operator()(Interface& x) {
return x;
@@ -88,7 +86,7 @@ void Filter(Graph& graph, Id root, const SymbolFilter& filter) {
std::swap(interface.symbols, symbols);
}
-void Write(const Graph& graph, Id root, const char* output) {
+void Write(const Graph& graph, Id root, const char* output, Metrics& metrics) {
std::ofstream os(output);
{
Time x(metrics, "write");
@@ -198,26 +196,27 @@ int main(int argc, char* argv[]) {
try {
stg::Graph graph;
+ stg::Metrics metrics;
std::vector<stg::Id> roots;
roots.reserve(inputs.size());
for (auto input : inputs) {
roots.push_back(stg::Read(graph, opt_input_format, input,
- opt_read_options, stg::metrics));
+ opt_read_options, metrics));
}
stg::Id root = roots.size() == 1 ? roots[0] : stg::Merge(graph, roots);
if (opt_symbols) {
stg::Filter(graph, root, *opt_symbols);
}
if (!opt_keep_duplicates) {
- stg::ResolveTypes(graph, {std::ref(root)}, stg::metrics);
- const auto hashes = stg::Fingerprint(graph, root, stg::metrics);
- root = stg::Deduplicate(graph, root, hashes, stg::metrics);
+ stg::ResolveTypes(graph, {std::ref(root)}, metrics);
+ const auto hashes = stg::Fingerprint(graph, root, metrics);
+ root = stg::Deduplicate(graph, root, hashes, metrics);
}
for (auto output : outputs) {
- stg::Write(graph, root, output);
+ stg::Write(graph, root, output, metrics);
}
if (opt_metrics) {
- stg::Report(stg::metrics, std::cerr);
+ stg::Report(metrics, std::cerr);
}
return 0;
} catch (const stg::Exception& e) {
diff --git a/stg.proto b/stg.proto
index bf7259a..7e8894f 100644
--- a/stg.proto
+++ b/stg.proto
@@ -205,6 +205,7 @@ message ElfSymbol {
FUNCTION = 2;
COMMON = 3;
TLS = 4;
+ GNU_IFUNC = 5;
}
enum Binding {
diff --git a/stgdiff.cc b/stgdiff.cc
index 1d07d26..b712a47 100644
--- a/stgdiff.cc
+++ b/stgdiff.cc
@@ -42,8 +42,6 @@
namespace {
-stg::Metrics metrics;
-
const int kAbiChange = 4;
const int kFidelityChange = 8;
const size_t kMaxCrcOnlyChanges = 3;
@@ -270,6 +268,7 @@ int main(int argc, char* argv[]) {
}
try {
+ stg::Metrics metrics;
const int status = opt_exact ? RunExact(inputs, opt_read_options, metrics)
: Run(inputs, outputs, opt_ignore,
opt_read_options, opt_fidelity, metrics);
diff --git a/stgdiff_test.cc b/stgdiff_test.cc
index 7610883..7ae2da2 100644
--- a/stgdiff_test.cc
+++ b/stgdiff_test.cc
@@ -164,7 +164,62 @@ TEST_CASE("ignore") {
"qualifier_1.stg",
stg::Ignore(stg::Ignore::QUALIFIER),
"empty",
- true}));
+ true}),
+ IgnoreTestCase(
+ {"interface addition",
+ stg::InputFormat::STG,
+ "interface_addition_0.stg",
+ stg::InputFormat::STG,
+ "interface_addition_1.stg",
+ stg::Ignore(),
+ "interface_addition_small_diff",
+ false}),
+ IgnoreTestCase(
+ {"type addition",
+ stg::InputFormat::STG,
+ "type_addition_0.stg",
+ stg::InputFormat::STG,
+ "type_addition_1.stg",
+ stg::Ignore(),
+ "type_addition_small_diff",
+ false}),
+ IgnoreTestCase(
+ {"interface addition ignored",
+ stg::InputFormat::STG,
+ "interface_addition_0.stg",
+ stg::InputFormat::STG,
+ "interface_addition_1.stg",
+ stg::Ignore(stg::Ignore::INTERFACE_ADDITION),
+ "empty",
+ true}),
+ IgnoreTestCase(
+ {"type addition ignored",
+ stg::InputFormat::STG,
+ "type_addition_0.stg",
+ stg::InputFormat::STG,
+ "type_addition_1.stg",
+ stg::Ignore(stg::Ignore::INTERFACE_ADDITION),
+ "empty",
+ true}),
+ IgnoreTestCase(
+ {"CRC change",
+ stg::InputFormat::STG,
+ "crc_change_0.stg",
+ stg::InputFormat::STG,
+ "crc_change_1.stg",
+ stg::Ignore(),
+ "crc_change_small_diff",
+ false}),
+ IgnoreTestCase(
+ {"CRC change ignored",
+ stg::InputFormat::STG,
+ "crc_change_0.stg",
+ stg::InputFormat::STG,
+ "crc_change_1.stg",
+ stg::Ignore(stg::Ignore::SYMBOL_CRC),
+ "empty",
+ true})
+ );
SECTION(test.name) {
stg::Metrics metrics;
diff --git a/symbol_filter.cc b/symbol_filter.cc
index 866957e..f1eedaa 100644
--- a/symbol_filter.cc
+++ b/symbol_filter.cc
@@ -47,7 +47,7 @@ SymbolSet ReadAbigail(const std::string& filename) {
SymbolSet symbols;
std::ifstream file(filename);
Check(file.good()) << "error opening symbol file '" << filename << ": "
- << ErrnoToString(errno);
+ << Error(errno);
bool in_symbol_section = false;
std::string line;
while (std::getline(file, line)) {
@@ -89,7 +89,7 @@ SymbolSet ReadAbigail(const std::string& filename) {
}
}
Check(file.eof()) << "error reading symbol file '" << filename << ": "
- << ErrnoToString(errno);
+ << Error(errno);
return symbols;
}
diff --git a/testdata/crc_change_0.stg b/testdata/crc_change_0.stg
new file mode 100644
index 0000000..80057d5
--- /dev/null
+++ b/testdata/crc_change_0.stg
@@ -0,0 +1,21 @@
+version: 0x00000001
+root_id: 0x84ea5130
+primitive {
+ id: 0x6720d32f
+ name: "int"
+ encoding: SIGNED_INTEGER
+ bytesize: 0x00000004
+}
+elf_symbol {
+ id: 0x7709bd40
+ name: "x"
+ is_defined: true
+ symbol_type: FUNCTION
+ crc: 0x1d24c881
+ type_id: 0x6720d32f
+ full_name: "x"
+}
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
+}
diff --git a/testdata/crc_change_1.stg b/testdata/crc_change_1.stg
new file mode 100644
index 0000000..fc73858
--- /dev/null
+++ b/testdata/crc_change_1.stg
@@ -0,0 +1,21 @@
+version: 0x00000001
+root_id: 0x84ea5130
+primitive {
+ id: 0x6720d32f
+ name: "int"
+ encoding: SIGNED_INTEGER
+ bytesize: 0x00000004
+}
+elf_symbol {
+ id: 0x7709bd40
+ name: "x"
+ is_defined: true
+ symbol_type: FUNCTION
+ crc: 0x6c6bbe0a
+ type_id: 0x6720d32f
+ full_name: "x"
+}
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
+}
diff --git a/testdata/crc_change_small_diff b/testdata/crc_change_small_diff
new file mode 100644
index 0000000..148fd90
--- /dev/null
+++ b/testdata/crc_change_small_diff
@@ -0,0 +1,3 @@
+function symbol 'int x' changed
+ CRC changed from 0x1d24c881 to 0x6c6bbe0a
+
diff --git a/testdata/interface_addition_0.stg b/testdata/interface_addition_0.stg
new file mode 100644
index 0000000..2071972
--- /dev/null
+++ b/testdata/interface_addition_0.stg
@@ -0,0 +1,5 @@
+version: 0x00000001
+root_id: 0x84ea5130
+interface {
+ id: 0x84ea5130
+}
diff --git a/testdata/interface_addition_1.stg b/testdata/interface_addition_1.stg
new file mode 100644
index 0000000..1ad5ece
--- /dev/null
+++ b/testdata/interface_addition_1.stg
@@ -0,0 +1,20 @@
+version: 0x00000001
+root_id: 0x84ea5130
+primitive {
+ id: 0x6720d32f
+ name: "int"
+ encoding: SIGNED_INTEGER
+ bytesize: 0x00000004
+}
+elf_symbol {
+ id: 0x7709bd40
+ name: "x"
+ is_defined: true
+ symbol_type: OBJECT
+ type_id: 0x6720d32f
+ full_name: "x"
+}
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
+}
diff --git a/testdata/interface_addition_small_diff b/testdata/interface_addition_small_diff
new file mode 100644
index 0000000..2d9c8a9
--- /dev/null
+++ b/testdata/interface_addition_small_diff
@@ -0,0 +1,2 @@
+variable symbol 'int x' was added
+
diff --git a/testdata/type_addition_0.stg b/testdata/type_addition_0.stg
new file mode 100644
index 0000000..2071972
--- /dev/null
+++ b/testdata/type_addition_0.stg
@@ -0,0 +1,5 @@
+version: 0x00000001
+root_id: 0x84ea5130
+interface {
+ id: 0x84ea5130
+}
diff --git a/testdata/type_addition_1.stg b/testdata/type_addition_1.stg
new file mode 100644
index 0000000..e449dae
--- /dev/null
+++ b/testdata/type_addition_1.stg
@@ -0,0 +1,11 @@
+version: 0x00000001
+root_id: 0x84ea5130
+struct_union {
+ id: 0xa8a0bc6a
+ kind: UNION
+ name: "U"
+}
+interface {
+ id: 0x84ea5130
+ type_id: 0xa8a0bc6a
+}
diff --git a/testdata/type_addition_small_diff b/testdata/type_addition_small_diff
new file mode 100644
index 0000000..8409545
--- /dev/null
+++ b/testdata/type_addition_small_diff
@@ -0,0 +1,2 @@
+type 'union U' was added
+
diff --git a/type_resolution.cc b/type_resolution.cc
index bde42f6..5f23cb5 100644
--- a/type_resolution.cc
+++ b/type_resolution.cc
@@ -22,14 +22,13 @@
#include <functional>
#include <map>
#include <string>
-#include <unordered_map>
-#include <unordered_set>
#include <utility>
#include <vector>
#include "error.h"
#include "graph.h"
#include "substitution.h"
+#include "unification.h"
namespace stg {
@@ -140,7 +139,6 @@ struct NamedTypes {
} else {
Check(named) << "anonymous forward declaration";
info.declarations.push_back(id);
- incomplete.insert(id);
++declarations;
}
}
@@ -157,7 +155,6 @@ struct NamedTypes {
} else {
Check(named) << "anonymous forward declaration";
info.declarations.push_back(id);
- incomplete.insert(id);
++declarations;
}
}
@@ -182,291 +179,12 @@ struct NamedTypes {
// ordered map for consistency and sequential processing of related types
std::map<Type, Info> type_info;
Graph::DenseIdSet seen;
- std::unordered_set<Id> incomplete;
Counter nodes;
Counter types;
Counter definitions;
Counter declarations;
};
-// Keep track of which type nodes have been unified together, avoiding mapping
-// definitions to declarations.
-struct UnificationCache {
- UnificationCache(const std::unordered_set<Id>& incomplete,
- Graph::DenseIdMapping& mapping, Metrics& metrics)
- : incomplete(incomplete),
- mapping(mapping),
- find_query(metrics, "cache.find_query"),
- find_halved(metrics, "cache.find_halved"),
- union_known(metrics, "cache.union_known"),
- union_unknown(metrics, "cache.union_unknown"),
- union_unknown_forced1(metrics, "cache.union_unknown_forced1"),
- union_unknown_forced2(metrics, "cache.union_unknown_forced2") {}
-
- Id Find(Id id) {
- ++find_query;
- // path halving - tiny performance gain
- while (true) {
- // note: safe to take references as mapping cannot grow after this
- auto& parent = mapping[id];
- if (parent == id) {
- return id;
- }
- auto& parent_parent = mapping[parent];
- if (parent_parent == parent) {
- return parent;
- }
- id = parent = parent_parent;
- ++find_halved;
- }
- }
-
- void Union(Id id1, Id id2) {
- // no union by rank - overheads result in a performance loss
- const Id fid1 = Find(id1);
- const Id fid2 = Find(id2);
- if (fid1 == fid2) {
- ++union_known;
- return;
- }
- const bool prefer1 = incomplete.find(fid1) == incomplete.end();
- const bool prefer2 = incomplete.find(fid2) == incomplete.end();
- if (prefer1 == prefer2) {
- mapping[fid1] = fid2;
- ++union_unknown;
- } else if (prefer1 < prefer2) {
- mapping[fid1] = fid2;
- ++union_unknown_forced1;
- } else {
- mapping[fid2] = fid1;
- ++union_unknown_forced2;
- }
- }
-
- const std::unordered_set<Id>& incomplete;
- Graph::DenseIdMapping& mapping;
- Counter find_query;
- Counter find_halved;
- Counter union_known;
- Counter union_unknown;
- Counter union_unknown_forced1;
- Counter union_unknown_forced2;
-};
-
-// Type Unification
-//
-// This is very similar to Equals. The differences are the recursion control,
-// caching and handling of StructUnion and Enum nodes.
-//
-// During unification, keep track of which pairs of types need to be equal, but
-// do not add them to the cache. The caller will do that iff unification
-// succeeds.
-//
-// A declaration and definition of the same named type can be unified. This is
-// forward declaration resolution.
-struct Unify {
- Unify(const Graph& graph, UnificationCache& cache)
- : graph(graph), cache(cache) {}
-
- bool operator()(Id id1, Id id2) {
- Id fid1 = Find(id1);
- Id fid2 = Find(id2);
- if (fid1 == fid2) {
- return true;
- }
-
- // Check if the comparison has an already known result.
- //
- // Opportunistic as seen is unaware of new mappings.
- if (!seen.emplace(fid1, fid2).second) {
- return true;
- }
-
- if (!graph.Apply2<bool>(*this, fid1, fid2)) {
- return false;
- }
-
- // These will occasionally get substituted due to a recursive call.
- fid1 = Find(fid1);
- fid2 = Find(fid2);
- if (fid1 == fid2) {
- return true;
- }
-
- const bool prefer1 = cache.incomplete.find(fid1) == cache.incomplete.end();
- const bool prefer2 = cache.incomplete.find(fid2) == cache.incomplete.end();
- if (prefer1 <= prefer2) {
- mapping.insert({fid1, fid2});
- } else {
- mapping.insert({fid2, fid1});
- }
-
- return true;
- }
-
- bool operator()(const std::vector<Id>& ids1, const std::vector<Id>& ids2) {
- bool result = ids1.size() == ids2.size();
- for (size_t ix = 0; result && ix < ids1.size(); ++ix) {
- result = (*this)(ids1[ix], ids2[ix]);
- }
- return result;
- }
-
- template <typename Key>
- bool operator()(const std::map<Key, Id>& ids1,
- const std::map<Key, Id>& ids2) {
- bool result = ids1.size() == ids2.size();
- auto it1 = ids1.begin();
- auto it2 = ids2.begin();
- const auto end1 = ids1.end();
- const auto end2 = ids2.end();
- while (result && it1 != end1 && it2 != end2) {
- result = it1->first == it2->first
- && (*this)(it1->second, it2->second);
- ++it1;
- ++it2;
- }
- return result && it1 == end1 && it2 == end2;
- }
-
- bool operator()(const Void&, const Void&) {
- return true;
- }
-
- bool operator()(const Variadic&, const Variadic&) {
- return true;
- }
-
- bool operator()(const PointerReference& x1,
- const PointerReference& x2) {
- return x1.kind == x2.kind
- && (*this)(x1.pointee_type_id, x2.pointee_type_id);
- }
-
- bool operator()(const PointerToMember& x1, const PointerToMember& x2) {
- return (*this)(x1.containing_type_id, x2.containing_type_id)
- && (*this)(x1.pointee_type_id, x2.pointee_type_id);
- }
-
- bool operator()(const Typedef& x1, const Typedef& x2) {
- return x1.name == x2.name
- && (*this)(x1.referred_type_id, x2.referred_type_id);
- }
-
- bool operator()(const Qualified& x1, const Qualified& x2) {
- return x1.qualifier == x2.qualifier
- && (*this)(x1.qualified_type_id, x2.qualified_type_id);
- }
-
- bool operator()(const Primitive& x1, const Primitive& x2) {
- return x1.name == x2.name
- && x1.encoding == x2.encoding
- && x1.bytesize == x2.bytesize;
- }
-
- bool operator()(const Array& x1, const Array& x2) {
- return x1.number_of_elements == x2.number_of_elements
- && (*this)(x1.element_type_id, x2.element_type_id);
- }
-
- bool operator()(const BaseClass& x1, const BaseClass& x2) {
- return x1.offset == x2.offset
- && x1.inheritance == x2.inheritance
- && (*this)(x1.type_id, x2.type_id);
- }
-
- bool operator()(const Method& x1, const Method& x2) {
- return x1.mangled_name == x2.mangled_name
- && x1.name == x2.name
- && x1.kind == x2.kind
- && x1.vtable_offset == x2.vtable_offset
- && (*this)(x1.type_id, x2.type_id);
- }
-
- bool operator()(const Member& x1, const Member& x2) {
- return x1.name == x2.name
- && x1.offset == x2.offset
- && x1.bitsize == x2.bitsize
- && (*this)(x1.type_id, x2.type_id);
- }
-
- bool operator()(const StructUnion& x1, const StructUnion& x2) {
- const auto& definition1 = x1.definition;
- const auto& definition2 = x2.definition;
- bool result = x1.kind == x2.kind
- && x1.name == x2.name;
- // allow mismatches as forward declarations are always unifiable
- if (result && definition1.has_value() && definition2.has_value()) {
- result = definition1->bytesize == definition2->bytesize
- && (*this)(definition1->base_classes, definition2->base_classes)
- && (*this)(definition1->methods, definition2->methods)
- && (*this)(definition1->members, definition2->members);
- }
- return result;
- }
-
- bool operator()(const Enumeration& x1, const Enumeration& x2) {
- const auto& definition1 = x1.definition;
- const auto& definition2 = x2.definition;
- bool result = x1.name == x2.name;
- // allow mismatches as forward declarations are always unifiable
- if (result && definition1.has_value() && definition2.has_value()) {
- result = (*this)(definition1->underlying_type_id,
- definition2->underlying_type_id)
- && definition1->enumerators == definition2->enumerators;
- }
- return result;
- }
-
- bool operator()(const Function& x1, const Function& x2) {
- return (*this)(x1.parameters, x2.parameters)
- && (*this)(x1.return_type_id, x2.return_type_id);
- }
-
- bool operator()(const ElfSymbol& x1, const ElfSymbol& x2) {
- bool result = x1.symbol_name == x2.symbol_name
- && x1.version_info == x2.version_info
- && x1.is_defined == x2.is_defined
- && x1.symbol_type == x2.symbol_type
- && x1.binding == x2.binding
- && x1.visibility == x2.visibility
- && x1.crc == x2.crc
- && x1.ns == x2.ns
- && x1.full_name == x2.full_name
- && x1.type_id.has_value() == x2.type_id.has_value();
- if (result && x1.type_id.has_value()) {
- result = (*this)(x1.type_id.value(), x2.type_id.value());
- }
- return result;
- }
-
- bool operator()(const Interface& x1, const Interface& x2) {
- return (*this)(x1.symbols, x2.symbols)
- && (*this)(x1.types, x2.types);
- }
-
- bool Mismatch() {
- return false;
- }
-
- Id Find(Id id) {
- while (true) {
- id = cache.Find(id);
- auto it = mapping.find(id);
- if (it != mapping.end()) {
- id = it->second;
- continue;
- }
- return id;
- }
- }
-
- const Graph& graph;
- UnificationCache& cache;
- std::unordered_set<Pair> seen;
- std::unordered_map<Id, Id> mapping;
-};
-
} // namespace
void ResolveTypes(Graph& graph,
@@ -483,8 +201,7 @@ void ResolveTypes(Graph& graph,
}
}
- Graph::DenseIdMapping mapping = graph.MakeDenseIdMapping();
- UnificationCache cache(named_types.incomplete, mapping, metrics);
+ Unification unification(graph, metrics);
{
const Time time(metrics, "resolve.unification");
Counter definition_unified(metrics, "resolve.definition.unified");
@@ -499,12 +216,8 @@ void ResolveTypes(Graph& graph,
std::vector<Id> todo;
distinct_definitions.push_back(candidate);
for (size_t i = 1; i < definitions.size(); ++i) {
- Unify unify(graph, cache);
- if (unify(definitions[i], candidate)) {
- // unification succeeded, commit the mappings
- for (const auto& s : unify.mapping) {
- cache.Union(s.first, s.second);
- }
+ if (Unify(graph, unification, definitions[i], candidate)) {
+ // unification succeeded
++definition_unified;
} else {
// unification failed, conflicting definitions
@@ -518,7 +231,7 @@ void ResolveTypes(Graph& graph,
if (distinct_definitions.size() == 1) {
const Id candidate = distinct_definitions[0];
for (auto id : info.declarations) {
- cache.Union(id, candidate);
+ unification.Union(id, candidate);
++declaration_unified;
}
}
@@ -529,16 +242,12 @@ void ResolveTypes(Graph& graph,
const Time time(metrics, "resolve.rewrite");
Counter removed(metrics, "resolve.removed");
Counter retained(metrics, "resolve.retained");
- auto remap = [&cache](Id& id) {
- // update id to representative id, avoiding silent stores
- const Id fid = cache.Find(id);
- if (fid != id) {
- id = fid;
- }
+ auto remap = [&unification](Id& id) {
+ unification.Update(id);
};
Substitute<decltype(remap)> substitute(graph, remap);
named_types.seen.ForEach([&](Id id) {
- const Id fid = cache.Find(id);
+ const Id fid = unification.Find(id);
if (fid != id) {
graph.Remove(id);
++removed;
diff --git a/unification.cc b/unification.cc
new file mode 100644
index 0000000..c3a13b1
--- /dev/null
+++ b/unification.cc
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// -*- mode: C++ -*-
+//
+// Copyright 2022-2023 Google LLC
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions (the
+// "License"); you may not use this file except in compliance with the
+// License. You may obtain a copy of the License at
+//
+// https://llvm.org/LICENSE.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Giuliano Procida
+
+#include "unification.h"
+
+#include <utility>
+
+namespace stg {
+
+namespace {
+
+// Type Unification
+//
+// This is very similar to Equals. The differences are the recursion control,
+// caching and handling of StructUnion and Enum nodes.
+//
+// During unification, keep track of which pairs of types need to be equal, but
+// do not add them immediately to the unification substitutions. The caller can
+// do that if the whole unification succeeds.
+//
+// A declaration and definition of the same named type can be unified. This is
+// forward declaration resolution.
+struct Unifier {
+ enum Winner { Neither, Right, Left }; // makes p ? Right : Neither a no-op
+
+ Unifier(const Graph& graph, Unification& unification)
+ : graph(graph), unification(unification) {}
+
+ bool operator()(Id id1, Id id2) {
+ Id fid1 = Find(id1);
+ Id fid2 = Find(id2);
+ if (fid1 == fid2) {
+ return true;
+ }
+
+ // Check if the comparison has an already known result.
+ //
+ // Opportunistic as seen is unaware of new mappings.
+ if (!seen.emplace(fid1, fid2).second) {
+ return true;
+ }
+
+ const auto winner = graph.Apply2<Winner>(*this, fid1, fid2);
+ if (winner == Neither) {
+ return false;
+ }
+
+ // These will occasionally get substituted due to a recursive call.
+ fid1 = Find(fid1);
+ fid2 = Find(fid2);
+ if (fid1 == fid2) {
+ return true;
+ }
+
+ if (winner == Left) {
+ std::swap(fid1, fid2);
+ }
+ mapping.insert({fid1, fid2});
+
+ return true;
+ }
+
+ bool operator()(const std::vector<Id>& ids1, const std::vector<Id>& ids2) {
+ bool result = ids1.size() == ids2.size();
+ for (size_t ix = 0; result && ix < ids1.size(); ++ix) {
+ result = (*this)(ids1[ix], ids2[ix]);
+ }
+ return result;
+ }
+
+ template <typename Key>
+ bool operator()(const std::map<Key, Id>& ids1,
+ const std::map<Key, Id>& ids2) {
+ bool result = ids1.size() == ids2.size();
+ auto it1 = ids1.begin();
+ auto it2 = ids2.begin();
+ const auto end1 = ids1.end();
+ const auto end2 = ids2.end();
+ while (result && it1 != end1 && it2 != end2) {
+ result = it1->first == it2->first
+ && (*this)(it1->second, it2->second);
+ ++it1;
+ ++it2;
+ }
+ return result && it1 == end1 && it2 == end2;
+ }
+
+ Winner operator()(const Void&, const Void&) {
+ return Right;
+ }
+
+ Winner operator()(const Variadic&, const Variadic&) {
+ return Right;
+ }
+
+ Winner operator()(const PointerReference& x1,
+ const PointerReference& x2) {
+ return x1.kind == x2.kind
+ && (*this)(x1.pointee_type_id, x2.pointee_type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const PointerToMember& x1, const PointerToMember& x2) {
+ return (*this)(x1.containing_type_id, x2.containing_type_id)
+ && (*this)(x1.pointee_type_id, x2.pointee_type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const Typedef& x1, const Typedef& x2) {
+ return x1.name == x2.name
+ && (*this)(x1.referred_type_id, x2.referred_type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const Qualified& x1, const Qualified& x2) {
+ return x1.qualifier == x2.qualifier
+ && (*this)(x1.qualified_type_id, x2.qualified_type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const Primitive& x1, const Primitive& x2) {
+ return x1.name == x2.name
+ && x1.encoding == x2.encoding
+ && x1.bytesize == x2.bytesize
+ ? Right : Neither;
+ }
+
+ Winner operator()(const Array& x1, const Array& x2) {
+ return x1.number_of_elements == x2.number_of_elements
+ && (*this)(x1.element_type_id, x2.element_type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const BaseClass& x1, const BaseClass& x2) {
+ return x1.offset == x2.offset
+ && x1.inheritance == x2.inheritance
+ && (*this)(x1.type_id, x2.type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const Method& x1, const Method& x2) {
+ return x1.mangled_name == x2.mangled_name
+ && x1.name == x2.name
+ && x1.kind == x2.kind
+ && x1.vtable_offset == x2.vtable_offset
+ && (*this)(x1.type_id, x2.type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const Member& x1, const Member& x2) {
+ return x1.name == x2.name
+ && x1.offset == x2.offset
+ && x1.bitsize == x2.bitsize
+ && (*this)(x1.type_id, x2.type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const StructUnion& x1, const StructUnion& x2) {
+ const auto& definition1 = x1.definition;
+ const auto& definition2 = x2.definition;
+ bool result = x1.kind == x2.kind
+ && x1.name == x2.name;
+ // allow mismatches as forward declarations are always unifiable
+ if (result && definition1.has_value() && definition2.has_value()) {
+ result = definition1->bytesize == definition2->bytesize
+ && (*this)(definition1->base_classes, definition2->base_classes)
+ && (*this)(definition1->methods, definition2->methods)
+ && (*this)(definition1->members, definition2->members);
+ }
+ return result ? definition2.has_value() ? Right : Left : Neither;
+ }
+
+ Winner operator()(const Enumeration& x1, const Enumeration& x2) {
+ const auto& definition1 = x1.definition;
+ const auto& definition2 = x2.definition;
+ bool result = x1.name == x2.name;
+ // allow mismatches as forward declarations are always unifiable
+ if (result && definition1.has_value() && definition2.has_value()) {
+ result = (*this)(definition1->underlying_type_id,
+ definition2->underlying_type_id)
+ && definition1->enumerators == definition2->enumerators;
+ }
+ return result ? definition2.has_value() ? Right : Left : Neither;
+ }
+
+ Winner operator()(const Function& x1, const Function& x2) {
+ return (*this)(x1.parameters, x2.parameters)
+ && (*this)(x1.return_type_id, x2.return_type_id)
+ ? Right : Neither;
+ }
+
+ Winner operator()(const ElfSymbol& x1, const ElfSymbol& x2) {
+ bool result = x1.symbol_name == x2.symbol_name
+ && x1.version_info == x2.version_info
+ && x1.is_defined == x2.is_defined
+ && x1.symbol_type == x2.symbol_type
+ && x1.binding == x2.binding
+ && x1.visibility == x2.visibility
+ && x1.crc == x2.crc
+ && x1.ns == x2.ns
+ && x1.full_name == x2.full_name
+ && x1.type_id.has_value() == x2.type_id.has_value();
+ if (result && x1.type_id.has_value()) {
+ result = (*this)(x1.type_id.value(), x2.type_id.value());
+ }
+ return result ? Right : Neither;
+ }
+
+ Winner operator()(const Interface& x1, const Interface& x2) {
+ return (*this)(x1.symbols, x2.symbols)
+ && (*this)(x1.types, x2.types)
+ ? Right : Neither;
+ }
+
+ Winner Mismatch() {
+ return Neither;
+ }
+
+ Id Find(Id id) {
+ while (true) {
+ id = unification.Find(id);
+ auto it = mapping.find(id);
+ if (it != mapping.end()) {
+ id = it->second;
+ continue;
+ }
+ return id;
+ }
+ }
+
+ const Graph& graph;
+ Unification& unification;
+ std::unordered_set<Pair> seen;
+ std::unordered_map<Id, Id> mapping;
+};
+
+} // namespace
+
+bool Unify(const Graph& graph, Unification& unification, Id id1, Id id2) {
+ Unifier unifier(graph, unification);
+ if (unifier(id1, id2)) {
+ // commit
+ for (const auto& s : unifier.mapping) {
+ unification.Union(s.first, s.second);
+ }
+ return true;
+ }
+ return false;
+}
+
+} // namespace stg
diff --git a/unification.h b/unification.h
new file mode 100644
index 0000000..0f1ab5d
--- /dev/null
+++ b/unification.h
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// -*- mode: C++ -*-
+//
+// Copyright 2022-2023 Google LLC
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions (the
+// "License"); you may not use this file except in compliance with the
+// License. You may obtain a copy of the License at
+//
+// https://llvm.org/LICENSE.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Giuliano Procida
+
+#ifndef STG_UNIFICATION_H_
+#define STG_UNIFICATION_H_
+
+#include <unordered_map>
+#include <unordered_set>
+
+#include "graph.h"
+#include "metrics.h"
+
+namespace stg {
+
+// Keep track of which nodes have been substituted.
+class Unification {
+ public:
+ Unification(const Graph& graph, Metrics& metrics)
+ : mapping_(graph.MakeDenseIdMapping()),
+ find_query_(metrics, "unification.find_query"),
+ find_halved_(metrics, "unification.find_halved"),
+ union_known_(metrics, "unification.union_known"),
+ union_unknown_(metrics, "unification.union_unknown") {}
+
+ Id Find(Id id) {
+ ++find_query_;
+ // path halving - tiny performance gain
+ while (true) {
+ // note: safe to take references as mapping cannot grow after this
+ auto& parent = mapping_[id];
+ if (parent == id) {
+ return id;
+ }
+ auto& parent_parent = mapping_[parent];
+ if (parent_parent == parent) {
+ return parent;
+ }
+ id = parent = parent_parent;
+ ++find_halved_;
+ }
+ }
+
+ void Union(Id id1, Id id2) {
+ // id2 will always be preferred as a parent node; interpreted as a
+ // substitution, id1 will be replaced by id2
+ const Id fid1 = Find(id1);
+ const Id fid2 = Find(id2);
+ if (fid1 == fid2) {
+ ++union_known_;
+ return;
+ }
+ mapping_[fid1] = fid2;
+ ++union_unknown_;
+ }
+
+ // update id to representative id
+ void Update(Id& id) {
+ const Id fid = Find(id);
+ // avoid silent stores
+ if (fid != id) {
+ id = fid;
+ }
+ }
+
+ private:
+ Graph::DenseIdMapping mapping_;
+ Counter find_query_;
+ Counter find_halved_;
+ Counter union_known_;
+ Counter union_unknown_;
+};
+
+bool Unify(const Graph& graph, Unification& unification, Id id1, Id id2);
+
+} // namespace stg
+
+#endif // STG_UNIFICATION_H_