aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Hamilton <benhamilton@google.com>2023-04-21 12:02:42 -0600
committerJoshua Peraza <jperaza@chromium.org>2023-04-24 16:46:57 +0000
commitf548d75c9fa8bc062a580dbe5edb2fae2106d5c9 (patch)
treeefa606b498b2c00d0ce6592d111bca9fa9a19799
parent16cee17997a52e200b38fbded8545bc76127d791 (diff)
downloadgoogle-breakpad-f548d75c9fa8bc062a580dbe5edb2fae2106d5c9.tar.gz
[dump_syms/Mac] New -x option to prefer extern names when there's a mismatch
When built with -gmlt, .dSYMs are (by design) missing the `DW_AT_linkage_name` which Breakpad uses to fill out the (name-mangled) function names. Thankfully, the .dSYM contains both the old-school LC_SYMTAB command containing the STABS-format symbols (which include the fully-qualified C++ symbol names we want, but no actual compilation unit data), as well as the LC_SEGMENT_64 containing the __DWARF segment with the minimal -gmlt debug information (which excludes the name-mangled C++ symbols). Unfortunately, since the .dSYM's STABS does not define compilation units, the usual path in `StabsReader` ignores all the fully-qualified C++ symbol names for the functions: https://chromium.googlesource.com/breakpad/breakpad/+/bd9d94c70843620adeebcd73c243001237c6d426/src/common/stabs_reader.cc#100 Fortunately, when built for macOS platforms (`HAVE_MACH_O_NLIST_H`), `StabsReader` supports storing all the STABS-format symbols as `Extern`s, regardless of whether or not they're in a compilation unit: https://chromium.googlesource.com/breakpad/breakpad/+/bd9d94c70843620adeebcd73c243001237c6d426/src/common/stabs_reader.cc#119 Currently, when there's both a `Function` and an `Extern` with the same address, `Module` discards the `Extern`: https://chromium.googlesource.com/breakpad/breakpad/+/bd9d94c70843620adeebcd73c243001237c6d426/src/common/module.cc#161 This CL adds a new `-x` option to the Mac `dump_syms` which prefers the Extern function name if there's a mismatch. Bug: https://bugs.chromium.org/p/google-breakpad/issues/detail?id=883 Change-Id: I0d32adc64fbf567600b0a5ca63c71c422b7f0f8c Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/4453650 Reviewed-by: Joshua Peraza <jperaza@chromium.org>
-rw-r--r--src/common/dwarf_cu_to_module.cc32
-rw-r--r--src/common/dwarf_cu_to_module_unittest.cc13
-rw-r--r--src/common/mac/dump_syms.cc2
-rw-r--r--src/common/mac/dump_syms.h15
-rw-r--r--src/common/module.cc15
-rw-r--r--src/common/module.h16
-rw-r--r--src/common/module_unittest.cc31
-rw-r--r--src/tools/mac/dump_syms/dump_syms_tool.cc17
8 files changed, 123 insertions, 18 deletions
diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc
index 43b468c1..708ed143 100644
--- a/src/common/dwarf_cu_to_module.cc
+++ b/src/common/dwarf_cu_to_module.cc
@@ -330,7 +330,10 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler {
// Use this from EndAttributes member functions, not ProcessAttribute*
// functions; only the former can be sure that all the DIE's attributes
// have been seen.
- StringView ComputeQualifiedName();
+ //
+ // On return, if has_qualified_name is non-NULL, *has_qualified_name is set to
+ // true if the DIE includes a fully-qualified name, false otherwise.
+ StringView ComputeQualifiedName(bool* has_qualified_name);
CUContext* cu_context_;
DIEContext* parent_context_;
@@ -466,7 +469,8 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
}
}
-StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
+StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName(
+ bool* has_qualified_name) {
// Use the demangled name, if one is available. Demangled names are
// preferable to those inferred from the DWARF structure because they
// include argument types.
@@ -482,6 +486,15 @@ StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
StringView* unqualified_name = nullptr;
StringView* enclosing_name = nullptr;
if (!qualified_name) {
+ if (has_qualified_name) {
+ // dSYMs built with -gmlt do not include the DW_AT_linkage_name
+ // with the unmangled symbol, but rather include it in the
+ // LC_SYMTAB STABS, which end up in the externs of the module.
+ //
+ // Remember this so the Module can copy over the extern name later.
+ *has_qualified_name = false;
+ }
+
// Find the unqualified name. If the DIE has its own DW_AT_name
// attribute, then use that; otherwise, check the specification.
if (!name_attribute_.empty()) {
@@ -500,6 +513,10 @@ StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
} else if (parent_context_) {
enclosing_name = &parent_context_->name;
}
+ } else {
+ if (has_qualified_name) {
+ *has_qualified_name = true;
+ }
}
// Prepare the return value before upcoming mutations possibly invalidate the
@@ -722,7 +739,8 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
ranges_form_(DW_FORM_sec_offset),
ranges_data_(0),
inline_(false),
- handle_inline_(handle_inline) {}
+ handle_inline_(handle_inline),
+ has_qualified_name_(false) {}
void ProcessAttributeUnsigned(enum DwarfAttribute attr,
enum DwarfForm form,
@@ -745,6 +763,7 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
bool inline_;
vector<unique_ptr<Module::Inline>> child_inlines_;
bool handle_inline_;
+ bool has_qualified_name_;
DIEContext child_context_; // A context for our children.
};
@@ -808,7 +827,7 @@ DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler(
bool DwarfCUToModule::FuncHandler::EndAttributes() {
// Compute our name, and record a specification, if appropriate.
- name_ = ComputeQualifiedName();
+ name_ = ComputeQualifiedName(&has_qualified_name_);
if (name_.empty() && abstract_origin_) {
name_ = abstract_origin_->name;
}
@@ -881,6 +900,9 @@ void DwarfCUToModule::FuncHandler::Finish() {
scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_));
func->ranges = ranges;
func->parameter_size = 0;
+ // If the name was unqualified, prefer the Extern name if there's a mismatch
+ // (the Extern name will be fully-qualified in that case).
+ func->prefer_extern_name = !has_qualified_name_;
if (func->address) {
// If the function address is zero this is a sign that this function
// description is just empty debug data and should just be discarded.
@@ -915,7 +937,7 @@ void DwarfCUToModule::FuncHandler::Finish() {
}
bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
- child_context_.name = ComputeQualifiedName();
+ child_context_.name = ComputeQualifiedName(NULL);
if (child_context_.name.empty() && no_specification) {
cu_context_->reporter->UnknownSpecification(offset_, specification_offset_);
}
diff --git a/src/common/dwarf_cu_to_module_unittest.cc b/src/common/dwarf_cu_to_module_unittest.cc
index 7ab2f15c..134b2c24 100644
--- a/src/common/dwarf_cu_to_module_unittest.cc
+++ b/src/common/dwarf_cu_to_module_unittest.cc
@@ -267,6 +267,10 @@ class CUFixtureBase {
void TestFunction(int i, const string& name,
Module::Address address, Module::Address size);
+ // Test that the I'th function (ordered by address) in the module
+ // this.module_ has the given prefer_extern_name.
+ void TestFunctionPreferExternName(int i, bool prefer_extern_name);
+
// Test that the number of source lines owned by the I'th function
// in the module this.module_ is equal to EXPECTED.
void TestLineCount(int i, size_t expected);
@@ -615,6 +619,15 @@ void CUFixtureBase::TestFunction(int i, const string& name,
EXPECT_EQ(0U, function->parameter_size);
}
+void CUFixtureBase::TestFunctionPreferExternName(int i,
+ bool prefer_extern_name) {
+ FillFunctions();
+ ASSERT_LT((size_t)i, functions_.size());
+
+ Module::Function* function = functions_[i];
+ EXPECT_EQ(prefer_extern_name, function->prefer_extern_name);
+}
+
void CUFixtureBase::TestLineCount(int i, size_t expected) {
FillFunctions();
ASSERT_LT((size_t) i, functions_.size());
diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc
index 04ccae25..dd91196a 100644
--- a/src/common/mac/dump_syms.cc
+++ b/src/common/mac/dump_syms.cc
@@ -420,7 +420,7 @@ bool DumpSymbols::CreateEmptyModule(scoped_ptr<Module>& module) {
// Create a module to hold the debugging information.
module.reset(new Module(module_name, "mac", selected_arch_name, identifier,
- "", enable_multiple_));
+ "", enable_multiple_, prefer_extern_name_));
return true;
}
diff --git a/src/common/mac/dump_syms.h b/src/common/mac/dump_syms.h
index d5aa7185..5bcb0b58 100644
--- a/src/common/mac/dump_syms.h
+++ b/src/common/mac/dump_syms.h
@@ -57,7 +57,8 @@ class DumpSymbols {
DumpSymbols(SymbolData symbol_data,
bool handle_inter_cu_refs,
bool enable_multiple = false,
- const std::string& module_name = "")
+ const std::string& module_name = "",
+ bool prefer_extern_name = false)
: symbol_data_(symbol_data),
handle_inter_cu_refs_(handle_inter_cu_refs),
object_filename_(),
@@ -68,7 +69,8 @@ class DumpSymbols {
selected_object_file_(),
selected_object_name_(),
enable_multiple_(enable_multiple),
- module_name_(module_name) {}
+ module_name_(module_name),
+ prefer_extern_name_(prefer_extern_name) {}
~DumpSymbols() = default;
// Prepare to read debugging information from |filename|. |filename| may be
@@ -205,6 +207,15 @@ class DumpSymbols {
// If non-empty, used as the module name. Otherwise, the basename of
// |object_filename_| is used as the module name.
const std::string module_name_;
+
+ // If a Function and an Extern share the same address but have a different
+ // name, prefer the name of the Extern.
+ //
+ // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
+ // as the Function's fully-qualified name will only be present in the STABS
+ // (which are placed in the Extern), not in the DWARF symbols (which are
+ // placed in the Function).
+ bool prefer_extern_name_;
};
} // namespace google_breakpad
diff --git a/src/common/module.cc b/src/common/module.cc
index a5c1b6ad..e61c3b7a 100644
--- a/src/common/module.cc
+++ b/src/common/module.cc
@@ -105,14 +105,16 @@ Module::Module(const string& name,
const string& architecture,
const string& id,
const string& code_id /* = "" */,
- bool enable_multiple_field /* = false*/)
+ bool enable_multiple_field /* = false*/,
+ bool prefer_extern_name /* = false*/)
: name_(name),
os_(os),
architecture_(architecture),
id_(id),
code_id_(code_id),
load_address_(0),
- enable_multiple_field_(enable_multiple_field) {}
+ enable_multiple_field_(enable_multiple_field),
+ prefer_extern_name_(prefer_extern_name) {}
Module::~Module() {
for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
@@ -152,11 +154,14 @@ bool Module::AddFunction(Function* function) {
it_ext = externs_.find(&arm_thumb_ext);
}
if (it_ext != externs_.end()) {
+ Extern* found_ext = it_ext->get();
+ bool name_mismatch = found_ext->name != function->name;
if (enable_multiple_field_) {
- Extern* found_ext = it_ext->get();
// If the PUBLIC is for the same symbol as the FUNC, don't mark multiple.
- function->is_multiple |=
- found_ext->name != function->name || found_ext->is_multiple;
+ function->is_multiple |= name_mismatch || found_ext->is_multiple;
+ }
+ if (name_mismatch && prefer_extern_name_) {
+ function->name = AddStringToPool(it_ext->get()->name);
}
externs_.erase(it_ext);
}
diff --git a/src/common/module.h b/src/common/module.h
index c1fd9f59..d736701f 100644
--- a/src/common/module.h
+++ b/src/common/module.h
@@ -131,6 +131,10 @@ class Module {
// If this symbol has been folded with other symbols in the linked binary.
bool is_multiple = false;
+
+ // If the function's name should be filled out from a matching Extern,
+ // should they not match.
+ bool prefer_extern_name = false;
};
struct InlineOrigin {
@@ -317,7 +321,8 @@ class Module {
const string& architecture,
const string& id,
const string& code_id = "",
- bool enable_multiple_field = false);
+ bool enable_multiple_field = false,
+ bool prefer_extern_name = false);
~Module();
// Set the module's load address to LOAD_ADDRESS; addresses given
@@ -502,6 +507,15 @@ class Module {
// at
// https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3
bool enable_multiple_field_;
+
+ // If a Function and an Extern share the same address but have a different
+ // name, prefer the name of the Extern.
+ //
+ // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
+ // as the Function's fully-qualified name will only be present in the STABS
+ // (which are placed in the Extern), not in the DWARF symbols (which are
+ // placed in the Function).
+ bool prefer_extern_name_;
};
} // namespace google_breakpad
diff --git a/src/common/module_unittest.cc b/src/common/module_unittest.cc
index 213b3154..6a6762e5 100644
--- a/src/common/module_unittest.cc
+++ b/src/common/module_unittest.cc
@@ -641,6 +641,37 @@ TEST(Module, ConstructFunctionsAndExternsWithSameAddress) {
}
// If there exists an extern and a function at the same address, only write
+// out the FUNC entry.
+TEST(Module, ConstructFunctionsAndExternsWithSameAddressPreferExternName) {
+ stringstream s;
+ Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", false, true);
+
+ // Two externs.
+ auto extern1 = std::make_unique<Module::Extern>(0xabc0);
+ extern1->name = "extern1";
+ auto extern2 = std::make_unique<Module::Extern>(0xfff0);
+ extern2->name = "extern2";
+
+ m.AddExtern(std::move(extern1));
+ m.AddExtern(std::move(extern2));
+
+ Module::Function* function = new Module::Function("function2", 0xfff0);
+ Module::Range range(0xfff0, 0x10);
+ function->ranges.push_back(range);
+ function->parameter_size = 0;
+ m.AddFunction(function);
+
+ m.Write(s, ALL_SYMBOL_DATA);
+ string contents = s.str();
+
+ EXPECT_STREQ("MODULE " MODULE_OS " " MODULE_ARCH " " MODULE_ID " " MODULE_NAME
+ "\n"
+ "FUNC fff0 10 0 extern2\n"
+ "PUBLIC abc0 0 extern1\n",
+ contents.c_str());
+}
+
+// If there exists an extern and a function at the same address, only write
// out the FUNC entry, and mark it with `m` if the multiple field is enabled.
TEST(Module, ConstructFunctionsAndExternsWithSameAddressMultiple) {
stringstream s;
diff --git a/src/tools/mac/dump_syms/dump_syms_tool.cc b/src/tools/mac/dump_syms/dump_syms_tool.cc
index ab36164f..9fb8d13f 100644
--- a/src/tools/mac/dump_syms/dump_syms_tool.cc
+++ b/src/tools/mac/dump_syms/dump_syms_tool.cc
@@ -64,7 +64,8 @@ struct Options {
handle_inter_cu_refs(true),
handle_inlines(false),
enable_multiple(false),
- module_name() {}
+ module_name(),
+ prefer_extern_name(false) {}
string srcPath;
string dsymPath;
@@ -75,6 +76,7 @@ struct Options {
bool handle_inlines;
bool enable_multiple;
string module_name;
+ bool prefer_extern_name;
};
static bool StackFrameEntryComparator(const Module::StackFrameEntry* a,
@@ -151,7 +153,8 @@ static bool Start(const Options& options) {
(options.handle_inlines ? INLINES : NO_DATA) |
(options.cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES;
DumpSymbols dump_symbols(symbol_data, options.handle_inter_cu_refs,
- options.enable_multiple, options.module_name);
+ options.enable_multiple, options.module_name,
+ options.prefer_extern_name);
// For x86_64 binaries, the CFI data is in the __TEXT,__eh_frame of the
// Mach-O file, which is not copied into the dSYM. Whereas in i386, the CFI
@@ -244,7 +247,7 @@ static void Usage(int argc, const char *argv[]) {
fprintf(stderr, "Output a Breakpad symbol file from a Mach-o file.\n");
fprintf(stderr,
"Usage: %s [-a ARCHITECTURE] [-c] [-g dSYM path] "
- "[-n MODULE] <Mach-o file>\n",
+ "[-n MODULE] [-x] <Mach-o file>\n",
argv[0]);
fprintf(stderr, "\t-i: Output module header information only.\n");
fprintf(stderr, "\t-a: Architecture type [default: native, or whatever is\n");
@@ -260,6 +263,9 @@ static void Usage(int argc, const char *argv[]) {
fprintf(stderr,
"\t-n: Use MODULE as the name of the module rather than \n"
"the basename of the Mach-O file/dSYM.\n");
+ fprintf(stderr,
+ "\t-x: Prefer the PUBLIC (extern) name over the FUNC if\n"
+ "they do not match.\n");
fprintf(stderr, "\t-h: Usage\n");
fprintf(stderr, "\t-?: Usage\n");
}
@@ -269,7 +275,7 @@ static void SetupOptions(int argc, const char *argv[], Options *options) {
extern int optind;
signed char ch;
- while ((ch = getopt(argc, (char* const*)argv, "ia:g:crdm?hn:")) != -1) {
+ while ((ch = getopt(argc, (char* const*)argv, "ia:g:crdm?hn:x")) != -1) {
switch (ch) {
case 'i':
options->header_only = true;
@@ -302,6 +308,9 @@ static void SetupOptions(int argc, const char *argv[], Options *options) {
case 'n':
options->module_name = optarg;
break;
+ case 'x':
+ options->prefer_extern_name = true;
+ break;
case '?':
case 'h':
Usage(argc, argv);